You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
1
(13) |
2
(15) |
|
3
(16) |
4
(42) |
5
(9) |
6
(20) |
7
(22) |
8
(24) |
9
(12) |
|
10
(24) |
11
(11) |
12
(2) |
13
(13) |
14
(8) |
15
|
16
(16) |
|
17
(24) |
18
(36) |
19
(100) |
20
(94) |
21
(50) |
22
(39) |
23
(10) |
|
24
(14) |
25
(19) |
26
(2) |
27
(6) |
28
(17) |
29
(9) |
30
(8) |
|
31
(21) |
|
|
|
|
|
|
|
From: <sv...@va...> - 2009-05-21 23:59:41
|
Author: njn
Date: 2009-05-22 00:59:34 +0100 (Fri, 22 May 2009)
New Revision: 10087
Log:
Merge r10085, r10086 (post-fork handling) from the DARWIN branch.
Modified:
trunk/coregrind/m_syswrap/syswrap-generic.c
Modified: trunk/coregrind/m_syswrap/syswrap-generic.c
===================================================================
--- trunk/coregrind/m_syswrap/syswrap-generic.c 2009-05-21 23:58:47 UTC (rev 10086)
+++ trunk/coregrind/m_syswrap/syswrap-generic.c 2009-05-21 23:59:34 UTC (rev 10087)
@@ -2780,7 +2780,8 @@
// ignore the various args it gets, and so it looks arch-neutral. Hmm.
PRE(sys_fork)
{
- UWord result;
+ Bool is_child;
+ Int child_pid;
vki_sigset_t mask;
PRINT("sys_fork ( )");
@@ -2795,12 +2796,13 @@
if (!SUCCESS) return;
- result = RES;
+ // RES is 0 for child, non-0 (the child's PID) for parent.
+ is_child = ( RES == 0 ? True : False );
+ child_pid = ( is_child ? -1 : RES );
VG_(do_atfork_pre)(tid);
- if (SUCCESS && RES == 0) {
- /* child */
+ if (is_child) {
VG_(do_atfork_child)(tid);
/* restore signal mask */
@@ -2812,13 +2814,11 @@
duly stops writing any further logging output. */
if (!VG_(logging_to_socket) && VG_(clo_child_silent_after_fork))
VG_(clo_log_fd) = -1;
- }
- else
- if (SUCCESS && RES > 0) {
- /* parent */
+
+ } else {
VG_(do_atfork_parent)(tid);
- PRINT(" fork: process %d created child %ld\n", VG_(getpid)(), RES);
+ PRINT(" fork: process %d created child %d\n", VG_(getpid)(), child_pid);
/* restore signal mask */
VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
|
|
From: <sv...@va...> - 2009-05-21 23:58:53
|
Author: njn
Date: 2009-05-22 00:58:47 +0100 (Fri, 22 May 2009)
New Revision: 10086
Log:
Tweak post-fork handling.
Modified:
branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c
Modified: branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c
===================================================================
--- branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c 2009-05-21 23:54:19 UTC (rev 10085)
+++ branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c 2009-05-21 23:58:47 UTC (rev 10086)
@@ -2901,20 +2901,21 @@
if (!SUCCESS) return;
-#if defined(VGO_darwin)
+#if defined(VGO_linux) || defined(VGO_aix5)
+ // RES is 0 for child, non-0 (the child's PID) for parent.
+ is_child = ( RES == 0 ? True : False );
+ child_pid = ( is_child ? -1 : RES );
+#elif defined(VGO_darwin)
// RES is the child's pid. RESHI is 1 for child, 0 for parent.
is_child = RESHI;
child_pid = RES;
#else
- // RES is 0 for child, non-0 (the child's PID) for parent.
- is_child = ( RES == 0 ? True : False );
- child_pid = ( is_child ? -1 : RES );
+# error Unknown OS
#endif
VG_(do_atfork_pre)(tid);
if (is_child) {
- /* child */
VG_(do_atfork_child)(tid);
/* restore signal mask */
@@ -2926,9 +2927,8 @@
duly stops writing any further logging output. */
if (!VG_(logging_to_socket) && VG_(clo_child_silent_after_fork))
VG_(clo_log_fd) = -1;
- }
- else {
- /* parent */
+
+ } else {
VG_(do_atfork_parent)(tid);
PRINT(" fork: process %d created child %d\n", VG_(getpid)(), child_pid);
|
|
From: <sv...@va...> - 2009-05-21 23:54:23
|
Author: njn
Date: 2009-05-22 00:54:19 +0100 (Fri, 22 May 2009)
New Revision: 10085
Log:
Make post-fork code clearer.
Modified:
branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c
Modified: branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c
===================================================================
--- branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c 2009-05-21 23:53:40 UTC (rev 10084)
+++ branches/DARWIN/coregrind/m_syswrap/syswrap-generic.c 2009-05-21 23:54:19 UTC (rev 10085)
@@ -2885,7 +2885,8 @@
// ignore the various args it gets, and so it looks arch-neutral. Hmm.
PRE(sys_fork)
{
- UWord result;
+ Bool is_child;
+ Int child_pid;
vki_sigset_t mask;
PRINT("sys_fork ( )");
@@ -2901,15 +2902,18 @@
if (!SUCCESS) return;
#if defined(VGO_darwin)
- // RES is child's pid; RESHI is 1 for child, 0 for parent
- result = RESHI ? 0 : RES;
+ // RES is the child's pid. RESHI is 1 for child, 0 for parent.
+ is_child = RESHI;
+ child_pid = RES;
#else
- result = RES;
+ // RES is 0 for child, non-0 (the child's PID) for parent.
+ is_child = ( RES == 0 ? True : False );
+ child_pid = ( is_child ? -1 : RES );
#endif
VG_(do_atfork_pre)(tid);
- if (result == 0) {
+ if (is_child) {
/* child */
VG_(do_atfork_child)(tid);
@@ -2927,7 +2931,7 @@
/* parent */
VG_(do_atfork_parent)(tid);
- PRINT(" fork: process %d created child %ld\n", VG_(getpid)(), result);
+ PRINT(" fork: process %d created child %d\n", VG_(getpid)(), child_pid);
/* restore signal mask */
VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
|
|
From: <sv...@va...> - 2009-05-21 23:53:47
|
Author: njn
Date: 2009-05-22 00:53:40 +0100 (Fri, 22 May 2009)
New Revision: 10084
Log:
DARWIN sync: improve syscall retval tracking, and factor out position code.
Modified:
trunk/coregrind/m_syswrap/priv_types_n_macros.h
trunk/coregrind/m_syswrap/syswrap-main.c
Modified: trunk/coregrind/m_syswrap/priv_types_n_macros.h
===================================================================
--- trunk/coregrind/m_syswrap/priv_types_n_macros.h 2009-05-21 23:52:52 UTC (rev 10083)
+++ trunk/coregrind/m_syswrap/priv_types_n_macros.h 2009-05-21 23:53:40 UTC (rev 10084)
@@ -91,7 +91,6 @@
Int o_arg6;
Int uu_arg7;
Int uu_arg8;
- Int o_retval;
# elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
Int o_arg1;
Int o_arg2;
@@ -101,7 +100,6 @@
Int o_arg6;
Int o_arg7;
Int o_arg8;
- Int o_retval;
# else
# error "Unknown platform"
# endif
Modified: trunk/coregrind/m_syswrap/syswrap-main.c
===================================================================
--- trunk/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:52:52 UTC (rev 10083)
+++ trunk/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:53:40 UTC (rev 10084)
@@ -535,7 +535,8 @@
}
static
-void putSyscallStatusIntoGuestState ( /*IN*/ SyscallStatus* canonical,
+void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
+ /*IN*/ SyscallStatus* canonical,
/*OUT*/VexGuestArchState* gst_vanilla )
{
# if defined(VGP_x86_linux)
@@ -549,6 +550,8 @@
} else {
gst->guest_EAX = sr_Res(canonical->sres);
}
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_x86_EAX, sizeof(UWord) );
# elif defined(VGP_amd64_linux)
VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
@@ -561,6 +564,8 @@
} else {
gst->guest_RAX = sr_Res(canonical->sres);
}
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_amd64_RAX, sizeof(UWord) );
# elif defined(VGP_ppc32_linux)
VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
@@ -575,6 +580,10 @@
LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
gst->guest_GPR3 = sr_Res(canonical->sres);
}
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc32_GPR3, sizeof(UWord) );
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc32_CR0_0, sizeof(UChar) );
# elif defined(VGP_ppc64_linux)
VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
@@ -589,18 +598,30 @@
LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
gst->guest_GPR3 = sr_Res(canonical->sres);
}
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc64_GPR3, sizeof(UWord) );
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc64_CR0_0, sizeof(UChar) );
# elif defined(VGP_ppc32_aix5)
VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
vg_assert(canonical->what == SsComplete);
gst->guest_GPR3 = canonical->sres.res;
gst->guest_GPR4 = canonical->sres.err;
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc32_GPR3, sizeof(UWord) );
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc32_GPR4, sizeof(UWord) );
# elif defined(VGP_ppc64_aix5)
VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
vg_assert(canonical->what == SsComplete);
gst->guest_GPR3 = canonical->sres.res;
gst->guest_GPR4 = canonical->sres.err;
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc64_GPR3, sizeof(UWord) );
+ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
+ OFFSET_ppc64_GPR4, sizeof(UWord) );
# else
# error "putSyscallStatusIntoGuestState: unknown arch"
@@ -625,7 +646,6 @@
layout->o_arg6 = OFFSET_x86_EBP;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_x86_EAX;
#elif defined(VGP_amd64_linux)
layout->o_sysno = OFFSET_amd64_RAX;
@@ -637,7 +657,6 @@
layout->o_arg6 = OFFSET_amd64_R9;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_amd64_RAX;
#elif defined(VGP_ppc32_linux)
layout->o_sysno = OFFSET_ppc32_GPR0;
@@ -649,7 +668,6 @@
layout->o_arg6 = OFFSET_ppc32_GPR8;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_ppc32_GPR3;
#elif defined(VGP_ppc64_linux)
layout->o_sysno = OFFSET_ppc64_GPR0;
@@ -661,7 +679,6 @@
layout->o_arg6 = OFFSET_ppc64_GPR8;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_ppc64_GPR3;
#elif defined(VGP_ppc32_aix5)
layout->o_sysno = OFFSET_ppc32_GPR2;
@@ -673,7 +690,6 @@
layout->o_arg6 = OFFSET_ppc32_GPR8;
layout->o_arg7 = OFFSET_ppc32_GPR9;
layout->o_arg8 = OFFSET_ppc32_GPR10;
- layout->o_retval = OFFSET_ppc32_GPR3;
#elif defined(VGP_ppc64_aix5)
layout->o_sysno = OFFSET_ppc64_GPR2;
@@ -685,7 +701,6 @@
layout->o_arg6 = OFFSET_ppc64_GPR8;
layout->o_arg7 = OFFSET_ppc64_GPR9;
layout->o_arg8 = OFFSET_ppc64_GPR10;
- layout->o_retval = OFFSET_ppc64_GPR3;
#else
# error "getSyscallLayout: unknown arch"
@@ -1124,7 +1139,7 @@
/* Dump the syscall result back in the guest state. This is
a platform-specific action. */
if (!(sci->flags & SfNoWriteResult))
- putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+ putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
/* Situation now:
- the guest state is now correctly modified following the syscall
@@ -1153,7 +1168,6 @@
*/
void VG_(post_syscall) (ThreadId tid)
{
- SyscallArgLayout layout;
SyscallInfo* sci;
const SyscallTableEntry* ent;
SyscallStatus test_status;
@@ -1193,14 +1207,6 @@
sysno = sci->args.sysno;
ent = get_syscall_entry(sysno);
- /* We need the arg layout .. sigh */
- getSyscallArgLayout( &layout );
-
- /* Tell the tool that the assignment has occurred, so it can update
- shadow regs as necessary. */
- VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, layout.o_retval,
- sizeof(UWord) );
-
/* pre: status == Complete (asserted above) */
/* Consider either success or failure. Now run the post handler if:
- it exists, and
@@ -1219,7 +1225,7 @@
failure if the kernel supplied a fd that it doesn't like), once
again dump the syscall result back in the guest state.*/
if (!(sci->flags & SfNoWriteResult))
- putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
+ putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
/* Do any post-syscall actions required by the tool. */
if (VG_(needs).syscall_wrapper)
@@ -1271,11 +1277,15 @@
/* These are addresses within ML_(do_syscall_for_client_WRK). See
syscall-$PLAT.S for details.
*/
-extern const Addr ML_(blksys_setup);
-extern const Addr ML_(blksys_restart);
-extern const Addr ML_(blksys_complete);
-extern const Addr ML_(blksys_committed);
-extern const Addr ML_(blksys_finished);
+#if defined(VGO_linux) || defined(VGO_aix5)
+ extern const Addr ML_(blksys_setup);
+ extern const Addr ML_(blksys_restart);
+ extern const Addr ML_(blksys_complete);
+ extern const Addr ML_(blksys_committed);
+ extern const Addr ML_(blksys_finished);
+#else
+# error "Unknown OS"
+#endif
/* Back up guest state to restart a system call. */
@@ -1406,31 +1416,54 @@
SysRes sres,
Bool restart)
{
- /* Note that the sysnum arg seems to contain not-dependable-on info
- (I think it depends on the state the real syscall was in at
- interrupt) and so is ignored, apart from in the following
- printf.
+ /* Note that we don't know the syscall number here, since (1) in
+ general there's no reliable way to get hold of it short of
+ stashing it in the guest state before the syscall, and (2) in
+ any case we don't need to know it for the actions done by this
+ routine.
Furthermore, 'sres' is only used in the case where the syscall
is complete, but the result has not been committed to the guest
- state yet. */
+ state yet. In any other situation it will be meaningless and
+ therefore ignored. */
- static const Bool debug = False;
-
ThreadState* tst;
SyscallStatus canonical;
ThreadArchState* th_regs;
SyscallInfo* sci;
- if (debug)
- VG_(printf)( "interrupted_syscall: tid=%d, IP=0x%llx, "
- "restart=%s, sysret.isError=%s, sysret.val=%lld\n",
- (Int)tid,
- (ULong)ip,
- restart ? "True" : "False",
- sr_isError(sres) ? "True" : "False",
- (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );
+ /* Compute some Booleans indicating which range we're in. */
+ Bool outside_range,
+ in_setup_to_restart, // [1,2) in the .S files
+ at_restart, // [2] in the .S files
+ in_complete_to_committed, // [3,4) in the .S files
+ in_committed_to_finished; // [4,5) in the .S files
+# if defined(VGO_linux) || defined(VGO_aix5)
+ outside_range
+ = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
+ in_setup_to_restart
+ = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
+ at_restart
+ = ip == ML_(blksys_restart);
+ in_complete_to_committed
+ = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
+ in_committed_to_finished
+ = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
+# else
+# error "Unknown OS"
+# endif
+
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg,
+ "interrupted_syscall: tid=%d, ip=0x%llx, "
+ "restart=%s, sres.isErr=%s, sres.val=%lld",
+ (Int)tid,
+ (ULong)ip,
+ restart ? "True" : "False",
+ sr_isError(sres) ? "True" : "False",
+ (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );
+
vg_assert(VG_(is_valid_tid)(tid));
vg_assert(tid >= 1 && tid < VG_N_THREADS);
vg_assert(VG_(is_running_thread)(tid));
@@ -1441,10 +1474,10 @@
/* Figure out what the state of the syscall was by examining the
(real) IP at the time of the signal, and act accordingly. */
-
- if (ip < ML_(blksys_setup) || ip >= ML_(blksys_finished)) {
- VG_(printf)(" not in syscall (%#lx - %#lx)\n",
- ML_(blksys_setup), ML_(blksys_finished));
+ if (outside_range) {
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg,
+ " not in syscall at all: hmm, very suspicious" );
/* Looks like we weren't in a syscall at all. Hmm. */
vg_assert(sci->status.what != SsIdle);
return;
@@ -1455,53 +1488,62 @@
Hence: */
vg_assert(sci->status.what != SsIdle);
- if (ip >= ML_(blksys_setup) && ip < ML_(blksys_restart)) {
+ /* now, do one of four fixup actions, depending on where the IP has
+ got to. */
+
+ if (in_setup_to_restart) {
/* syscall hasn't even started; go around again */
- if (debug)
- VG_(printf)(" not started: restart\n");
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg, " not started: restarting");
vg_assert(sci->status.what == SsHandToKernel);
ML_(fixup_guest_state_to_restart_syscall)(th_regs);
}
else
- if (ip == ML_(blksys_restart)) {
+ if (at_restart) {
/* We're either about to run the syscall, or it was interrupted
and the kernel restarted it. Restart if asked, otherwise
EINTR it. */
- if (restart)
+ if (restart) {
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg, " at syscall instr: restarting");
ML_(fixup_guest_state_to_restart_syscall)(th_regs);
- else {
+ } else {
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg, " at syscall instr: returning EINTR");
canonical = convert_SysRes_to_SyscallStatus(
VG_(mk_SysRes_Error)( VKI_EINTR )
);
if (!(sci->flags & SfNoWriteResult))
- putSyscallStatusIntoGuestState( &canonical, &th_regs->vex );
+ putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
sci->status = canonical;
VG_(post_syscall)(tid);
}
}
else
- if (ip >= ML_(blksys_complete) && ip < ML_(blksys_committed)) {
+ if (in_complete_to_committed) {
/* Syscall complete, but result hasn't been written back yet.
Write the SysRes we were supplied with back to the guest
state. */
- if (debug)
- VG_(printf)(" completed\n");
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg,
+ " completed, but uncommitted: committing");
canonical = convert_SysRes_to_SyscallStatus( sres );
if (!(sci->flags & SfNoWriteResult))
- putSyscallStatusIntoGuestState( &canonical, &th_regs->vex );
+ putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
sci->status = canonical;
VG_(post_syscall)(tid);
}
- else
- if (ip >= ML_(blksys_committed) && ip < ML_(blksys_finished)) {
+ else
+ if (in_committed_to_finished) {
/* Result committed, but the signal mask has not been restored;
we expect our caller (the signal handler) will have fixed
this up. */
- if (debug)
- VG_(printf)(" all done\n");
+ if (VG_(clo_trace_signals))
+ VG_(message)( Vg_DebugMsg,
+ " completed and committed: nothing to do");
VG_(post_syscall)(tid);
}
|
|
From: <sv...@va...> - 2009-05-21 23:52:55
|
Author: njn
Date: 2009-05-22 00:52:52 +0100 (Fri, 22 May 2009)
New Revision: 10083
Log:
DARWIN sync: spacing and layout.
Modified:
trunk/coregrind/pub_core_libcfile.h
Modified: trunk/coregrind/pub_core_libcfile.h
===================================================================
--- trunk/coregrind/pub_core_libcfile.h 2009-05-21 23:40:31 UTC (rev 10082)
+++ trunk/coregrind/pub_core_libcfile.h 2009-05-21 23:52:52 UTC (rev 10083)
@@ -56,17 +56,18 @@
none specified. */
#define VG_CLO_DEFAULT_LOGPORT 1500
+extern Int VG_(connect_via_socket)( UChar* str );
+
extern UInt VG_(htonl) ( UInt x );
extern UInt VG_(ntohl) ( UInt x );
extern UShort VG_(htons) ( UShort x );
extern UShort VG_(ntohs) ( UShort x );
extern Int VG_(write_socket)( Int sd, void *msg, Int count );
-extern Int VG_(connect_via_socket)( UChar* str );
extern Int VG_(getsockname) ( Int sd, struct vki_sockaddr *name, Int *namelen );
extern Int VG_(getpeername) ( Int sd, struct vki_sockaddr *name, Int *namelen );
-extern Int VG_(getsockopt) ( Int sd, Int level, Int optname, void *optval,
- Int *optlen );
+extern Int VG_(getsockopt) ( Int sd, Int level, Int optname,
+ void *optval, Int *optlen );
extern Int VG_(access) ( const HChar* path, Bool irusr, Bool iwusr,
Bool ixusr );
@@ -75,9 +76,10 @@
extern Int VG_(check_executable)(/*OUT*/Bool* is_setuid,
const HChar* f, Bool allow_setuid);
-/* Note this moves (or at least, is believed to move) the file pointer
+/* DDD: Note this moves (or at least, is believed to move) the file pointer
on Linux and AIX5 but doesn't on Darwin. This inconsistency should
- be fixed. */
+ be fixed. (In other words, why isn't the Linux/AIX5 version implemented in
+ terms of pread()?) */
extern SysRes VG_(pread) ( Int fd, void* buf, Int count, OffT offset );
/* Create and open (-rw------) a tmp file name incorporating said arg.
|
|
From: <sv...@va...> - 2009-05-21 23:40:38
|
Author: sewardj Date: 2009-05-22 00:40:31 +0100 (Fri, 22 May 2009) New Revision: 10082 Log: This should have been part of r10081 (which was: In the core/tool iface, VG_TDICT_CALL(tool_pre_syscall, ...) and VG_TDICT_CALL(tool_post_syscall, ...), also pass the arguments to syscall, for the tool to inspect if it wants.) Modified: branches/DCAS/include/pub_tool_tooliface.h Modified: branches/DCAS/include/pub_tool_tooliface.h =================================================================== --- branches/DCAS/include/pub_tool_tooliface.h 2009-05-21 23:35:43 UTC (rev 10081) +++ branches/DCAS/include/pub_tool_tooliface.h 2009-05-21 23:40:31 UTC (rev 10082) @@ -386,9 +386,19 @@ /* Tool does stuff before and/or after system calls? */ // Nb: If either of the pre_ functions malloc() something to return, the // corresponding post_ function had better free() it! +// Also, the args are the 'original args' -- that is, it may be +// that the syscall pre-wrapper will modify the args before the +// syscall happens. So these args are the original, un-modified +// args. Finally, nArgs merely indicates the length of args[..], +// it does not indicate how many of those values are actually +// relevant to the syscall. args[0 .. nArgs-1] is guaranteed +// to be defined and to contain all the args for this syscall, +// possibly including some trailing zeroes. extern void VG_(needs_syscall_wrapper) ( - void (* pre_syscall)(ThreadId tid, UInt syscallno), - void (*post_syscall)(ThreadId tid, UInt syscallno, SysRes res) + void (* pre_syscall)(ThreadId tid, UInt syscallno, + UWord* args, UInt nArgs), + void (*post_syscall)(ThreadId tid, UInt syscallno, + UWord* args, UInt nArgs, SysRes res) ); /* Are tool-state sanity checks performed? */ |
|
From: <sv...@va...> - 2009-05-21 23:35:49
|
Author: sewardj
Date: 2009-05-22 00:35:43 +0100 (Fri, 22 May 2009)
New Revision: 10081
Log:
In the core/tool iface, VG_TDICT_CALL(tool_pre_syscall, ...) and
VG_TDICT_CALL(tool_post_syscall, ...), also pass the arguments to
syscall, for the tool to inspect if it wants.
Modified:
branches/DCAS/coregrind/m_syswrap/syswrap-main.c
branches/DCAS/coregrind/m_tooliface.c
branches/DCAS/coregrind/pub_core_tooliface.h
Modified: branches/DCAS/coregrind/m_syswrap/syswrap-main.c
===================================================================
--- branches/DCAS/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:33:17 UTC (rev 10080)
+++ branches/DCAS/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:35:43 UTC (rev 10081)
@@ -360,7 +360,6 @@
canonical->arg7 = 0;
canonical->arg8 = 0;
-
#elif defined(VGP_ppc32_linux)
VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
canonical->sysno = gst->guest_GPR0;
@@ -373,7 +372,6 @@
canonical->arg7 = 0;
canonical->arg8 = 0;
-
#elif defined(VGP_ppc64_linux)
VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
canonical->sysno = gst->guest_GPR0;
@@ -386,7 +384,6 @@
canonical->arg7 = 0;
canonical->arg8 = 0;
-
#elif defined(VGP_ppc32_aix5)
VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
canonical->sysno = gst->guest_GPR2;
@@ -956,7 +953,17 @@
/* Do any pre-syscall actions */
if (VG_(needs).syscall_wrapper) {
- VG_TDICT_CALL(tool_pre_syscall, tid, sysno);
+ UWord tmpv[8];
+ tmpv[0] = sci->orig_args.arg1;
+ tmpv[1] = sci->orig_args.arg2;
+ tmpv[2] = sci->orig_args.arg3;
+ tmpv[3] = sci->orig_args.arg4;
+ tmpv[4] = sci->orig_args.arg5;
+ tmpv[5] = sci->orig_args.arg6;
+ tmpv[6] = sci->orig_args.arg7;
+ tmpv[7] = sci->orig_args.arg8;
+ VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
+ &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
}
vg_assert(ent);
@@ -1222,8 +1229,21 @@
putSyscallStatusIntoGuestState( &sci->status, &tst->arch.vex );
/* Do any post-syscall actions required by the tool. */
- if (VG_(needs).syscall_wrapper)
- VG_TDICT_CALL(tool_post_syscall, tid, sysno, sci->status.sres);
+ if (VG_(needs).syscall_wrapper) {
+ UWord tmpv[8];
+ tmpv[0] = sci->orig_args.arg1;
+ tmpv[1] = sci->orig_args.arg2;
+ tmpv[2] = sci->orig_args.arg3;
+ tmpv[3] = sci->orig_args.arg4;
+ tmpv[4] = sci->orig_args.arg5;
+ tmpv[5] = sci->orig_args.arg6;
+ tmpv[6] = sci->orig_args.arg7;
+ tmpv[7] = sci->orig_args.arg8;
+ VG_TDICT_CALL(tool_post_syscall, tid,
+ sysno,
+ &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
+ sci->status.sres);
+ }
/* The syscall is done. */
vg_assert(sci->status.what == SsComplete);
Modified: branches/DCAS/coregrind/m_tooliface.c
===================================================================
--- branches/DCAS/coregrind/m_tooliface.c 2009-05-21 23:33:17 UTC (rev 10080)
+++ branches/DCAS/coregrind/m_tooliface.c 2009-05-21 23:35:43 UTC (rev 10081)
@@ -269,8 +269,8 @@
}
void VG_(needs_syscall_wrapper)(
- void(*pre) (ThreadId, UInt),
- void(*post)(ThreadId, UInt, SysRes res)
+ void(*pre) (ThreadId, UInt, UWord*, UInt),
+ void(*post)(ThreadId, UInt, UWord*, UInt, SysRes res)
)
{
VG_(needs).syscall_wrapper = True;
Modified: branches/DCAS/coregrind/pub_core_tooliface.h
===================================================================
--- branches/DCAS/coregrind/pub_core_tooliface.h 2009-05-21 23:33:17 UTC (rev 10080)
+++ branches/DCAS/coregrind/pub_core_tooliface.h 2009-05-21 23:35:43 UTC (rev 10081)
@@ -138,8 +138,8 @@
Bool (*tool_handle_client_request)(ThreadId, UWord*, UWord*);
// VG_(needs).syscall_wrapper
- void (*tool_pre_syscall) (ThreadId, UInt);
- void (*tool_post_syscall)(ThreadId, UInt, SysRes);
+ void (*tool_pre_syscall) (ThreadId, UInt, UWord*, UInt);
+ void (*tool_post_syscall)(ThreadId, UInt, UWord*, UInt, SysRes);
// VG_(needs).sanity_checks
Bool (*tool_cheap_sanity_check)(void);
|
|
From: <sv...@va...> - 2009-05-21 23:33:20
|
Author: njn
Date: 2009-05-22 00:33:17 +0100 (Fri, 22 May 2009)
New Revision: 10080
Log:
Fix typo.
Modified:
branches/DARWIN/coregrind/m_syswrap/syswrap-main.c
Modified: branches/DARWIN/coregrind/m_syswrap/syswrap-main.c
===================================================================
--- branches/DARWIN/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:27:17 UTC (rev 10079)
+++ branches/DARWIN/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:33:17 UTC (rev 10080)
@@ -894,7 +894,7 @@
VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
OFFSET_ppc32_GPR3, sizeof(UWord) );
VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
- OFFSET_ppc32_GPR3, sizeof(UWord) );
+ OFFSET_ppc32_GPR4, sizeof(UWord) );
# elif defined(VGP_ppc64_aix5)
VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
|
|
From: <sv...@va...> - 2009-05-21 23:27:29
|
Author: njn
Date: 2009-05-22 00:27:17 +0100 (Fri, 22 May 2009)
New Revision: 10079
Log:
Remove dead code relating to the old way syscall return values were tracked.
Modified:
branches/DARWIN/coregrind/m_syswrap/priv_types_n_macros.h
branches/DARWIN/coregrind/m_syswrap/syswrap-main.c
branches/DARWIN/docs/internals/Darwin-notes.txt
Modified: branches/DARWIN/coregrind/m_syswrap/priv_types_n_macros.h
===================================================================
--- branches/DARWIN/coregrind/m_syswrap/priv_types_n_macros.h 2009-05-21 22:06:09 UTC (rev 10078)
+++ branches/DARWIN/coregrind/m_syswrap/priv_types_n_macros.h 2009-05-21 23:27:17 UTC (rev 10079)
@@ -97,7 +97,6 @@
Int o_arg6;
Int uu_arg7;
Int uu_arg8;
- Int o_retval;
# elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
Int o_arg1;
Int o_arg2;
@@ -107,7 +106,6 @@
Int o_arg6;
Int o_arg7;
Int o_arg8;
- Int o_retval;
# elif defined(VGP_x86_darwin)
Int s_arg1;
Int s_arg2;
@@ -117,8 +115,6 @@
Int s_arg6;
Int s_arg7;
Int s_arg8;
- Int o_retval_lo;
- Int o_retval_hi;
# elif defined(VGP_amd64_darwin)
Int o_arg1;
Int o_arg2;
@@ -128,8 +124,6 @@
Int o_arg6;
Int s_arg7;
Int s_arg8;
- Int o_retval_lo;
- Int o_retval_hi;
# else
# error "Unknown platform"
# endif
Modified: branches/DARWIN/coregrind/m_syswrap/syswrap-main.c
===================================================================
--- branches/DARWIN/coregrind/m_syswrap/syswrap-main.c 2009-05-21 22:06:09 UTC (rev 10078)
+++ branches/DARWIN/coregrind/m_syswrap/syswrap-main.c 2009-05-21 23:27:17 UTC (rev 10079)
@@ -995,7 +995,6 @@
layout->o_arg6 = OFFSET_x86_EBP;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_x86_EAX;
#elif defined(VGP_amd64_linux)
layout->o_sysno = OFFSET_amd64_RAX;
@@ -1007,7 +1006,6 @@
layout->o_arg6 = OFFSET_amd64_R9;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_amd64_RAX;
#elif defined(VGP_ppc32_linux)
layout->o_sysno = OFFSET_ppc32_GPR0;
@@ -1019,7 +1017,6 @@
layout->o_arg6 = OFFSET_ppc32_GPR8;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_ppc32_GPR3;
#elif defined(VGP_ppc64_linux)
layout->o_sysno = OFFSET_ppc64_GPR0;
@@ -1031,7 +1028,6 @@
layout->o_arg6 = OFFSET_ppc64_GPR8;
layout->uu_arg7 = -1; /* impossible value */
layout->uu_arg8 = -1; /* impossible value */
- layout->o_retval = OFFSET_ppc64_GPR3;
#elif defined(VGP_ppc32_aix5)
layout->o_sysno = OFFSET_ppc32_GPR2;
@@ -1043,7 +1039,6 @@
layout->o_arg6 = OFFSET_ppc32_GPR8;
layout->o_arg7 = OFFSET_ppc32_GPR9;
layout->o_arg8 = OFFSET_ppc32_GPR10;
- layout->o_retval = OFFSET_ppc32_GPR3;
#elif defined(VGP_ppc64_aix5)
layout->o_sysno = OFFSET_ppc64_GPR2;
@@ -1055,12 +1050,9 @@
layout->o_arg6 = OFFSET_ppc64_GPR8;
layout->o_arg7 = OFFSET_ppc64_GPR9;
layout->o_arg8 = OFFSET_ppc64_GPR10;
- layout->o_retval = OFFSET_ppc64_GPR3;
#elif defined(VGP_x86_darwin)
layout->o_sysno = OFFSET_x86_EAX;
- layout->o_retval_lo = OFFSET_x86_EAX;
- layout->o_retval_hi = OFFSET_x86_EDX;
// syscall parameters are on stack in C convention
layout->s_arg1 = sizeof(UWord) * 1;
layout->s_arg2 = sizeof(UWord) * 2;
@@ -1081,8 +1073,6 @@
layout->o_arg6 = OFFSET_amd64_R9;
layout->s_arg7 = sizeof(UWord) * 1;
layout->s_arg8 = sizeof(UWord) * 2;
- layout->o_retval_lo = OFFSET_amd64_RAX;
- layout->o_retval_hi = OFFSET_amd64_RDX;
#else
# error "getSyscallLayout: unknown arch"
@@ -1622,7 +1612,6 @@
*/
void VG_(post_syscall) (ThreadId tid)
{
- //SyscallArgLayout layout; DDD (see below)
SyscallInfo* sci;
const SyscallTableEntry* ent;
SyscallStatus test_status;
@@ -1671,17 +1660,6 @@
sysno = sci->args.sysno;
ent = get_syscall_entry(sysno);
- // DDD: the trunk has the following code...
-#if 0
- /* We need the arg layout .. sigh */
- getSyscallArgLayout( &layout );
-
- /* Tell the tool that the assignment has occurred, so it can update
- shadow regs as necessary. */
- VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, layout.o_retval,
- sizeof(UWord) );
-#endif
-
/* pre: status == Complete (asserted above) */
/* Consider either success or failure. Now run the post handler if:
- it exists, and
Modified: branches/DARWIN/docs/internals/Darwin-notes.txt
===================================================================
--- branches/DARWIN/docs/internals/Darwin-notes.txt 2009-05-21 22:06:09 UTC (rev 10078)
+++ branches/DARWIN/docs/internals/Darwin-notes.txt 2009-05-21 23:27:17 UTC (rev 10079)
@@ -105,13 +105,6 @@
Also, startup is not done on the interim startup stack -- why not?
-VG_(post_syscall): contains important-looking code that is #if 0'd
-out. Said code does VG_TRACK( post_reg_write). Needs looking into.
-[The 'post_reg_write' call has been moved into
-putSyscallStatusIntoGuestState(). This might be an improvement, as on some
-platforms more than one register is written with a result.]
-
-
VG_(di_notify_mmap): Linux version is also used for Darwin, and
contains some ifdeffery. Clean up.
|
|
From: Philippe W. <phi...@sk...> - 2009-05-21 23:20:40
|
To implement the heap status in callgrind/kcachegrind, I am using needs_malloc_replacement with the preload mechanism. However, I would like to use this mechanism only when the option --collect-alloc=yes is given (to replace the "real" functions only when tracking the heap). So, when --collect-alloc=no, I am not calling VG_(needs_malloc_replacement). However, having a "preload" and not calling VG_(needs_malloc_replacement) does not work: I get debug messages such as: --11913-- VG_USERREQ__CLIENT_CALL1: func=0x0 and then it crashes. Is there a way to deactivate the malloc replacement during the command-line processing of the tool ? |
|
From: Philippe W. <phi...@sk...> - 2009-05-21 23:11:25
|
> So, at least in terms of stack trace size, I imagine that a callstack of JCC for each > allocation should work. (using an hash table to share the identical JCC callstacks to > avoid duplication). I have now implemented this solution (but using an OSetGen of JCC callstacks rather than a hash table). Testing this on Firefox startup gives a reasonable result: there are about 72,000 different JCC stack traces, with 4,000,000 elements in these stack traces. => so, on x86, this needs about 20 MB of memory. I still have a few things to clean up/improve. I will then send the current state of the code as a basis to discuss/review (there are for sure points to improve/change/enhance/...) Philippe |
|
From: <sv...@va...> - 2009-05-21 22:06:17
|
Author: sewardj
Date: 2009-05-21 23:06:09 +0100 (Thu, 21 May 2009)
New Revision: 10078
Log:
At startup, check processor capabilities a bit more carefully:
amd64:
- make cmpxchg8b support be a minimum requirement
- detect and note presence/absence of cmpxchg16b support
- detect and note presence/absence of SSE3 support
x86:
- make cmpxchg8b support be a minimum requirement
Modified:
branches/DCAS/coregrind/m_machine.c
Modified: branches/DCAS/coregrind/m_machine.c
===================================================================
--- branches/DCAS/coregrind/m_machine.c 2009-05-21 19:37:04 UTC (rev 10077)
+++ branches/DCAS/coregrind/m_machine.c 2009-05-21 22:06:09 UTC (rev 10078)
@@ -154,7 +154,7 @@
static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr))
{
-#if defined(VGA_x86)
+# if defined(VGA_x86)
(*f)(vex->guest_EAX);
(*f)(vex->guest_ECX);
(*f)(vex->guest_EDX);
@@ -163,7 +163,7 @@
(*f)(vex->guest_EDI);
(*f)(vex->guest_ESP);
(*f)(vex->guest_EBP);
-#elif defined(VGA_amd64)
+# elif defined(VGA_amd64)
(*f)(vex->guest_RAX);
(*f)(vex->guest_RCX);
(*f)(vex->guest_RDX);
@@ -180,7 +180,7 @@
(*f)(vex->guest_R13);
(*f)(vex->guest_R14);
(*f)(vex->guest_R15);
-#elif defined(VGA_ppc32) || defined(VGA_ppc64)
+# elif defined(VGA_ppc32) || defined(VGA_ppc64)
/* XXX ask tool about validity? */
(*f)(vex->guest_GPR0);
(*f)(vex->guest_GPR1);
@@ -217,9 +217,9 @@
(*f)(vex->guest_CTR);
(*f)(vex->guest_LR);
-#else
-# error Unknown arch
-#endif
+# else
+# error "Unknown arch"
+# endif
}
@@ -347,7 +347,7 @@
LibVEX_default_VexArchInfo(&vai);
#if defined(VGA_x86)
- { Bool have_sse1, have_sse2;
+ { Bool have_sse1, have_sse2, have_cx8;
UInt eax, ebx, ecx, edx;
if (!VG_(has_cpuid)())
@@ -365,6 +365,13 @@
have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
+ /* cmpxchg8b is a minimum requirement now; if we don't have it we
+ must simply give up. But all CPUs since Pentium-I have it, so
+ that doesn't seem like much of a restriction. */
+ have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
+ if (!have_cx8)
+ return False;
+
if (have_sse2 && have_sse1) {
va = VexArchX86;
vai.hwcaps = VEX_HWCAPS_X86_SSE1;
@@ -387,11 +394,41 @@
}
#elif defined(VGA_amd64)
- vg_assert(VG_(has_cpuid)());
- va = VexArchAMD64;
- vai.hwcaps = 0; /*baseline - SSE2 */
- return True;
+ { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_cx16;
+ UInt eax, ebx, ecx, edx;
+ if (!VG_(has_cpuid)())
+ /* we can't do cpuid at all. Give up. */
+ return False;
+
+ VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
+ if (eax < 1)
+ /* we can't ask for cpuid(x) for x > 0. Give up. */
+ return False;
+
+ /* get capabilities bits into edx */
+ VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);
+
+ have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
+ have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
+ have_sse3 = (ecx & (1<<9)) != 0; /* True => have sse3 insns */
+
+ /* cmpxchg8b is a minimum requirement now; if we don't have it we
+ must simply give up. But all CPUs since Pentium-I have it, so
+ that doesn't seem like much of a restriction. */
+ have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
+ if (!have_cx8)
+ return False;
+
+ /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
+ have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
+
+ va = VexArchAMD64;
+ vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
+ | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0);
+ return True;
+ }
+
#elif defined(VGA_ppc32)
{
/* Find out which subset of the ppc32 instruction set is supported by
|
|
From: <sv...@va...> - 2009-05-21 21:55:57
|
Author: sewardj
Date: 2009-05-21 22:55:50 +0100 (Thu, 21 May 2009)
New Revision: 1897
Log:
This should have been committed as part of r1894 (Add a new
capabilities bit for AMD64, indicating whether or not cmpxchg16b is
supported.)
Modified:
branches/DCAS/priv/main/vex_main.c
Modified: branches/DCAS/priv/main/vex_main.c
===================================================================
--- branches/DCAS/priv/main/vex_main.c 2009-05-21 21:53:41 UTC (rev 1896)
+++ branches/DCAS/priv/main/vex_main.c 2009-05-21 21:55:50 UTC (rev 1897)
@@ -744,17 +744,22 @@
| VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3))
return "x86-sse1-sse2-sse3";
- return False;
+ return NULL;
}
static HChar* show_hwcaps_amd64 ( UInt hwcaps )
{
- /* Monotonic, SSE3 > baseline. */
- if (hwcaps == 0)
- return "amd64-sse2";
- if (hwcaps == VEX_HWCAPS_AMD64_SSE3)
- return "amd64-sse3";
- return False;
+ /* SSE3 and CX16 are orthogonal and > baseline, although we really
+ don't expect to come across anything which can do SSE3 but can't
+ do CX16. Still, we can handle that case. */
+ const UInt SSE3 = VEX_HWCAPS_AMD64_SSE3;
+ const UInt CX16 = VEX_HWCAPS_AMD64_CX16;
+ UInt c = hwcaps;
+ if (c == 0) return "amd64-sse2";
+ if (c == SSE3) return "amd64-sse3";
+ if (c == CX16) return "amd64-sse2-cx16";
+ if (c == (SSE3|CX16)) return "amd64-sse3-cx16";
+ return NULL;
}
static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
|
|
From: <sv...@va...> - 2009-05-21 21:53:46
|
Author: sewardj
Date: 2009-05-21 22:53:41 +0100 (Thu, 21 May 2009)
New Revision: 1896
Log:
Use the new IRStmt_IRCAS as the basis of the translation for all
LOCK-prefixed x86 instructions, and for xchg, which is implicitly
LOCK-prefixed.
Modified:
branches/DCAS/priv/guest-x86/toIR.c
Modified: branches/DCAS/priv/guest-x86/toIR.c
===================================================================
--- branches/DCAS/priv/guest-x86/toIR.c 2009-05-21 21:51:20 UTC (rev 1895)
+++ branches/DCAS/priv/guest-x86/toIR.c 2009-05-21 21:53:41 UTC (rev 1896)
@@ -44,6 +44,8 @@
without prior written permission.
*/
+/* Translates x86 code to IR. */
+
/* TODO:
All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
@@ -76,8 +78,6 @@
zeroes all the FP registers. It should leave the registers
unchanged.
- RDTSC returns one, always.
-
SAHF should cause eflags[1] == 1, and in fact it produces 0. As
per Intel docs this bit has no meaning anyway. Since PUSHF is the
only way to observe eflags[1], a proper fix would be to make that
@@ -90,14 +90,14 @@
happen. Programs that set it to 1 and then rely on the resulting
SIGBUSs to inform them of misaligned accesses will not work.
- Implementation sysenter is necessarily partial. sysenter is a kind
- of system call entry. When doing a sysenter, the return address is
- not known -- that is something that is beyond Vex's knowledge. So
- the generated IR forces a return to the scheduler, which can do
- what it likes to simulate the systemter, but it MUST set this
- thread's guest_EIP field with the continuation address before
- resuming execution. If that doesn't happen, the thread will jump
- to address zero, which is probably fatal.
+ Implementation of sysenter is necessarily partial. sysenter is a
+ kind of system call entry. When doing a sysenter, the return
+ address is not known -- that is something that is beyond Vex's
+ knowledge. So the generated IR forces a return to the scheduler,
+ which can do what it likes to simulate the systenter, but it MUST
+ set this thread's guest_EIP field with the continuation address
+ before resuming execution. If that doesn't happen, the thread will
+ jump to address zero, which is probably fatal.
This module uses global variables and so is not MT-safe (if that
should ever become relevant).
@@ -105,8 +105,21 @@
The delta values are 32-bit ints, not 64-bit ints. That means
this module may not work right if run on a 64-bit host. That should
be fixed properly, really -- if anyone ever wants to use Vex to
- translate x86 code for execution on a 64-bit host. */
+ translate x86 code for execution on a 64-bit host.
+ casLE (implementation of lock-prefixed insns) and rep-prefixed
+ insns: the side-exit back to the start of the insn is done with
+ Ijk_Boring. This is quite wrong, it should be done with
+ Ijk_NoRedir, since otherwise the side exit, which is intended to
+ restart the instruction for whatever reason, could go somewhere
+ entirely else. Doing it right (with Ijk_NoRedir jumps) would make
+ no-redir jumps performance critical, at least for rep-prefixed
+ instructions, since all iterations thereof would involve such a
+ jump. It's not such a big deal with casLE since the side exit is
+ only taken if the CAS fails, that is, the location is contended,
+ which is relatively unlikely.
+*/
+
/* Performance holes:
- fcom ; fstsw %ax ; sahf
@@ -145,9 +158,25 @@
No prefixes may precede a "Special" instruction.
*/
+/* LOCK prefixed instructions. These are translated using IR-level
+ CAS statements (IRCAS) and are believed to preserve atomicity, even
+ from the point of view of some other process racing against a
+ simulated one (presumably they communicate via a shared memory
+ segment).
-/* Translates x86 code to IR. */
+ Handlers which are aware of LOCK prefixes are:
+ dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
+ dis_cmpxchg_G_E (cmpxchg)
+ dis_Grp1 (add, or, adc, sbb, and, sub, xor)
+ dis_Grp3 (not, neg)
+ dis_Grp4 (inc, dec)
+ dis_Grp5 (inc, dec)
+ dis_Grp8_Imm (bts, btc, btr)
+ dis_bt_G_E (bts, btc, btr)
+ dis_xadd_G_E (xadd)
+*/
+
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
@@ -715,7 +744,34 @@
unop(Iop_1Uto32,y)));
}
+/* Generate a compare-and-swap operation, operating on memory at
+ 'addr'. The expected value is 'expVal' and the new value is
+ 'newVal'. If the operation fails, then transfer control (with a
+ no-redir jump (XXX no -- see comment at top of this file)) to
+ 'restart_point', which is presumably the address of the guest
+ instruction again -- retrying, essentially. */
+static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
+ Addr32 restart_point )
+{
+ IRCAS* cas;
+ IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
+ IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
+ IRTemp oldTmp = newTemp(tyE);
+ IRTemp expTmp = newTemp(tyE);
+ vassert(tyE == tyN);
+ vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
+ assign(expTmp, expVal);
+ cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
+ NULL, mkexpr(expTmp), NULL, newVal );
+ stmt( IRStmt_CAS(cas) );
+ stmt( IRStmt_Exit(
+ binop( mkSizedOp(tyE,Iop_CmpNE8), mkexpr(oldTmp), mkexpr(expTmp) ),
+ Ijk_Boring, /*Ijk_NoRedir*/
+ IRConst_U32( restart_point )
+ ));
+}
+
/*------------------------------------------------------------*/
/*--- Helpers for %eflags. ---*/
/*------------------------------------------------------------*/
@@ -1039,9 +1095,28 @@
/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
appropriately.
+
+ Optionally, generate a store for the 'tres' value. This can either
+ be a normal store, or it can be a cas-with-possible-failure style
+ store:
+
+ if taddr is IRTemp_INVALID, then no store is generated.
+
+ if taddr is not IRTemp_INVALID, then a store (using taddr as
+ the address) is generated:
+
+ if texpVal is IRTemp_INVALID then a normal store is
+ generated, and restart_point must be zero (it is irrelevant).
+
+ if texpVal is not IRTemp_INVALID then a cas-style store is
+ generated. texpVal is the expected value, restart_point
+ is the restart point if the store fails, and texpVal must
+ have the same type as tres.
*/
static void helper_ADC ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
@@ -1050,6 +1125,7 @@
IROp plus = mkSizedOp(ty, Iop_Add8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
vassert(sz == 1 || sz == 2 || sz == 4);
thunkOp = sz==4 ? X86G_CC_OP_ADCL
: (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);
@@ -1065,6 +1141,20 @@
binop(plus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
@@ -1074,10 +1164,13 @@
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
- appropriately.
+ appropriately. As with helper_ADC, possibly generate a store of
+ the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
@@ -1086,6 +1179,7 @@
IROp minus = mkSizedOp(ty, Iop_Sub8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
vassert(sz == 1 || sz == 2 || sz == 4);
thunkOp = sz==4 ? X86G_CC_OP_SBBL
: (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);
@@ -1101,6 +1195,20 @@
binop(minus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
@@ -1723,11 +1831,13 @@
assign( src, getIReg(size,eregOfRM(rm)) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
@@ -1750,11 +1860,13 @@
assign( src, loadLE(szToITy(size), mkexpr(addr)) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
@@ -1795,6 +1907,7 @@
*/
static
UInt dis_op2_G_E ( UChar sorb,
+ Bool locked,
Bool addSubCarry,
IROp op8,
Bool keep,
@@ -1831,11 +1944,13 @@
assign(src, getIReg(size,gregOfRM(rm)));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, eregOfRM(rm), mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, eregOfRM(rm), mkexpr(dst1));
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
@@ -1860,20 +1975,43 @@
assign(src, getIReg(size,gregOfRM(rm)));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (locked) {
+ /* cas-style store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (locked) {
+ /* cas-style store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (keep) {
+ if (locked) {
+ if (0) vex_printf("locked case\n" );
+ casLE( mkexpr(addr),
+ mkexpr(dst0)/*expval*/,
+ mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
+ } else {
+ if (0) vex_printf("nonlocked case\n");
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
- if (keep)
- storeLE(mkexpr(addr), mkexpr(dst1));
}
DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
@@ -2000,11 +2138,13 @@
}
else
if (op8 == Iop_Add8 && carrying) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
if (op8 == Iop_Sub8 && carrying) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
vpanic("dis_op_imm_A(x86,guest)");
@@ -2099,7 +2239,7 @@
static
-UInt dis_Grp1 ( UChar sorb,
+UInt dis_Grp1 ( UChar sorb, Bool locked,
Int delta, UChar modrm,
Int am_sz, Int d_sz, Int sz, UInt d32 )
{
@@ -2130,10 +2270,12 @@
assign(src, mkU(ty,d32 & mask));
if (gregOfRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ helper_ADC( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else
if (gregOfRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ helper_SBB( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
if (isAddSub(op8))
@@ -2155,21 +2297,43 @@
assign(src, mkU(ty,d32 & mask));
if (gregOfRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ if (locked) {
+ /* cas-style store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (gregOfRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ if (locked) {
+ /* cas-style store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (gregOfRM(modrm) < 7) {
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
+ mkexpr(dst1)/*newVal*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
}
- if (gregOfRM(modrm) < 7)
- storeLE(mkexpr(addr), mkexpr(dst1));
-
delta += (len+d_sz);
DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
d32, dis_buf);
@@ -2421,6 +2585,7 @@
/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
UInt dis_Grp8_Imm ( UChar sorb,
+ Bool locked,
Int delta, UChar modrm,
Int am_sz, Int sz, UInt src_val,
Bool* decode_OK )
@@ -2477,20 +2642,6 @@
src_val, dis_buf);
}
- /* Copy relevant bit from t2 into the carry flag. */
- /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
- stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
- stmt( IRStmt_Put(
- OFFB_CC_DEP1,
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
- mkU32(1))
- ));
- /* Set NDEP even though it isn't used. This makes redundant-PUT
- elimination of previous stores to this field work better. */
- stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
-
/* Compute the new value into t2m, if non-BT. */
switch (gregOfRM(modrm)) {
case 4: /* BT */
@@ -2509,15 +2660,38 @@
vassert(0);
}
- /* Write the result back, if non-BT. */
+ /* Write the result back, if non-BT. If the CAS fails then we
+ side-exit from the trace at this point, and so the flag state is
+ not affected. This is of course as required. */
if (gregOfRM(modrm) != 4 /* BT */) {
if (epartIsReg(modrm)) {
- putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
+ putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
} else {
- storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ if (locked) {
+ casLE( mkexpr(t_addr),
+ narrowTo(ty, mkexpr(t2))/*expd*/,
+ narrowTo(ty, mkexpr(t2m))/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ }
}
}
+ /* Copy relevant bit from t2 into the carry flag. */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
+ mkU32(1))
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
return delta;
}
@@ -2585,7 +2759,7 @@
/* Group 3 extended opcodes. */
static
-UInt dis_Grp3 ( UChar sorb, Int sz, Int delta, Bool* decode_OK )
+UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
{
UInt d32;
UChar modrm;
@@ -2599,6 +2773,13 @@
*decode_OK = True; /* may change this later */
modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
+ /* LOCK prefix only allowed with not and neg subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
if (epartIsReg(modrm)) {
switch (gregOfRM(modrm)) {
case 0: { /* TEST */
@@ -2686,7 +2867,14 @@
*decode_OK = False;
break;
case 2: /* NOT */
- storeLE( mkexpr(addr), unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ dst1 = newTemp(ty);
+ assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
DIP("not%c %s\n", nameISize(sz), dis_buf);
break;
case 3: /* NEG */
@@ -2695,9 +2883,15 @@
dst1 = newTemp(ty);
assign(dst0, mkU(ty,0));
assign(src, mkexpr(t1));
- assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
+ assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(dst0), mkexpr(src)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
- storeLE( mkexpr(addr), mkexpr(dst1) );
DIP("neg%c %s\n", nameISize(sz), dis_buf);
break;
case 4: /* MUL */
@@ -2725,7 +2919,7 @@
/* Group 4 extended opcodes. */
static
-UInt dis_Grp4 ( UChar sorb, Int delta, Bool* decode_OK )
+UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
{
Int alen;
UChar modrm;
@@ -2737,6 +2931,13 @@
*decode_OK = True;
modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
+ /* LOCK prefix only allowed with inc and dec subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
if (epartIsReg(modrm)) {
assign(t1, getIReg(1, eregOfRM(modrm)));
switch (gregOfRM(modrm)) {
@@ -2763,12 +2964,22 @@
switch (gregOfRM(modrm)) {
case 0: /* INC */
assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( True, t2, ty );
break;
case 1: /* DEC */
assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( False, t2, ty );
break;
default:
@@ -2784,7 +2995,7 @@
/* Group 5 extended opcodes. */
static
-UInt dis_Grp5 ( UChar sorb, Int sz, Int delta,
+UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
DisResult* dres, Bool* decode_OK )
{
Int len;
@@ -2798,6 +3009,13 @@
*decode_OK = True;
modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
+ /* LOCK prefix only allowed with inc and dec subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
if (epartIsReg(modrm)) {
assign(t1, getIReg(sz,eregOfRM(modrm)));
switch (gregOfRM(modrm)) {
@@ -2853,15 +3071,25 @@
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Add8),
mkexpr(t1), mkU(ty,1)));
+ if (locked) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( True, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 1: /* DEC */
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
mkexpr(t1), mkU(ty,1)));
+ if (locked) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( False, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 2: /* call Ev */
vassert(sz == 4);
@@ -5898,20 +6126,22 @@
static
-UInt dis_bt_G_E ( UChar sorb, Int sz, Int delta, BtOp op )
+UInt dis_bt_G_E ( UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
{
HChar dis_buf[50];
UChar modrm;
Int len;
IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
- t_addr1, t_esp, t_mask;
+ t_addr1, t_esp, t_mask, t_new;
vassert(sz == 2 || sz == 4);
t_fetched = t_bitno0 = t_bitno1 = t_bitno2
- = t_addr0 = t_addr1 = t_esp = t_mask = IRTemp_INVALID;
+ = t_addr0 = t_addr1 = t_esp
+ = t_mask = t_new = IRTemp_INVALID;
t_fetched = newTemp(Ity_I8);
+ t_new = newTemp(Ity_I8);
t_bitno0 = newTemp(Ity_I32);
t_bitno1 = newTemp(Ity_I32);
t_bitno2 = newTemp(Ity_I8);
@@ -5976,24 +6206,29 @@
if (op != BtOpNone) {
switch (op) {
- case BtOpSet:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Or8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpSet:
+ assign( t_new,
+ binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpComp:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Xor8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpComp:
+ assign( t_new,
+ binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpReset:
- storeLE( mkexpr(t_addr1),
- binop(Iop_And8, mkexpr(t_fetched),
- unop(Iop_Not8, mkexpr(t_mask))) );
+ case BtOpReset:
+ assign( t_new,
+ binop(Iop_And8, mkexpr(t_fetched),
+ unop(Iop_Not8, mkexpr(t_mask))) );
break;
default:
vpanic("dis_bt_G_E(x86)");
}
+ if (locked && !epartIsReg(modrm)) {
+ casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
+ mkexpr(t_new)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(t_addr1), mkexpr(t_new) );
+ }
}
/* Side effect done; now get selected bit into Carry flag */
@@ -6208,6 +6443,7 @@
static
UInt dis_cmpxchg_G_E ( UChar sorb,
+ Bool locked,
Int size,
Int delta0 )
{
@@ -6217,7 +6453,6 @@
IRType ty = szToITy(size);
IRTemp acc = newTemp(ty);
IRTemp src = newTemp(ty);
- //IRTemp res = newTemp(ty);
IRTemp dest = newTemp(ty);
IRTemp dest2 = newTemp(ty);
IRTemp acc2 = newTemp(ty);
@@ -6225,107 +6460,75 @@
IRTemp addr = IRTemp_INVALID;
UChar rm = getUChar(delta0);
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix, generate sequence based
+ on Mux0X
+
+ reg-mem, not locked: ignore any lock prefix, generate sequence
+ based on Mux0X
+
+ reg-mem, locked: use IRCAS
+ */
if (epartIsReg(rm)) {
+ /* case 1 */
assign( dest, getIReg(size, eregOfRM(rm)) );
delta0++;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ putIReg(size, eregOfRM(rm), mkexpr(dest2));
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIReg(size,gregOfRM(rm)),
nameIReg(size,eregOfRM(rm)) );
- } else {
+ }
+ else if (!epartIsReg(rm) && !locked) {
+ /* case 2 */
addr = disAMode ( &len, sorb, delta0, dis_buf );
assign( dest, loadLE(ty, mkexpr(addr)) );
delta0 += len;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ storeLE( mkexpr(addr), mkexpr(dest2) );
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIReg(size,gregOfRM(rm)), dis_buf);
}
-
- assign( src, getIReg(size, gregOfRM(rm)) );
- assign( acc, getIReg(size, R_EAX) );
- //assign( res, binop( mkSizedOp(ty,Iop_Sub8), mkexpr(acc), mkexpr(dest) ));
- setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
- assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
- assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
- assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
- putIReg(size, R_EAX, mkexpr(acc2));
-
- if (epartIsReg(rm)) {
- putIReg(size, eregOfRM(rm), mkexpr(dest2));
- } else {
- storeLE( mkexpr(addr), mkexpr(dest2) );
+ else if (!epartIsReg(rm) && locked) {
+ /* case 3 */
+ /* src is new value. acc is expected value. dest is old value.
+ Compute success from the output of the IRCAS, and steer the
+ new value for EAX accordingly: in case of success, EAX is
+ unchanged. */
+ addr = disAMode ( &len, sorb, delta0, dis_buf );
+ delta0 += len;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ stmt( IRStmt_CAS(
+ mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
+ NULL, mkexpr(acc), NULL, mkexpr(src) )
+ ));
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ DIP("lock cmpxchg%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)), dis_buf);
}
+ else vassert(0);
return delta0;
}
-//-- static
-//-- Addr dis_cmpxchg8b ( UCodeBlock* cb,
-//-- UChar sorb,
-//-- Addr eip0 )
-//-- {
-//-- Int tal, tah, junkl, junkh, destl, desth, srcl, srch, accl, acch;
-//-- HChar dis_buf[50];
-//-- UChar rm;
-//-- UInt pair;
-//--
-//-- rm = getUChar(eip0);
-//-- accl = newTemp(cb);
-//-- acch = newTemp(cb);
-//-- srcl = newTemp(cb);
-//-- srch = newTemp(cb);
-//-- destl = newTemp(cb);
-//-- desth = newTemp(cb);
-//-- junkl = newTemp(cb);
-//-- junkh = newTemp(cb);
-//--
-//-- vg_assert(!epartIsReg(rm));
-//--
-//-- pair = disAMode ( cb, sorb, eip0, dis_buf );
-//-- tal = LOW24(pair);
-//-- tah = newTemp(cb);
-//-- uInstr2(cb, MOV, 4, TempReg, tal, TempReg, tah);
-//-- uInstr2(cb, ADD, 4, Literal, 0, TempReg, tah);
-//-- uLiteral(cb, 4);
-//-- eip0 += HI8(pair);
-//-- DIP("cmpxchg8b %s\n", dis_buf);
-//--
-//-- uInstr0(cb, CALLM_S, 0);
-//--
-//-- uInstr2(cb, LOAD, 4, TempReg, tah, TempReg, desth);
-//-- uInstr1(cb, PUSH, 4, TempReg, desth);
-//-- uInstr2(cb, LOAD, 4, TempReg, tal, TempReg, destl);
-//-- uInstr1(cb, PUSH, 4, TempReg, destl);
-//-- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, srch);
-//-- uInstr1(cb, PUSH, 4, TempReg, srch);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, srcl);
-//-- uInstr1(cb, PUSH, 4, TempReg, srcl);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EDX, TempReg, acch);
-//-- uInstr1(cb, PUSH, 4, TempReg, acch);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, accl);
-//-- uInstr1(cb, PUSH, 4, TempReg, accl);
-//--
-//-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_cmpxchg8b));
-//-- uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsEmpty);
-//--
-//-- uInstr1(cb, POP, 4, TempReg, accl);
-//-- uInstr2(cb, PUT, 4, TempReg, accl, ArchReg, R_EAX);
-//-- uInstr1(cb, POP, 4, TempReg, acch);
-//-- uInstr2(cb, PUT, 4, TempReg, acch, ArchReg, R_EDX);
-//-- uInstr1(cb, POP, 4, TempReg, srcl);
-//-- uInstr2(cb, PUT, 4, TempReg, srcl, ArchReg, R_EBX);
-//-- uInstr1(cb, POP, 4, TempReg, srch);
-//-- uInstr2(cb, PUT, 4, TempReg, srch, ArchReg, R_ECX);
-//-- uInstr1(cb, POP, 4, TempReg, destl);
-//-- uInstr2(cb, STORE, 4, TempReg, destl, TempReg, tal);
-//-- uInstr1(cb, POP, 4, TempReg, desth);
-//-- uInstr2(cb, STORE, 4, TempReg, desth, TempReg, tah);
-//--
-//-- uInstr0(cb, CALLM_E, 0);
-//--
-//-- return eip0;
-//-- }
-
-
/* Handle conditional move instructions of the form
cmovcc E(reg-or-mem), G(reg)
@@ -6397,48 +6600,67 @@
static
-UInt dis_xadd_G_E ( UChar sorb, Int sz, Int delta0, Bool* decodeOK )
+UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
+ Bool* decodeOK )
{
Int len;
UChar rm = getIByte(delta0);
HChar dis_buf[50];
- // Int tmpd = newTemp(cb);
- //Int tmpt = newTemp(cb);
-
IRType ty = szToITy(sz);
IRTemp tmpd = newTemp(ty);
IRTemp tmpt0 = newTemp(ty);
IRTemp tmpt1 = newTemp(ty);
+ /* There are 3 cases to consider:
+
+ reg-reg: currently unhandled
+
+ reg-mem, not locked: ignore any lock prefix, generate 'naive'
+ (non-atomic) sequence
+
+ reg-mem, locked: use IRCAS
+ */
+
if (epartIsReg(rm)) {
+ /* case 1 */
*decodeOK = False;
return delta0;
/* Currently we don't handle xadd_G_E with register operand. */
-#if 0
- uInstr2(cb, GET, sz, ArchReg, eregOfRM(rm), TempReg, tmpd);
- uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt);
- uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt);
- setFlagsFromUOpcode(cb, ADD);
- uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm));
- uInstr2(cb, PUT, sz, TempReg, tmpt, ArchReg, eregOfRM(rm));
+ }
+ else if (!epartIsReg(rm) && !locked) {
+ /* case 2 */
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ storeLE( mkexpr(addr), mkexpr(tmpt1) );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
DIP("xadd%c %s, %s\n",
- nameISize(sz), nameIReg(sz,gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
- return 1+eip0;
-#endif
- } else {
+ nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
+ *decodeOK = True;
+ return len+delta0;
+ }
+ else if (!epartIsReg(rm) && locked) {
+ /* case 3 */
IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
assign( tmpd, loadLE(ty, mkexpr(addr)) );
assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
- assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), mkexpr(tmpd), mkexpr(tmpt0)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
+ mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
- storeLE( mkexpr(addr), mkexpr(tmpt1) );
putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
DIP("xadd%c %s, %s\n",
nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
*decodeOK = True;
return len+delta0;
}
+ /*UNREACHED*/
+ vassert(0);
}
/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
@@ -7424,16 +7646,15 @@
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
- Note, this is slightly too permissive. Oh well. Note also, AFAICS
- this is exactly the same for both 32-bit and 64-bit mode.
+ AFAICS this is exactly the same for both 32-bit and 64-bit mode.
- ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
- OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
- ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
- SBB 81 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
- AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
- SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
- XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+ ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
+ OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
+ ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
+ SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
+ AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
+ SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
+ XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
DEC FE /1, FF /1
INC FE /0, FF /0
@@ -7441,7 +7662,7 @@
NEG F6 /3, F7 /3
NOT F6 /2, F7 /2
- XCHG 86, 87
+ XCHG 86, 87
BTC 0F BB, 0F BA /7
BTR 0F B3, 0F BA /6
@@ -7451,52 +7672,93 @@
CMPXCHG8B 0F C7 /1
XADD 0F C0, 0F C1
+
+ ------------------------------
+
+ 80 /0 = addb $imm8, rm8
+ 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
+ 82 /0 = addb $imm8, rm8
+ 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
+
+ 00 = addb r8, rm8
+ 01 = addl r32, rm32 and addw r16, rm16
+
+ Same for ADD OR ADC SBB AND SUB XOR
+
+ FE /1 = dec rm8
+ FF /1 = dec rm32 and dec rm16
+
+ FE /0 = inc rm8
+ FF /0 = inc rm32 and inc rm16
+
+ F6 /3 = neg rm8
+ F7 /3 = neg rm32 and neg rm16
+
+ F6 /2 = not rm8
+ F7 /2 = not rm32 and not rm16
+
+ 0F BB = btcw r16, rm16 and btcl r32, rm32
+ OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32
+
+ Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
{
switch (opc[0]) {
- case 0x00: case 0x01: case 0x02: case 0x03: return True;
- case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
- case 0x10: case 0x11: case 0x12: case 0x13: return True;
- case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
- case 0x20: case 0x21: case 0x22: case 0x23: return True;
- case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
- case 0x30: case 0x31: case 0x32: case 0x33: return True;
+ case 0x00: case 0x01: case 0x08: case 0x09:
+ case 0x10: case 0x11: case 0x18: case 0x19:
+ case 0x20: case 0x21: case 0x28: case 0x29:
+ case 0x30: case 0x31:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
- case 0x80: case 0x81: case 0x83:
- if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6)
+ case 0x80: case 0x81: case 0x82: case 0x83:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xFE: case 0xFF:
- if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1)
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xF6: case 0xF7:
- if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3)
+ if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
+ && !epartIsReg(opc[1]))
return True;
break;
case 0x86: case 0x87:
- return True;
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
case 0x0F: {
switch (opc[1]) {
case 0xBB: case 0xB3: case 0xAB:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xBA:
- if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7)
+ if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
+ && !epartIsReg(opc[2]))
return True;
break;
case 0xB0: case 0xB1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xC7:
- if (gregOfRM(opc[2]) == 1)
+ if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
return True;
break;
case 0xC0: case 0xC1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
default:
break;
} /* switch (opc[1]) */
@@ -7515,11 +7777,20 @@
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
-/* Disassemble a single instruction into IR. The instruction
- is located in host memory at &guest_code[delta]. */
-
+/* Disassemble a single instruction into IR. The instruction is
+ located in host memory at &guest_code[delta]. *expect_CAS is set
+ to True if the resulting IR is expected to contain an IRCAS
+ statement, and False if it's not expected to. This makes it
+ possible for the caller of disInstr_X86_WRK to check that
+ LOCK-prefixed instructions are at least plausibly translated, in
+ that it becomes possible to check that a (validly) LOCK-prefixed
+ instruction generates a translation containing an IRCAS, and
+ instructions without LOCK prefixes don't generate translations
+ containing an IRCAS.
+*/
static
-DisResult disInstr_X86_WRK (
+DisResult disInstr_X86_WRK (
+ /*OUT*/Bool* expect_CAS,
Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
void* callback_opaque,
@@ -7564,8 +7835,11 @@
dres.len = 0;
dres.continueAt = 0;
+ *expect_CAS = False;
+
addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
+ vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
DIP("\t0x%x: ", guest_EIP_bbstart+delta);
/* We may be asked to update the guest EIP before going further. */
@@ -7666,6 +7940,7 @@
break;
case 0xF0:
pfx_lock = True;
+ *expect_CAS = True;
break;
case 0x3E: /* %DS: */
case 0x26: /* %ES: */
@@ -7706,41 +7981,13 @@
leading F2 or F3. Check that any LOCK prefix is actually
allowed. */
- /* Kludge re LOCK prefixes. We assume here that all code generated
- by Vex is going to be run in a single-threaded context, in other
- words that concurrent executions of Vex-generated translations
- will not happen. So we don't need to worry too much about
- preserving atomicity. However, mark the fact that the notional
- hardware bus lock is being acquired (and, after the insn,
- released), so that thread checking tools know this is a locked
- insn.
-
- We check for, and immediately reject, (most) inappropriate uses
- of the LOCK prefix. Later (at decode_failure: and
- decode_success:), if we've added a BusLock event, then we will
- follow up with a BusUnlock event. How do we know execution will
- actually ever get to the BusUnlock event? Because
- can_be_used_with_LOCK_prefix rejects all control-flow changing
- instructions.
-
- One loophole, though: if a LOCK prefix insn (seg)faults, then
- the BusUnlock event will never be reached. This could cause
- tools which track bus hardware lock to lose track. Really, we
- should explicitly release the lock after every insn, but that's
- obviously way too expensive. Really, any tool which tracks the
- state of the bus lock needs to ask V's core/tool interface to
- notify it of signal deliveries. On delivery of SIGSEGV to the
- guest, the tool will be notified, in which case it should
- release the bus hardware lock if it is held.
-
- Note, guest-amd64/toIR.c contains identical logic.
- */
if (pfx_lock) {
if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
stmt( IRStmt_MBE(Imbe_BusLock) );
unlock_bus_after_insn = True;
DIP("lock ");
} else {
+ *expect_CAS = False;
goto decode_failure;
}
}
@@ -13006,59 +13253,75 @@
/* ------------------------ opl Gv, Ev ----------------- */
case 0x00: /* ADD Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Add8, True, 1, delta, "add" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Add8, True, 1, delta, "add" );
break;
case 0x01: /* ADD Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Add8, True, sz, delta, "add" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Add8, True, sz, delta, "add" );
break;
case 0x08: /* OR Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Or8, True, 1, delta, "or" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Or8, True, 1, delta, "or" );
break;
case 0x09: /* OR Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Or8, True, sz, delta, "or" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Or8, True, sz, delta, "or" );
break;
case 0x10: /* ADC Gb,Eb */
- delta = dis_op2_G_E ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Add8, True, 1, delta, "adc" );
break;
case 0x11: /* ADC Gv,Ev */
- delta = dis_op2_G_E ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Add8, True, sz, delta, "adc" );
break;
case 0x18: /* SBB Gb,Eb */
- delta = dis_op2_G_E ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Sub8, True, 1, delta, "sbb" );
break;
case 0x19: /* SBB Gv,Ev */
- delta = dis_op2_G_E ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Sub8, True, sz, delta, "sbb" );
break;
case 0x20: /* AND Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_And8, True, 1, delta, "and" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_And8, True, 1, delta, "and" );
break;
case 0x21: /* AND Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_And8, True, sz, delta, "and" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_And8, True, sz, delta, "and" );
break;
case 0x28: /* SUB Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, True, 1, delta, "sub" );
break;
case 0x29: /* SUB Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, True, sz, delta, "sub" );
break;
case 0x30: /* XOR Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Xor8, True, 1, delta, "xor" );
break;
case 0x31: /* XOR Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Xor8, True, sz, delta, "xor" );
break;
case 0x38: /* CMP Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, False, 1, delta, "cmp" );
break;
case 0x39: /* CMP Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, False, sz, delta, "cmp" );
break;
/* ------------------------ POP ------------------------ */
@@ -13527,6 +13790,7 @@
nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
nameIReg(sz,eregOfRM(modrm)));
} else {
+ *expect_CAS = True;
/* Need to add IRStmt_MBE(Imbe_BusLock). */
if (pfx_lock) {
/* check it's already been taken care of */
@@ -13542,7 +13806,8 @@
addr = disAMode ( &alen, sorb, delta, dis_buf );
assign( t1, loadLE(ty,mkexpr(addr)) );
assign( t2, getIReg(sz,gregOfRM(modrm)) );
- storeLE( mkexpr(addr), mkexpr(t2) );
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
delta += alen;
DIP("xchg%c %s, %s\n", nameISize(sz),
@@ -13687,7 +13952,7 @@
sz = 1;
d_sz = 1;
d32 = getUChar(delta + am_sz);
- delta = dis_Grp1 ( sorb, delta, modrm, am_sz, d_sz, sz, d32 );
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
break;
case 0x81: /* Grp1 Iv,Ev */
@@ -13695,7 +13960,7 @@
am_sz = lengthAMode(delta);
d_sz = sz;
d32 = getUDisp(d_sz, delta + am_sz);
- delta = dis_Grp1 ( sorb, delta, modrm, am_sz, d_sz, sz, d32 );
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
break;
case 0x83: /* Grp1 Ib,Ev */
@@ -13703,7 +13968,7 @@
am_sz = lengthAMode(delta);
d_sz = 1;
d32 = getSDisp8(delta + am_sz);
- delta = dis_Grp1 ( sorb, delta, modrm, am_sz, d_sz, sz, d32 );
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
break;
/* ------------------------ (Grp2 extensions) ---------- */
@@ -13786,14 +14051,14 @@
case 0xF6: { /* Grp3 Eb */
Bool decode_OK = True;
- delta = dis_Grp3 ( sorb, 1, delta, &decode_OK );
+ delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
}
case 0xF7: { /* Grp3 Ev */
Bool decode_OK = True;
- delta = dis_Grp3 ( sorb, sz, delta, &decode_OK );
+ delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
@@ -13803,7 +14068,7 @@
case 0xFE: { /* Grp4 Eb */
Bool decode_OK = True;
- delta = dis_Grp4 ( sorb, delta, &decode_OK );
+ delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
@@ -13813,7 +14078,7 @@
case 0xFF: { /* Grp5 Ev */
Bool decode_OK = True;
- delta = dis_Grp5 ( sorb, sz, delta, &dres, &decode_OK );
+ delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
@@ -13832,8 +14097,8 @@
modrm = getUChar(delta);
am_sz = lengthAMode(delta);
d32 = getSDisp8(delta + am_sz);
- delta = dis_Grp8_Imm ( sorb, delta, modrm, am_sz, sz, d32,
- &decode_OK );
+ delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
+ am_sz, sz, d32, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
@@ -13885,16 +14150,16 @@
/* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
case 0xA3: /* BT Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpNone );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpNone );
break;
case 0xB3: /* BTR Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpReset );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpReset );
break;
case 0xAB: /* BTS Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpSet );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpSet );
break;
case 0xBB: /* BTC Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpComp );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpComp );
break;
/* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
@@ -13921,51 +14186,80 @@
/* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
case 0xB0: /* CMPXCHG Gb,Eb */
- delta = dis_cmpxchg_G_E ( sorb, 1, delta );
+ delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
break;
case 0xB1: /* CMPXCHG Gv,Ev */
- delta = dis_cmpxchg_G_E ( sorb, sz, delta );
+ delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
break;
case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
- IRTemp m64_old = newTemp(Ity_I64);
- IRTemp m64_new = newTemp(Ity_I64);
- IRTemp da_old = newTemp(Ity_I64);
- IRTemp da_new = newTemp(Ity_I64);
- IRTemp cb_old = newTemp(Ity_I64);
+ IRTemp expdHi = newTemp(Ity_I32);
+ IRTemp expdLo = newTemp(Ity_I32);
+ IRTemp dataHi = newTemp(Ity_I32);
+ IRTemp dataLo = newTemp(Ity_I32);
+ IRTemp oldHi = newTemp(Ity_I32);
+ IRTemp oldLo = newTemp(Ity_I32);
IRTemp flags_old = newTemp(Ity_I32);
IRTemp flags_new = newTemp(Ity_I32);
- IRTemp cond = newTemp(Ity_I8);
+ IRTemp success = newTemp(Ity_I1);
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
+
/* Decode, and generate address. */
+ if (sz != 4) goto decode_failure;
modrm = getIByte(delta);
if (epartIsReg(modrm)) goto decode_failure;
if (gregOfRM(modrm) != 1) goto decode_failure;
addr = disAMode ( &alen, sorb, delta, dis_buf );
delta += alen;
- /* Fetch the old 64-bit values and compute the guard. */
- assign( m64_old, loadLE(Ity_I64, mkexpr(addr) ));
- assign( da_old, binop(Iop_32HLto64,
- getIReg(4,R_EDX), getIReg(4,R_EAX)) );
- assign( cb_old, binop(Iop_32HLto64,
- getIReg(4,R_ECX), getIReg(4,R_EBX)) );
+ /* Get the expected and new values. */
+ assign( expdHi, getIReg(4,R_EDX) );
+ assign( expdLo, getIReg(4,R_EAX) );
+ assign( dataHi, getIReg(4,R_ECX) );
+ assign( dataLo, getIReg(4,R_EBX) );
- assign( cond,
- unop(Iop_1Uto8,
- binop(Iop_CmpEQ64, mkexpr(da_old), mkexpr(m64_old))) );
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
- /* Compute new %edx:%eax and m64 values, and put in place */
- assign( da_new,
- IRExpr_Mux0X(mkexpr(cond), mkexpr(m64_old), mkexpr(da_old)));
- assign( m64_new,
- IRExpr_Mux0X(mkexpr(cond), mkexpr(m64_old), mkexpr(cb_old)));
+ /* success when oldHi:oldLo == expdHi:expdLo */
+ assign( success,
+ binop(Iop_CmpEQ32,
+ binop(Iop_Or32,
+ binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ mkU32(0)
+ ));
- putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(da_new)) );
- putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(da_new)) );
- storeLE( mkexpr(addr), mkexpr(m64_new) );
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ EDX:EAX the value seen in memory. */
+ putIReg(4, R_EDX,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ mkexpr(oldHi),
+ mkexpr(expdHi)
+ ));
+ putIReg(4, R_EAX,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ mkexpr(oldLo),
+ mkexpr(expdLo)
+ ));
- /* Copy the guard into the Z flag and leave the others unchanged */
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
assign(
flags_new,
@@ -13974,7 +14268,7 @@
mkU32(~X86G_CC_MASK_Z)),
binop(Iop_Shl32,
binop(Iop_And32,
- unop(Iop_8Uto32, mkexpr(cond)), mkU32(1)),
+ unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
mkU8(X86G_CC_SHIFT_Z)) ));
stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
@@ -14300,13 +14594,13 @@
case 0xC0: { /* XADD Gb,Eb */
Bool decodeOK;
- delta = dis_xadd_G_E ( sorb, 1, delta, &decodeOK );
+ delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
if (!decodeOK) goto decode_failure;
break;
}
case 0xC1: { /* XADD Gv,Ev */
Bool decodeOK;
- delta = dis_xadd_G_E ( sorb, sz, delta, &decodeOK );
+ delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
if (!decodeOK) goto decode_failure;
break;
}
@@ -14437,6 +14731,12 @@
jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
+ /* We also need to say that a CAS is not expected now, regardless
+ of what it might have been set to at the start of the function,
+ since the IR that we've emitted just above (to synthesis a
+ SIGILL) does not involve any CAS, and presumably no other IR has
+ been emitted for this (non-decoded) insn. */
+ *expect_CAS = False;
return dres;
} /* switch (opc) for the main (primary) opcode switch. */
@@ -14473,6 +14773,8 @@
V...
[truncated message content] |
|
From: <sv...@va...> - 2009-05-21 21:51:47
|
Author: sewardj
Date: 2009-05-21 22:51:20 +0100 (Thu, 21 May 2009)
New Revision: 1895
Log:
Use the new IRStmt_IRCAS as the basis of the translation for all
LOCK-prefixed AMD64 instructions, and for xchg, which is implicitly
LOCK-prefixed.
This also involves dealing with the new VEX_HWCAPS_AMD64_CX16
capabilities bit. It adds a new cpuid emulation, for processors
that support neither SSE3 nor cmpxchg16b.
Modified:
branches/DCAS/priv/guest-amd64/gdefs.h
branches/DCAS/priv/guest-amd64/ghelpers.c
branches/DCAS/priv/guest-amd64/toIR.c
Modified: branches/DCAS/priv/guest-amd64/gdefs.h
===================================================================
--- branches/DCAS/priv/guest-amd64/gdefs.h 2009-05-21 21:44:38 UTC (rev 1894)
+++ branches/DCAS/priv/guest-amd64/gdefs.h 2009-05-21 21:51:20 UTC (rev 1895)
@@ -150,7 +150,8 @@
extern void amd64g_dirtyhelper_storeF80le ( ULong/*addr*/, ULong/*data*/ );
-extern void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
Modified: branches/DCAS/priv/guest-amd64/ghelpers.c
===================================================================
--- branches/DCAS/priv/guest-amd64/ghelpers.c 2009-05-21 21:44:38 UTC (rev 1894)
+++ branches/DCAS/priv/guest-amd64/ghelpers.c 2009-05-21 21:51:20 UTC (rev 1895)
@@ -1799,7 +1799,85 @@
/*--- Misc integer helpers, including rotates and CPUID. ---*/
/*---------------------------------------------------------------*/
-/* Claim to be the following CPU (2 x ...):
+/* Claim to be the following CPU, which is probably representative of
+ the lowliest (earliest) amd64 offerings. It can do neither sse3
+ nor cx16.
+
+ vendor_id : AuthenticAMD
+ cpu family : 15
+ model : 5
+ model name : AMD Opteron (tm) Processor 848
+ stepping : 10
+ cpu MHz : 1797.682
+ cache size : 1024 KB
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 1
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush mmx fxsr
+ sse sse2 syscall nx mmxext lm 3dnowext 3dnow
+ bogomips : 3600.62
+ TLB size : 1088 4K pages
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 40 bits physical, 48 bits virtual
+ power management: ts fid vid ttp
+*/
+void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_RAX = (ULong)(_a); \
+ st->guest_RBX = (ULong)(_b); \
+ st->guest_RCX = (ULong)(_c); \
+ st->guest_RDX = (ULong)(_d); \
+ } while (0)
+
+ switch (0xFFFFFFFF & st->guest_RAX) {
+ case 0x00000000:
+ SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
+ break;
+ case 0x00000001:
+ SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000005:
+ SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ }
+# undef SET_ABCD
+}
+
+
+/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
+ capable.
+
vendor_id : GenuineIntel
cpu family : 6
model : 15
@@ -1826,7 +1904,7 @@
address sizes : 36 bits physical, 48 bits virtual
power management:
*/
-void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st )
+void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
{
# define SET_ABCD(_a,_b,_c,_d) \
do { st->guest_RAX = (ULong)(_a); \
Modified: branches/DCAS/priv/guest-amd64/toIR.c
===================================================================
--- branches/DCAS/priv/guest-amd64/toIR.c 2009-05-21 21:44:38 UTC (rev 1894)
+++ branches/DCAS/priv/guest-amd64/toIR.c 2009-05-21 21:51:20 UTC (rev 1895)
@@ -44,53 +44,45 @@
without prior written permission.
*/
-/* LIMITATIONS:
+/* Translates AMD64 code to IR. */
- LOCK prefix handling is only safe in the situation where
- Vex-generated code is run single-threadedly. (This is not the same
- as saying that Valgrind can't safely use Vex to run multithreaded
- programs). See comment attached to LOCK prefix handling in
- disInstr for details.
-*/
-
/* TODO:
All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
to ensure a 64-bit value is being written.
-//.. x87 FP Limitations:
-//..
-//.. * all arithmetic done at 64 bits
-//..
-//.. * no FP exceptions, except for handling stack over/underflow
-//..
-//.. * FP rounding mode observed only for float->int conversions
-//.. and int->float conversions which could lose accuracy, and
-//.. for float-to-float rounding. For all other operations,
-//.. round-to-nearest is used, regardless.
-//..
-//.. * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
-//.. simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
-//.. even when it isn't.
-//..
-//.. * some of the FCOM cases could do with testing -- not convinced
-//.. that the args are the right way round.
-//..
-//.. * FSAVE does not re-initialise the FPU; it should do
-//..
-//.. * FINIT not only initialises the FPU environment, it also
-//.. zeroes all the FP registers. It should leave the registers
-//.. unchanged.
-//..
-//.. RDTSC returns zero, always.
-//..
-//.. SAHF should cause eflags[1] == 1, and in fact it produces 0. As
-//.. per Intel docs this bit has no meaning anyway. Since PUSHF is the
-//.. only way to observe eflags[1], a proper fix would be to make that
-//.. bit be set by PUSHF.
-//..
-//.. This module uses global variables and so is not MT-safe (if that
-//.. should ever become relevant).
+ x87 FP Limitations:
+
+ * all arithmetic done at 64 bits
+
+ * no FP exceptions, except for handling stack over/underflow
+
+ * FP rounding mode observed only for float->int conversions and
+ int->float conversions which could lose accuracy, and for
+ float-to-float rounding. For all other operations,
+ round-to-nearest is used, regardless.
+
+ * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
+ simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
+ even when it isn't.
+
+ * some of the FCOM cases could do with testing -- not convinced
+ that the args are the right way round.
+
+ * FSAVE does not re-initialise the FPU; it should do
+
+ * FINIT not only initialises the FPU environment, it also zeroes
+ all the FP registers. It should leave the registers unchanged.
+
+ RDTSC returns zero, always.
+
+ SAHF should cause eflags[1] == 1, and in fact it produces 0. As
+ per Intel docs this bit has no meaning anyway. Since PUSHF is the
+ only way to observe eflags[1], a proper fix would be to make that
+ bit be set by PUSHF.
+
+ This module uses global variables and so is not MT-safe (if that
+ should ever become relevant).
*/
/* Notes re address size overrides (0x67).
@@ -142,10 +134,41 @@
that the preamble will never occur except in specific code
fragments designed for Valgrind to catch.
- No prefixes may precede a "Special" instruction. */
+ No prefixes may precede a "Special" instruction.
+*/
-/* Translates AMD64 code to IR. */
+/* casLE (implementation of lock-prefixed insns) and rep-prefixed
+ insns: the side-exit back to the start of the insn is done with
+ Ijk_Boring. This is quite wrong, it should be done with
+ Ijk_NoRedir, since otherwise the side exit, which is intended to
+ restart the instruction for whatever reason, could go somewhere
+ entirely else. Doing it right (with Ijk_NoRedir jumps) would make
+ no-redir jumps performance critical, at least for rep-prefixed
+ instructions, since all iterations thereof would involve such a
+ jump. It's not such a big deal with casLE since the side exit is
+ only taken if the CAS fails, that is, the location is contended,
+ which is relatively unlikely.
+*/
+/* LOCK prefixed instructions. These are translated using IR-level
+ CAS statements (IRCAS) and are believed to preserve atomicity, even
+ from the point of view of some other process racing against a
+ simulated one (presumably they communicate via a shared memory
+ segment).
+
+ Handlers which are aware of LOCK prefixes are:
+ dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
+ dis_cmpxchg_G_E (cmpxchg)
+ dis_Grp1 (add, or, adc, sbb, and, sub, xor)
+ dis_Grp3 (not, neg)
+ dis_Grp4 (inc, dec)
+ dis_Grp5 (inc, dec)
+ dis_Grp8_Imm (bts, btc, btr)
+ dis_bt_G_E (bts, btc, btr)
+ dis_xadd_G_E (xadd)
+*/
+
+
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
@@ -1391,7 +1414,35 @@
unop(Iop_1Uto64,y)));
}
+/* Generate a compare-and-swap operation, operating on memory at
+ 'addr'. The expected value is 'expVal' and the new value is
+ 'newVal'. If the operation fails, then transfer control (with a
+ no-redir jump (XXX no -- see comment at top of this file)) to
+ 'restart_point', which is presumably the address of the guest
+ instruction again -- retrying, essentially. */
+static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
+ Addr64 restart_point )
+{
+ IRCAS* cas;
+ IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
+ IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
+ IRTemp oldTmp = newTemp(tyE);
+ IRTemp expTmp = newTemp(tyE);
+ vassert(tyE == tyN);
+ vassert(tyE == Ity_I64 || tyE == Ity_I32
+ || tyE == Ity_I16 || tyE == Ity_I8);
+ assign(expTmp, expVal);
+ cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
+ NULL, mkexpr(expTmp), NULL, newVal );
+ stmt( IRStmt_CAS(cas) );
+ stmt( IRStmt_Exit(
+ binop( mkSizedOp(tyE,Iop_CmpNE8), mkexpr(oldTmp), mkexpr(expTmp) ),
+ Ijk_Boring, /*Ijk_NoRedir*/
+ IRConst_U64( restart_point )
+ ));
+}
+
/*------------------------------------------------------------*/
/*--- Helpers for %rflags. ---*/
/*------------------------------------------------------------*/
@@ -1733,9 +1784,29 @@
/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
appropriately.
+
+ Optionally, generate a store for the 'tres' value. This can either
+ be a normal store, or it can be a cas-with-possible-failure style
+ store:
+
+ if taddr is IRTemp_INVALID, then no store is generated.
+
+ if taddr is not IRTemp_INVALID, then a store (using taddr as
+ the address) is generated:
+
+ if texpVal is IRTemp_INVALID then a normal store is
+ generated, and restart_point must be zero (it is irrelevant).
+
+ if texpVal is not IRTemp_INVALID then a cas-style store is
+ generated. texpVal is the expected value, restart_point
+ is the restart point if the store fails, and texpVal must
+ have the same type as tres.
+
*/
static void helper_ADC ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
@@ -1744,6 +1815,8 @@
IROp plus = mkSizedOp(ty, Iop_Add8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
switch (sz) {
case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
@@ -1763,6 +1836,20 @@
binop(plus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
@@ -1772,10 +1859,13 @@
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
- appropriately.
+ appropriately. As with helper_ADC, possibly generate a store of
+ the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
@@ -1784,6 +1874,8 @@
IROp minus = mkSizedOp(ty, Iop_Sub8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
switch (sz) {
case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
@@ -1803,6 +1895,20 @@
binop(minus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
@@ -2541,11 +2647,13 @@
assign( src, getIRegE(size,pfx,rm) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
@@ -2568,11 +2676,13 @@
assign( src, loadLE(szToITy(size), mkexpr(addr)) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
@@ -2651,11 +2761,13 @@
assign(src, getIRegG(size,pfx,rm));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegE(size, pfx, rm, mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegE(size, pfx, rm, mkexpr(dst1));
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
@@ -2680,20 +2792,43 @@
assign(src, getIRegG(size,pfx,rm));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (keep) {
+ if (pfx & PFX_LOCK) {
+ if (0) vex_printf("locked case\n" );
+ casLE( mkexpr(addr),
+ mkexpr(dst0)/*expval*/,
+ mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
+ } else {
+ if (0) vex_printf("nonlocked case\n");
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
- if (keep)
- storeLE(mkexpr(addr), mkexpr(dst1));
}
DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
@@ -2825,11 +2960,13 @@
}
else
if (op8 == Iop_Add8 && carrying) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
if (op8 == Iop_Sub8 && carrying) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
vpanic("dis_op_imm_A(amd64,guest)");
@@ -2981,10 +3118,12 @@
assign(src, mkU(ty,d64 & mask));
if (gregLO3ofRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ helper_ADC( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else
if (gregLO3ofRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ helper_SBB( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
if (isAddSub(op8))
@@ -3007,21 +3146,43 @@
assign(src, mkU(ty,d64 & mask));
if (gregLO3ofRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (gregLO3ofRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (gregLO3ofRM(modrm) < 7) {
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
+ mkexpr(dst1)/*newVal*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
}
- if (gregLO3ofRM(modrm) < 7)
- storeLE(mkexpr(addr), mkexpr(dst1));
-
delta += (len+d_sz);
DIP("%s%c $%lld, %s\n",
nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
@@ -3370,17 +3531,6 @@
src_val, dis_buf);
}
- /* Copy relevant bit from t2 into the carry flag. */
- /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
- stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
- stmt( IRStmt_Put(
- OFFB_CC_DEP1,
- binop(Iop_And64,
- binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
- mkU64(1))
- ));
-
/* Compute the new value into t2m, if non-BT. */
switch (gregLO3ofRM(modrm)) {
case 4: /* BT */
@@ -3395,6 +3545,7 @@
assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
break;
default:
+ /*NOTREACHED*/ /*the previous switch guards this*/
vassert(0);
}
@@ -3403,10 +3554,31 @@
if (epartIsReg(modrm)) {
putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
} else {
- storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(t_addr),
+ narrowTo(ty, mkexpr(t2))/*expd*/,
+ narrowTo(ty, mkexpr(t2m))/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ }
}
}
+ /* Copy relevant bit from t2 into the carry flag. */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
+ mkU64(1))
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
return delta;
}
@@ -3601,7 +3773,14 @@
*decode_OK = False;
return delta;
case 2: /* NOT */
- storeLE( mkexpr(addr), unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ dst1 = newTemp(ty);
+ assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
DIP("not%c %s\n", nameISize(sz), dis_buf);
break;
case 3: /* NEG */
@@ -3612,8 +3791,13 @@
assign(src, mkexpr(t1));
assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
mkexpr(src)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
- storeLE( mkexpr(addr), mkexpr(dst1) );
DIP("neg%c %s\n", nameISize(sz), dis_buf);
break;
case 4: /* MUL (unsigned widening) */
@@ -3680,12 +3864,22 @@
switch (gregLO3ofRM(modrm)) {
case 0: /* INC */
assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( True, t2, ty );
break;
case 1: /* DEC */
assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( False, t2, ty );
break;
default:
@@ -3779,15 +3973,25 @@
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Add8),
mkexpr(t1), mkU(ty,1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( True, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 1: /* DEC */
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
mkexpr(t1), mkU(ty,1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( False, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 2: /* call Ev */
/* Ignore any sz value and operate as if sz==8. */
@@ -6978,14 +7182,16 @@
UChar modrm;
Int len;
IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
- t_addr1, t_rsp, t_mask;
+ t_addr1, t_rsp, t_mask, t_new;
vassert(sz == 2 || sz == 4 || sz == 8);
t_fetched = t_bitno0 = t_bitno1 = t_bitno2
- = t_addr0 = t_addr1 = t_rsp = t_mask = IRTemp_INVALID;
+ = t_addr0 = t_addr1 = t_rsp
+ = t_mask = t_new = IRTemp_INVALID;
t_fetched = newTemp(Ity_I8);
+ t_new = newTemp(Ity_I8);
t_bitno0 = newTemp(Ity_I64);
t_bitno1 = newTemp(Ity_I64);
t_bitno2 = newTemp(Ity_I8);
@@ -7050,26 +7256,31 @@
if (op != BtOpNone) {
switch (op) {
- case BtOpSet:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Or8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpSet:
+ assign( t_new,
+ binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpComp:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Xor8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpComp:
+ assign( t_new,
+ binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpReset:
- storeLE( mkexpr(t_addr1),
- binop(Iop_And8, mkexpr(t_fetched),
- unop(Iop_Not8, mkexpr(t_mask))) );
+ case BtOpReset:
+ assign( t_new,
+ binop(Iop_And8, mkexpr(t_fetched),
+ unop(Iop_Not8, mkexpr(t_mask))) );
break;
default:
vpanic("dis_bt_G_E(amd64)");
}
+ if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
+ casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
+ mkexpr(t_new)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(t_addr1), mkexpr(t_new) );
+ }
}
-
+
/* Side effect done; now get selected bit into Carry flag */
/* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
@@ -7088,11 +7299,11 @@
/* Move reg operand from stack back to reg */
if (epartIsReg(modrm)) {
- /* t_esp still points at it. */
+ /* t_rsp still points at it. */
/* only write the reg if actually modifying it; doing otherwise
zeroes the top half erroneously when doing btl due to
standard zero-extend rule */
- if (op != BtOpNone)
+ if (op != BtOpNone)
putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(sz)) );
}
@@ -7321,226 +7532,80 @@
IRTemp addr = IRTemp_INVALID;
UChar rm = getUChar(delta0);
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix, generate sequence based
+ on Mux0X
+
+ reg-mem, not locked: ignore any lock prefix, generate sequence
+ based on Mux0X
+
+ reg-mem, locked: use IRCAS
+ */
+
if (epartIsReg(rm)) {
+ /* case 1 */
*ok = False;
return delta0;
/* awaiting test case */
assign( dest, getIRegE(size, pfx, rm) );
delta0++;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ putIRegE(size, pfx, rm, mkexpr(dest2));
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIRegG(size,pfx,rm),
nameIRegE(size,pfx,rm) );
- } else {
+ }
+ else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
+ /* case 2 */
addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
assign( dest, loadLE(ty, mkexpr(addr)) );
delta0 += len;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ storeLE( mkexpr(addr), mkexpr(dest2) );
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIRegG(size,pfx,rm), dis_buf);
}
-
- assign( src, getIRegG(size, pfx, rm) );
- assign( acc, getIRegRAX(size) );
- setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
- assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
- assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
- assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
- putIRegRAX(size, mkexpr(acc2));
-
- if (epartIsReg(rm)) {
- putIRegE(size, pfx, rm, mkexpr(dest2));
- } else {
- storeLE( mkexpr(addr), mkexpr(dest2) );
+ else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
+ /* case 3 */
+ /* src is new value. acc is expected value. dest is old value.
+ Compute success from the output of the IRCAS, and steer the
+ new value for RAX accordingly: in case of success, RAX is
+ unchanged. */
+ addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ delta0 += len;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ stmt( IRStmt_CAS(
+ mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
+ NULL, mkexpr(acc), NULL, mkexpr(src) )
+ ));
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ DIP("lock cmpxchg%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm), dis_buf);
}
+ else vassert(0);
*ok = True;
return delta0;
}
-static
-ULong dis_cmpxchg8b ( /*OUT*/Bool* ok,
- VexAbiInfo* vbi,
- Prefix pfx,
- Int sz,
- Long delta0 )
-{
- HChar dis_buf[50];
- Int len;
- IRType ty = szToITy(sz);
- IRTemp eq = newTemp(Ity_I8);
- IRTemp olda = newTemp(ty);
- IRTemp oldb = newTemp(ty);
- IRTemp oldc = newTemp(ty);
- IRTemp oldd = newTemp(ty);
- IRTemp newa = newTemp(Ity_I64);
- IRTemp newd = newTemp(Ity_I64);
- IRTemp oldml = newTemp(ty);
- IRTemp oldmh = newTemp(ty);
- IRTemp newml = newTemp(ty);
- IRTemp newmh = newTemp(ty);
- IRTemp addr = IRTemp_INVALID;
- IRTemp oldrf = newTemp(Ity_I64);
- IRTemp newrf = newTemp(Ity_I64);
- UChar rm = getUChar(delta0);
- vassert(sz == 4 || sz == 8); /* guaranteed by caller */
-
- if (epartIsReg(rm)) {
- *ok = False;
- return delta0;
- }
-
- addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
- delta0 += len;
- DIP("cmpxchg%s %s\n", sz == 4 ? "8" : "16", dis_buf);
-
- if (sz == 4) {
- assign( olda, getIReg32( R_RAX ) );
- assign( oldb, getIReg32( R_RBX ) );
- assign( oldc, getIReg32( R_RCX ) );
- assign( oldd, getIReg32( R_RDX ) );
- assign( oldml, loadLE( Ity_I32, mkexpr(addr) ));
- assign( oldmh, loadLE( Ity_I32,
- binop(Iop_Add64,mkexpr(addr),mkU64(4)) ));
- assign(eq,
- unop(Iop_1Uto8,
- binop(Iop_CmpEQ32,
- binop(Iop_Or32,
- binop(Iop_Xor32,mkexpr(olda),mkexpr(oldml)),
- binop(Iop_Xor32,mkexpr(oldd),mkexpr(oldmh))),
- mkU32(0))));
- assign( newml, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(oldb)) );
- assign( newmh, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldc)) );
- assign( newa, IRExpr_Mux0X(mkexpr(eq),
- unop(Iop_32Uto64,mkexpr(oldml)),
- getIRegRAX(8)) );
- assign( newd, IRExpr_Mux0X(mkexpr(eq),
- unop(Iop_32Uto64,mkexpr(oldmh)),
- getIRegRDX(8)) );
-
- storeLE( mkexpr(addr), mkexpr(newml) );
- storeLE( binop(Iop_Add64,mkexpr(addr),mkU64(4)),
- mkexpr(newmh) );
- putIRegRAX( 8, mkexpr(newa) );
- putIRegRDX( 8, mkexpr(newd) );
- } else {
- assign( olda, getIReg64( R_RAX ) );
- assign( oldb, getIReg64( R_RBX ) );
- assign( oldc, getIReg64( R_RCX ) );
- assign( oldd, getIReg64( R_RDX ) );
- assign( oldml, loadLE( Ity_I64, mkexpr(addr) ));
- assign( oldmh, loadLE( Ity_I64,
- binop(Iop_Add64,mkexpr(addr),mkU64(8)) ));
- assign(eq,
- unop(Iop_1Uto8,
- binop(Iop_CmpEQ64,
- binop(Iop_Or64,
- binop(Iop_Xor64,mkexpr(olda),mkexpr(oldml)),
- binop(Iop_Xor64,mkexpr(oldd),mkexpr(oldmh))),
- mkU64(0))));
- assign( newml, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(oldb)) );
- assign( newmh, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldc)) );
- assign( newa, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(olda)) );
- assign( newd, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldd)) );
-
- storeLE( mkexpr(addr), mkexpr(newml) );
- storeLE( binop(Iop_Add64,mkexpr(addr),mkU64(8)),
- mkexpr(newmh) );
- putIRegRAX( 8, mkexpr(newa) );
- putIRegRDX( 8, mkexpr(newd) );
- }
-
- /* And set the flags. Z is set if original d:a == mem, else
- cleared. All others unchanged. (This is different from normal
- cmpxchg which just sets them according to SUB.). */
- assign( oldrf, binop(Iop_And64,
- mk_amd64g_calculate_rflags_all(),
- mkU64(~AMD64G_CC_MASK_Z)) );
- assign( newrf,
- binop(Iop_Or64,
- mkexpr(oldrf),
- binop(Iop_Shl64,
- binop(Iop_And64, unop(Iop_8Uto64, mkexpr(eq)), mkU64(1)),
- mkU8(AMD64G_CC_SHIFT_Z))
- ));
- stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
- stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newrf) ));
- /* Set NDEP even though it isn't used. This makes redundant-PUT
- elimination of previous stores to this field work better. */
- stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
-
- *ok = True;
- return delta0;
-}
-
-//.. //-- static
-//.. //-- Addr dis_cmpxchg8b ( UCodeBlock* cb,
-//.. //-- UChar sorb,
-//.. //-- Addr eip0 )
-//.. //-- {
-//.. //-- Int tal, tah, junkl, junkh, destl, desth, srcl, srch, accl, acch;
-//.. //-- HChar dis_buf[50];
-//.. //-- UChar rm;
-//.. //-- UInt pair;
-//.. //--
-//.. //-- rm = getUChar(eip0);
-//.. //-- accl = newTemp(cb);
-//.. //-- acch = newTemp(cb);
-//.. //-- srcl = newTemp(cb);
-//.. //-- srch = newTemp(cb);
-//.. //-- destl = newTemp(cb);
-//.. //-- desth = newTemp(cb);
-//.. //-- junkl = newTemp(cb);
-//.. //-- junkh = newTemp(cb);
-//.. //--
-//.. //-- vg_assert(!epartIsReg(rm));
-//.. //--
-//.. //-- pair = disAMode ( cb, sorb, eip0, dis_buf );
-//.. //-- tal = LOW24(pair);
-//.. //-- tah = newTemp(cb);
-//.. //-- uInstr2(cb, MOV, 4, TempReg, tal, TempReg, tah);
-//.. //-- uInstr2(cb, ADD, 4, Literal, 0, TempReg, tah);
-//.. //-- uLiteral(cb, 4);
-//.. //-- eip0 += HI8(pair);
-//.. //-- DIP("cmpxchg8b %s\n", dis_buf);
-//.. //--
-//.. //-- uInstr0(cb, CALLM_S, 0);
-//.. //--
-//.. //-- uInstr2(cb, LOAD, 4, TempReg, tah, TempReg, desth);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, desth);
-//.. //-- uInstr2(cb, LOAD, 4, TempReg, tal, TempReg, destl);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, destl);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, srch);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, srch);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, srcl);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, srcl);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_EDX, TempReg, acch);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, acch);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, accl);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, accl);
-//.. //--
-//.. //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_cmpxchg8b));
-//.. //-- uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsEmpty);
-//.. //--
-//.. //-- uInstr1(cb, POP, 4, TempReg, accl);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, accl, ArchReg, R_EAX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, acch);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, acch, ArchReg, R_EDX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, srcl);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, srcl, ArchReg, R_EBX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, srch);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, srch, ArchReg, R_ECX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, destl);
-//.. //-- uInstr2(cb, STORE, 4, TempReg, destl, TempReg, tal);
-//.. //-- uInstr1(cb, POP, 4, TempReg, desth);
-//.. //-- uInstr2(cb, STORE, 4, TempReg, desth, TempReg, tah);
-//.. //--
-//.. //-- uInstr0(cb, CALLM_E, 0);
-//.. //--
-//.. //-- return eip0;
-//.. //-- }
-
-
/* Handle conditional move instructions of the form
cmovcc E(reg-or-mem), G(reg)
@@ -7623,23 +7688,56 @@
IRTemp tmpd = newTemp(ty);
IRTemp tmpt0 = newTemp(ty);
IRTemp tmpt1 = newTemp(ty);
- *decode_ok = True;
+ /* There are 3 cases to consider:
+
+ reg-reg: currently unhandled
+
+ reg-mem, not locked: ignore any lock prefix, generate 'naive'
+ (non-atomic) sequence
+
+ reg-mem, locked: use IRCAS
+ */
+
if (epartIsReg(rm)) {
+ /* case 1 */
*decode_ok = False;
return delta0;
- } else {
+ /* Currently we don't handle xadd_G_E with register operand. */
+ }
+ else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
+ /* case 2 */
IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
assign( tmpd, loadLE(ty, mkexpr(addr)) );
assign( tmpt0, getIRegG(sz, pfx, rm) );
- assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), mkexpr(tmpd), mkexpr(tmpt0)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
storeLE( mkexpr(addr), mkexpr(tmpt1) );
putIRegG(sz, pfx, rm, mkexpr(tmpd));
DIP("xadd%c %s, %s\n",
nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
+ *decode_ok = True;
return len+delta0;
}
+ else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
+ /* case 3 */
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIRegG(sz, pfx, rm) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
+ mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIRegG(sz, pfx, rm, mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
+ *decode_ok = True;
+ return len+delta0;
+ }
+ /*UNREACHED*/
+ vassert(0);
}
//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
@@ -8557,16 +8655,15 @@
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
- Note, this is slightly too permissive. Oh well. Note also, AFAICS
- this is exactly the same for both 32-bit and 64-bit mode.
+ AFAICS this is exactly the same for both 32-bit and 64-bit mode.
- ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
- OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
- ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
- SBB 81 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
- AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
- SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
- XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+ ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
+ OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
+ ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
+ SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
+ AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
+ SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
+ XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
DEC FE /1, FF /1
INC FE /0, FF /0
@@ -8574,7 +8671,7 @@
NEG F6 /3, F7 /3
NOT F6 /2, F7 /2
- XCHG 86, 87
+ XCHG 86, 87
BTC 0F BB, 0F BA /7
BTR 0F B3, 0F BA /6
@@ -8584,52 +8681,93 @@
CMPXCHG8B 0F C7 /1
XADD 0F C0, 0F C1
+
+ ------------------------------
+
+ 80 /0 = addb $imm8, rm8
+ 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
+ 82 /0 = addb $imm8, rm8
+ 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
+
+ 00 = addb r8, rm8
+ 01 = addl r32, rm32 and addw r16, rm16
+
+ Same for ADD OR ADC SBB AND SUB XOR
+
+ FE /1 = dec rm8
+ FF /1 = dec rm32 and dec rm16
+
+ FE /0 = inc rm8
+ FF /0 = inc rm32 and inc rm16
+
+ F6 /3 = neg rm8
+ F7 /3 = neg rm32 and neg rm16
+
+ F6 /2 = not rm8
+ F7 /2 = not rm32 and not rm16
+
+ 0F BB = btcw r16, rm16 and btcl r32, rm32
+ OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32
+
+ Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
{
switch (opc[0]) {
- case 0x00: case 0x01: case 0x02: case 0x03: return True;
- case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
- case 0x10: case 0x11: case 0x12: case 0x13: return True;
- case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
- case 0x20: case 0x21: case 0x22: case 0x23: return True;
- case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
- case 0x30: case 0x31: case 0x32: case 0x33: return True;
+ case 0x00: case 0x01: case 0x08: case 0x09:
+ case 0x10: case 0x11: case 0x18: case 0x19:
+ case 0x20: case 0x21: case 0x28: case 0x29:
+ case 0x30: case 0x31:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
- case 0x80: case 0x81: case 0x83:
- if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6)
+ case 0x80: case 0x81: case 0x82: case 0x83:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xFE: case 0xFF:
- if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1)
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xF6: case 0xF7:
- if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3)
+ if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
+ && !epartIsReg(opc[1]))
return True;
break;
case 0x86: case 0x87:
- return True;
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
case 0x0F: {
switch (opc[1]) {
case 0xBB: case 0xB3: case 0xAB:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xBA:
- if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7)
+ if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
+ && !epartIsReg(opc[2]))
return True;
break;
case 0xB0: case 0xB1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xC7:
- if (gregLO3ofRM(opc[2]) == 1)
+ if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
return True;
break;
case 0xC0: case 0xC1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
default:
break;
} /* switch (opc[1]) */
@@ -8653,6 +8791,7 @@
static
DisResult disInstr_AMD64_WRK (
+ /*OUT*/Bool* expect_CAS,
Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
void* callback_opaque,
@@ -8694,6 +8833,8 @@
dres.len = 0;
dres.continueAt = 0;
+ *expect_CAS = False;
+
vassert(guest_RIP_next_assumed == 0);
vassert(guest_RIP_next_mustcheck == False);
@@ -8774,7 +8915,7 @@
case 0x67: pfx |= PFX_ASO; break;
case 0xF2: pfx |= PFX_F2; break;
case 0xF3: pfx |= PFX_F3; break;
- case 0xF0: pfx |= PFX_LOCK; break;
+ case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
case 0x2E: pfx |= PFX_CS; break;
case 0x3E: pfx |= PFX_DS; break;
case 0x26: pfx |= PFX_ES; break;
@@ -8828,41 +8969,17 @@
if (pfx & PFX_66) sz = 2;
if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
- /* Kludge re LOCK prefixes. We assume here that all code generated
- by Vex is going to be run in a single-threaded context, in other
- words that concurrent executions of Vex-generated translations
- will not happen. So we don't need to worry too much about
- preserving atomicity. However, mark the fact that the notional
- hardware bus lock is being acquired (and, after the insn,
- released), so that thread checking tools know this is a locked
- insn.
+ /* Now we should be looking at the primary opcode byte or the
+ leading F2 or F3. Check that any LOCK prefix is actually
+ allowed. */
- We check for, and immediately reject, (most) inappropriate uses
- of the LOCK prefix. Later (at decode_failure: and
- decode_success:), if we've added a BusLock event, then we will
- follow up with a BusUnlock event. How do we know execution will
- actually ever get to the BusUnlock event? Because
- can_be_used_with_LOCK_prefix rejects all control-flow changing
- instructions.
-
- One loophole, though: if a LOCK prefix insn (seg)faults, then
- the BusUnlock event will never be reached. This could cause
- tools which track bus hardware lock to lose track. Really, we
- should explicitly release the lock after every insn, but that's
- obviously way too expensive. Really, any tool which tracks the
- state of the bus lock needs to ask V's core/tool interface to
- notify it of signal deliveries. On delivery of SIGSEGV to the
- guest, the tool will be notified, in which case it should
- release the bus hardware lock if it is held.
-
- Note, guest-x86/toIR.c contains identical logic.
- */
if (pfx & PFX_LOCK) {
if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
stmt( IRStmt_MBE(Imbe_BusLock) );
unlock_bus_after_insn = True;
DIP("lock ");
} else {
+ *expect_CAS = False;
goto decode_failure;
}
}
@@ -14894,6 +15011,7 @@
nameISize(sz), nameIRegG(sz, pfx, modrm),
nameIRegE(sz, pfx, modrm));
} else {
+ *expect_CAS = True;
/* Need to add IRStmt_MBE(Imbe_BusLock). */
if (pfx & PFX_LOCK) {
/* check it's already been taken care of */
@@ -14909,7 +15027,8 @@
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
assign( t1, loadLE(ty, mkexpr(addr)) );
assign( t2, getIRegG(sz, pfx, modrm) );
- storeLE( mkexpr(addr), mkexpr(t2) );
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
putIRegG( sz, pfx, modrm, mkexpr(t1) );
delta += alen;
DIP("xchg%c %s, %s\n", nameISize(sz),
@@ -15395,12 +15514,135 @@
if (!ok) goto decode_failure;
break;
}
+
case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
- Bool ok = True;
+ IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
+ IRTemp expdHi = newTemp(elemTy);
+ IRTemp expdLo = newTemp(elemTy);
+ IRTemp dataHi = newTemp(elemTy);
+ IRTemp dataLo = newTemp(elemTy);
+ IRTemp oldHi = newTemp(elemTy);
+ IRTemp oldLo = newTemp(elemTy);
+ IRTemp flags_old = newTemp(Ity_I64);
+ IRTemp flags_new = newTemp(Ity_I64);
+ IRTemp success = newTemp(Ity_I1);
+ IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
+ IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
+ IROp opCmpEQ = sz==4 ? Iop_CmpEQ32 : Iop_CmpEQ64;
+ IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
+ IRTemp expdHi64 = newTemp(Ity_I64);
+ IRTemp expdLo64 = newTemp(Ity_I64);
+
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
+
+ /* Decode, and generate address. */
if (have66orF2orF3(pfx)) goto decode_failure;
if (sz != 4 && sz != 8) goto decode_failure;
- delta = dis_cmpxchg8b ( &ok, vbi, pfx, sz, delta );
- break;
+ if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ goto decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregLO3ofRM(modrm) != 1) goto decode_failure;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+
+ /* cmpxchg16b requires an alignment check. */
+ if (sz == 8)
+ gen_SEGV_if_not_16_aligned( addr );
+
+ /* Get the expected and new values. */
+ assign( expdHi64, getIReg64(R_RDX) );
+ assign( expdLo64, getIReg64(R_RAX) );
+
+ /* These are the correctly-sized expected and new values.
+ However, we also get expdHi64/expdLo64 above as 64-bits
+ regardless, because we will need them later in the 32-bit
+ case (paradoxically). */
+ assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
+ : mkexpr(expdHi64) );
+ assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
+ : mkexpr(expdLo64) );
+ assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
+ assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
+
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
+
+ /* success when oldHi:oldLo == expdHi:expdLo */
+ assign( success,
+ binop(opCmpEQ,
+ binop(opOR,
+ binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ zero
+ ));
+
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ RDX:RAX the value seen in memory. */
+ /* Now of course there's a complication in the 32-bit case
+ (bah!): if the DCAS succeeds, we need to leave RDX:RAX
+ unchanged; but if we use the same scheme as in the 64-bit
+ case, we get hit by the standard rule that a write to the
+ bottom 32 bits of an integer register zeros the upper 32
+ bits. And so the upper halves of RDX and RAX mysteriously
+ become zero. So we have to stuff back in the original
+ 64-bit values which we previously stashed in
+ expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
+ /* It's just _so_ much fun ... */
+ putIRegRDX( 8,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
+ : mkexpr(oldHi),
+ mkexpr(expdHi64)
+ ));
+ putIRegRAX( 8,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
+ : mkexpr(oldLo),
+ mkexpr(expdLo64)
+ ));
+
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
+ assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
+ assign(
+ flags_new,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(flags_old),
+ mkU64(~AMD64G_CC_MASK_Z)),
+ binop(Iop_Shl64,
+ binop(Iop_And64,
+ unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
+ mkU8(AMD64G_CC_SHIFT_Z)) ));
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ /* Set NDEP even though it isn't used. This makes
+ redundant-PUT elimination of previous stores to this field
+ work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ /* Sheesh. Aren't you glad it was me and not you that had to
+ write and validate all this grunge? */
+
+ DIP("cmpxchg8b %s\n", dis_buf);
+ break;
+
}
/* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
@@ -15414,12 +15656,18 @@
HChar* fName = NULL;
void* fAddr = NULL;
if (haveF2orF3(pfx)) goto decode_failure;
- if (archinfo->hwcaps == 0/*baseline, == SSE2*/) {
- fName = "amd64g_dirtyhelper_CPUID";
- fAddr = &amd64g_dirtyhelper_CPUID;
+ if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
+ |VEX_HWCAPS_AMD64_CX16)) {
+ fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
+ fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
+ /* This is a Core-2-like machine */
}
- else
- vpanic("disInstr(amd64)(cpuid)");
+ else {
+ /* Give a CPUID for at least a baseline machine, no SSE2
+ and no CX16 */
+ fName = "amd64g_dirtyhelper_CPUID_baseline";
+ fAddr = &amd64g_dirtyhelper_CPUID_baseline;
+ }
vassert(fName); vassert(fAddr);
d = unsafeIRDirty_0_N ( 0/*regparms*/,
@@ -15818,6 +16066,12 @@
jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
+ /* We also need to say that a CAS is not expected now, regardless
+ of what it might have been set to at the start of the fu...
[truncated message content] |
|
From: <sv...@va...> - 2009-05-21 21:44:44
|
Author: sewardj
Date: 2009-05-21 22:44:38 +0100 (Thu, 21 May 2009)
New Revision: 1894
Log:
Add a new capabilities bit for AMD64, indicating whether or not
cmpxchg16b is supported. Up till now we've been able to get away with
ignoring the question of whether the host can do cmpxchg16b. But not
any more; with double-word IRCAS with 64-bit words, we'll have to
generate a real cmpxchg16b insn at the back end. So we can't allow
cmpxchg16b in the front end in this case.
Modified:
branches/DCAS/pub/libvex.h
Modified: branches/DCAS/pub/libvex.h
===================================================================
--- branches/DCAS/pub/libvex.h 2009-05-21 21:40:21 UTC (rev 1893)
+++ branches/DCAS/pub/libvex.h 2009-05-21 21:44:38 UTC (rev 1894)
@@ -78,25 +78,28 @@
but not SSE1). LibVEX_Translate will check for nonsensical
combinations. */
-/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE) */
+/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
+ cmpxchg8b. */
#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
-/* amd64: baseline capability is SSE2 */
+/* amd64: baseline capability is SSE2, with cmpxchg8b but not
+ cmpxchg16b. */
#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */
+#define VEX_HWCAPS_AMD64_CX16 (1<<5) /* cmpxchg16b support */
/* ppc32: baseline capability is integer only */
-#define VEX_HWCAPS_PPC32_F (1<<5) /* basic (non-optional) FP */
-#define VEX_HWCAPS_PPC32_V (1<<6) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC32_FX (1<<7) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC32_GX (1<<8) /* Graphics extns
+#define VEX_HWCAPS_PPC32_F (1<<6) /* basic (non-optional) FP */
+#define VEX_HWCAPS_PPC32_V (1<<7) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC32_FX (1<<8) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC32_GX (1<<9) /* Graphics extns
(fres,frsqrte,fsel,stfiwx) */
/* ppc64: baseline capability is integer and basic FP insns */
-#define VEX_HWCAPS_PPC64_V (1<<9) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC64_FX (1<<10) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC64_GX (1<<11) /* Graphics extns
+#define VEX_HWCAPS_PPC64_V (1<<10) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC64_FX (1<<11) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC64_GX (1<<12) /* Graphics extns
(fres,frsqrte,fsel,stfiwx) */
/* arm: baseline capability is ARMv4 */
|
|
From: <sv...@va...> - 2009-05-21 21:40:29
|
Author: sewardj
Date: 2009-05-21 22:40:21 +0100 (Thu, 21 May 2009)
New Revision: 1893
Log:
Handle IRStmt_IRCAS in the amd64 back end.
Modified:
branches/DCAS/priv/host-amd64/hdefs.c
branches/DCAS/priv/host-amd64/hdefs.h
branches/DCAS/priv/host-amd64/isel.c
Modified: branches/DCAS/priv/host-amd64/hdefs.c
===================================================================
--- branches/DCAS/priv/host-amd64/hdefs.c 2009-05-21 21:37:23 UTC (rev 1892)
+++ branches/DCAS/priv/host-amd64/hdefs.c 2009-05-21 21:40:21 UTC (rev 1893)
@@ -790,12 +790,28 @@
i->Ain.Bsfr64.dst = dst;
return i;
}
-AMD64Instr* AMD64Instr_MFence ( void )
-{
+AMD64Instr* AMD64Instr_MFence ( void ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_MFence;
return i;
}
+AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_ACAS;
+ i->Ain.ACAS.addr = addr;
+ i->Ain.ACAS.sz = sz;
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ return i;
+}
+AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_DACAS;
+ i->Ain.DACAS.addr = addr;
+ i->Ain.DACAS.sz = sz;
+ vassert(sz == 8 || sz == 4);
+ return i;
+}
+
AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1174,6 +1190,18 @@
case Ain_MFence:
vex_printf("mfence" );
return;
+ case Ain_ACAS:
+ vex_printf("lock cmpxchg%c ",
+ i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
+ : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
+ vex_printf("{%%rax->%%rbx},");
+ ppAMD64AMode(i->Ain.ACAS.addr);
+ return;
+ case Ain_DACAS:
+ vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
+ (Int)(2 * i->Ain.DACAS.sz));
+ ppAMD64AMode(i->Ain.DACAS.addr);
+ return;
case Ain_A87Free:
vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
break;
@@ -1511,6 +1539,18 @@
return;
case Ain_MFence:
return;
+ case Ain_ACAS:
+ addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
+ addHRegUse(u, HRmRead, hregAMD64_RBX());
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ return;
+ case Ain_DACAS:
+ addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
+ addHRegUse(u, HRmRead, hregAMD64_RCX());
+ addHRegUse(u, HRmRead, hregAMD64_RBX());
+ addHRegUse(u, HRmModify, hregAMD64_RDX());
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ return;
case Ain_A87Free:
return;
case Ain_A87PushPop:
@@ -1729,6 +1769,12 @@
return;
case Ain_MFence:
return;
+ case Ain_ACAS:
+ mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
+ return;
+ case Ain_DACAS:
+ mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
+ return;
case Ain_A87Free:
return;
case Ain_A87PushPop:
@@ -2848,6 +2894,40 @@
*p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
goto done;
+ case Ain_ACAS:
+ /* lock */
+ *p++ = 0xF0;
+ if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
+ /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
+ in %rbx. The new-value register is hardwired to be %rbx
+ since dealing with byte integer registers is too much hassle,
+ so we force the register operand to %rbx (could equally be
+ %rcx or %rdx). */
+ rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
+ if (i->Ain.ACAS.sz != 8)
+ rex = clearWBit(rex);
+
+ *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
+ *p++ = 0x0F;
+ if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
+ p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
+ goto done;
+
+ case Ain_DACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
+ value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
+ aren't encoded in the insn. */
+ rex = rexAMode_M( fake(1), i->Ain.ACAS.addr );
+ if (i->Ain.ACAS.sz != 8)
+ rex = clearWBit(rex);
+ *p++ = rex;
+ *p++ = 0x0F;
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
+ goto done;
+
case Ain_A87Free:
vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
for (j = 0; j < i->Ain.A87Free.nregs; j++) {
Modified: branches/DCAS/priv/host-amd64/hdefs.h
===================================================================
--- branches/DCAS/priv/host-amd64/hdefs.h 2009-05-21 21:37:23 UTC (rev 1892)
+++ branches/DCAS/priv/host-amd64/hdefs.h 2009-05-21 21:40:21 UTC (rev 1893)
@@ -383,6 +383,10 @@
Ain_Set64, /* convert condition code to 64-bit value */
Ain_Bsfr64, /* 64-bit bsf/bsr */
Ain_MFence, /* mem fence */
+ Ain_ACAS, /* 8/16/32/64-bit lock;cmpxchg */
+ Ain_DACAS, /* lock;cmpxchg8b/16b (doubleword ACAS, 2 x
+ 32-bit or 2 x 64-bit only) */
+
Ain_A87Free, /* free up x87 registers */
Ain_A87PushPop, /* x87 loads/stores */
Ain_A87FpOp, /* x87 operations */
@@ -534,6 +538,14 @@
On AMD64 we emit a real "mfence". */
struct {
} MFence;
+ struct {
+ AMD64AMode* addr;
+ UChar sz; /* 1, 2, 4 or 8 */
+ } ACAS;
+ struct {
+ AMD64AMode* addr;
+ UChar sz; /* 4 or 8 only */
+ } DACAS;
/* --- X87 --- */
@@ -689,6 +701,9 @@
extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst );
extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz );
+extern AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz );
+
extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
Modified: branches/DCAS/priv/host-amd64/isel.c
===================================================================
--- branches/DCAS/priv/host-amd64/isel.c 2009-05-21 21:37:23 UTC (rev 1892)
+++ branches/DCAS/priv/host-amd64/isel.c 2009-05-21 21:40:21 UTC (rev 1893)
@@ -145,7 +145,6 @@
Int vreg_ctr;
- /* Currently (27 Jan 06) unused */
UInt hwcaps;
}
ISelEnv;
@@ -3822,6 +3821,81 @@
}
break;
+ /* --------- ACAS --------- */
+ case Ist_CAS:
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+ /* "normal" singleton CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* get: cas->expd into %rax, and cas->data into %rbx */
+ AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rData = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpd = iselIntExpr_R(env, cas->expdLo);
+ HReg rOld = lookupIRTemp(env, cas->oldLo);
+ vassert(cas->expdHi == NULL);
+ vassert(cas->dataHi == NULL);
+ addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
+ addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
+ addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
+ switch (ty) {
+ case Ity_I64: sz = 8; break;
+ case Ity_I32: sz = 4; break;
+ case Ity_I16: sz = 2; break;
+ case Ity_I8: sz = 1; break;
+ default: goto unhandled_cas;
+ }
+ addInstr(env, AMD64Instr_ACAS(am, sz));
+ addInstr(env, AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
+ return;
+ } else {
+ /* double CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* only 32-bit and 64-bit allowed in this case */
+ /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
+ /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
+ AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ switch (ty) {
+ case Ity_I64:
+ if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ goto unhandled_cas; /* we'd have to generate
+ cmpxchg16b, but the host
+ doesn't support that */
+ sz = 8;
+ break;
+ case Ity_I32:
+ sz = 4;
+ break;
+ default:
+ goto unhandled_cas;
+ }
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
+ addInstr(env, AMD64Instr_DACAS(am, sz));
+ addInstr(env,
+ AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
+ addInstr(env,
+ AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
+ return;
+ }
+ unhandled_cas:
+ break;
+
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
case Ist_IMark:
@@ -3893,7 +3967,8 @@
/* sanity ... */
vassert(arch_host == VexArchAMD64);
- vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3)));
+ vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3
+ |VEX_HWCAPS_AMD64_CX16)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
|
|
From: <sv...@va...> - 2009-05-21 21:37:35
|
Author: sewardj
Date: 2009-05-21 22:37:23 +0100 (Thu, 21 May 2009)
New Revision: 1892
Log:
Handle IRStmt_IRCAS in the x86 back end.
Modified:
branches/DCAS/priv/host-x86/hdefs.c
branches/DCAS/priv/host-x86/hdefs.h
branches/DCAS/priv/host-x86/isel.c
Modified: branches/DCAS/priv/host-x86/hdefs.c
===================================================================
--- branches/DCAS/priv/host-x86/hdefs.c 2009-05-21 21:31:49 UTC (rev 1891)
+++ branches/DCAS/priv/host-x86/hdefs.c 2009-05-21 21:37:23 UTC (rev 1892)
@@ -710,8 +710,7 @@
i->Xin.Bsfr32.dst = dst;
return i;
}
-X86Instr* X86Instr_MFence ( UInt hwcaps )
-{
+X86Instr* X86Instr_MFence ( UInt hwcaps ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_MFence;
i->Xin.MFence.hwcaps = hwcaps;
@@ -719,6 +718,20 @@
|VEX_HWCAPS_X86_SSE3)));
return i;
}
+X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_ACAS;
+ i->Xin.ACAS.addr = addr;
+ i->Xin.ACAS.sz = sz;
+ vassert(sz == 4 || sz == 2 || sz == 1);
+ return i;
+}
+X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_DACAS;
+ i->Xin.DACAS.addr = addr;
+ return i;
+}
X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
@@ -1002,6 +1015,17 @@
vex_printf("mfence(%s)",
LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
return;
+ case Xin_ACAS:
+ vex_printf("lock cmpxchg%c ",
+ i->Xin.ACAS.sz==1 ? 'b'
+ : i->Xin.ACAS.sz==2 ? 'w' : 'l');
+ vex_printf("{%%eax->%%ebx},");
+ ppX86AMode(i->Xin.ACAS.addr);
+ return;
+ case Xin_DACAS:
+ vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
+ ppX86AMode(i->Xin.DACAS.addr);
+ return;
case Xin_FpUnary:
vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
ppHRegX86(i->Xin.FpUnary.src);
@@ -1266,6 +1290,18 @@
return;
case Xin_MFence:
return;
+ case Xin_ACAS:
+ addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
+ addHRegUse(u, HRmRead, hregX86_EBX());
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ return;
+ case Xin_DACAS:
+ addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
+ addHRegUse(u, HRmRead, hregX86_ECX());
+ addHRegUse(u, HRmRead, hregX86_EBX());
+ addHRegUse(u, HRmModify, hregX86_EDX());
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ return;
case Xin_FpUnary:
addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
@@ -1450,6 +1486,12 @@
return;
case Xin_MFence:
return;
+ case Xin_ACAS:
+ mapRegs_X86AMode(m, i->Xin.ACAS.addr);
+ return;
+ case Xin_DACAS:
+ mapRegs_X86AMode(m, i->Xin.DACAS.addr);
+ return;
case Xin_FpUnary:
mapReg(m, &i->Xin.FpUnary.src);
mapReg(m, &i->Xin.FpUnary.dst);
@@ -2495,6 +2537,35 @@
/*NOTREACHED*/
break;
+ case Xin_ACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
+ in %ebx. The new-value register is hardwired to be %ebx
+ since letting it be any integer register gives the problem
+ that %sil and %dil are unaddressible on x86 and hence we
+ would have to resort to the same kind of trickery as with
+ byte-sized Xin.Store, just below. Given that this isn't
+ performance critical, it is simpler just to force the
+ register operand to %ebx (could equally be %ecx or %edx).
+ (Although %ebx is more consistent with cmpxchg8b.) */
+ if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
+ *p++ = 0x0F;
+ if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
+ p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
+ goto done;
+
+ case Xin_DACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
+ in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
+ aren't encoded in the insn. */
+ *p++ = 0x0F;
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
+ goto done;
+
case Xin_Store:
if (i->Xin.Store.sz == 2) {
/* This case, at least, is simple, given that we can
Modified: branches/DCAS/priv/host-x86/hdefs.h
===================================================================
--- branches/DCAS/priv/host-x86/hdefs.h 2009-05-21 21:31:49 UTC (rev 1891)
+++ branches/DCAS/priv/host-x86/hdefs.h 2009-05-21 21:37:23 UTC (rev 1892)
@@ -367,6 +367,8 @@
Xin_Set32, /* convert condition code to 32-bit value */
Xin_Bsfr32, /* 32-bit bsf/bsr */
Xin_MFence, /* mem fence (not just sse2, but sse0 and 1 too) */
+ Xin_ACAS, /* 8/16/32-bit lock;cmpxchg */
+ Xin_DACAS, /* lock;cmpxchg8b (doubleword ACAS, 2 x 32-bit only) */
Xin_FpUnary, /* FP fake unary op */
Xin_FpBinary, /* FP fake binary op */
@@ -502,6 +504,17 @@
struct {
UInt hwcaps;
} MFence;
+ /* "lock;cmpxchg": mem address in .addr,
+ expected value in %eax, new value in %ebx */
+ struct {
+ X86AMode* addr;
+ UChar sz; /* 1, 2 or 4 */
+ } ACAS;
+ /* "lock;cmpxchg8b": mem address in .addr, expected value in
+ %edx:%eax, new value in %ecx:%ebx */
+ struct {
+ X86AMode* addr;
+ } DACAS;
/* X86 Floating point (fake 3-operand, "flat reg file" insns) */
struct {
@@ -638,6 +651,8 @@
extern X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst );
extern X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst );
extern X86Instr* X86Instr_MFence ( UInt hwcaps );
+extern X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz );
+extern X86Instr* X86Instr_DACAS ( X86AMode* addr );
extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst );
extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst );
Modified: branches/DCAS/priv/host-x86/isel.c
===================================================================
--- branches/DCAS/priv/host-x86/isel.c 2009-05-21 21:31:49 UTC (rev 1891)
+++ branches/DCAS/priv/host-x86/isel.c 2009-05-21 21:37:23 UTC (rev 1892)
@@ -3858,6 +3858,68 @@
}
break;
+ /* --------- ACAS --------- */
+ case Ist_CAS:
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+ /* "normal" singleton CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
+ X86AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ vassert(cas->expdHi == NULL);
+ vassert(cas->dataHi == NULL);
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
+ switch (ty) {
+ case Ity_I32: sz = 4; break;
+ case Ity_I16: sz = 2; break;
+ case Ity_I8: sz = 1; break;
+ default: goto unhandled_cas;
+ }
+ addInstr(env, X86Instr_ACAS(am, sz));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EAX()), rOldLo));
+ return;
+ } else {
+ /* double CAS */
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* only 32-bit allowed in this case */
+ /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
+ /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
+ X86AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ if (ty != Ity_I32)
+ goto unhandled_cas;
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
+ addInstr(env, X86Instr_DACAS(am));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EDX()), rOldHi));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EAX()), rOldLo));
+ return;
+ }
+ unhandled_cas:
+ break;
+
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
case Ist_IMark:
|
|
From: <sv...@va...> - 2009-05-21 21:31:56
|
Author: sewardj
Date: 2009-05-21 22:31:49 +0100 (Thu, 21 May 2009)
New Revision: 1891
Log:
Handle IRStmt_IRCAS in basic routines and in the optimiser pipeline.
Modified:
branches/DCAS/priv/ir/irdefs.c
branches/DCAS/priv/ir/iropt.c
Modified: branches/DCAS/priv/ir/irdefs.c
===================================================================
--- branches/DCAS/priv/ir/irdefs.c 2009-05-21 21:27:40 UTC (rev 1890)
+++ branches/DCAS/priv/ir/irdefs.c 2009-05-21 21:31:49 UTC (rev 1891)
@@ -723,6 +723,32 @@
vex_printf(")");
}
+void ppIRCAS ( IRCAS* cas )
+{
+ /* Print even structurally invalid constructions, as an aid to
+ debugging. */
+ if (cas->oldHi != IRTemp_INVALID) {
+ ppIRTemp(cas->oldHi);
+ vex_printf(",");
+ }
+ ppIRTemp(cas->oldLo);
+ vex_printf(" = CAS%s(", cas->end==Iend_LE ? "le" : "be" );
+ ppIRExpr(cas->addr);
+ vex_printf("::");
+ if (cas->expdHi) {
+ ppIRExpr(cas->expdHi);
+ vex_printf(",");
+ }
+ ppIRExpr(cas->expdLo);
+ vex_printf("->");
+ if (cas->dataHi) {
+ ppIRExpr(cas->dataHi);
+ vex_printf(",");
+ }
+ ppIRExpr(cas->dataLo);
+ vex_printf(")");
+}
+
void ppIRJumpKind ( IRJumpKind kind )
{
switch (kind) {
@@ -805,6 +831,9 @@
vex_printf( ") = ");
ppIRExpr(s->Ist.Store.data);
break;
+ case Ist_CAS:
+ ppIRCAS(s->Ist.CAS.details);
+ break;
case Ist_Dirty:
ppIRDirty(s->Ist.Dirty.details);
break;
@@ -1151,6 +1180,25 @@
}
+/* Constructors -- IRCAS */
+
+IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
+ IREndness end, IRExpr* addr,
+ IRExpr* expdHi, IRExpr* expdLo,
+ IRExpr* dataHi, IRExpr* dataLo ) {
+ IRCAS* cas = LibVEX_Alloc(sizeof(IRCAS));
+ cas->oldHi = oldHi;
+ cas->oldLo = oldLo;
+ cas->end = end;
+ cas->addr = addr;
+ cas->expdHi = expdHi;
+ cas->expdLo = expdLo;
+ cas->dataHi = dataHi;
+ cas->dataLo = dataLo;
+ return cas;
+}
+
+
/* Constructors -- IRStmt */
IRStmt* IRStmt_NoOp ( void )
@@ -1208,6 +1256,12 @@
vassert(end == Iend_LE || end == Iend_BE);
return s;
}
+IRStmt* IRStmt_CAS ( IRCAS* cas ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_CAS;
+ s->Ist.CAS.details = cas;
+ return s;
+}
IRStmt* IRStmt_Dirty ( IRDirty* d )
{
IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
@@ -1389,6 +1443,16 @@
return d2;
}
+IRCAS* deepCopyIRCAS ( IRCAS* cas )
+{
+ return mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
+ deepCopyIRExpr(cas->addr),
+ deepCopyIRExpr(cas->expdHi),
+ deepCopyIRExpr(cas->expdLo),
+ deepCopyIRExpr(cas->dataHi),
+ deepCopyIRExpr(cas->dataLo) );
+}
+
IRStmt* deepCopyIRStmt ( IRStmt* s )
{
switch (s->tag) {
@@ -1415,6 +1479,8 @@
return IRStmt_Store(s->Ist.Store.end,
deepCopyIRExpr(s->Ist.Store.addr),
deepCopyIRExpr(s->Ist.Store.data));
+ case Ist_CAS:
+ return IRStmt_CAS(deepCopyIRCAS(s->Ist.CAS.details));
case Ist_Dirty:
return IRStmt_Dirty(deepCopyIRDirty(s->Ist.Dirty.details));
case Ist_MBE:
@@ -1996,6 +2062,7 @@
Int i;
IRExpr* e;
IRDirty* di;
+ IRCAS* cas;
switch (st->tag) {
case Ist_AbiHint:
@@ -2046,6 +2113,13 @@
case Ist_Store:
return toBool( isIRAtom(st->Ist.Store.addr)
&& isIRAtom(st->Ist.Store.data) );
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ return toBool( isIRAtom(cas->addr)
+ && (cas->expdHi ? isIRAtom(cas->expdHi) : True)
+ && isIRAtom(cas->expdLo)
+ && (cas->dataHi ? isIRAtom(cas->dataHi) : True)
+ && isIRAtom(cas->dataLo) );
case Ist_Dirty:
di = st->Ist.Dirty.details;
if (!isIRAtom(di->guard))
@@ -2205,6 +2279,7 @@
{
Int i;
IRDirty* d;
+ IRCAS* cas;
switch (stmt->tag) {
case Ist_IMark:
break;
@@ -2226,6 +2301,16 @@
useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.addr,def_counts);
useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.data,def_counts);
break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ useBeforeDef_Expr(bb,stmt,cas->addr,def_counts);
+ if (cas->expdHi)
+ useBeforeDef_Expr(bb,stmt,cas->expdHi,def_counts);
+ useBeforeDef_Expr(bb,stmt,cas->expdLo,def_counts);
+ if (cas->dataHi)
+ useBeforeDef_Expr(bb,stmt,cas->dataHi,def_counts);
+ useBeforeDef_Expr(bb,stmt,cas->dataLo,def_counts);
+ break;
case Ist_Dirty:
d = stmt->Ist.Dirty.details;
for (i = 0; d->args[i] != NULL; i++)
@@ -2452,6 +2537,8 @@
{
Int i;
IRDirty* d;
+ IRCAS* cas;
+ IRType tyExpd, tyData;
IRTypeEnv* tyenv = bb->tyenv;
switch (stmt->tag) {
case Ist_IMark:
@@ -2502,6 +2589,56 @@
if (stmt->Ist.Store.end != Iend_LE && stmt->Ist.Store.end != Iend_BE)
sanityCheckFail(bb,stmt,"Ist.Store.end: bogus endianness");
break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ /* make sure it's definitely either a CAS or a DCAS */
+ if (cas->oldHi == IRTemp_INVALID
+ && cas->expdHi == NULL && cas->dataHi == NULL) {
+ /* fine; it's a single cas */
+ }
+ else
+ if (cas->oldHi != IRTemp_INVALID
+ && cas->expdHi != NULL && cas->dataHi != NULL) {
+ /* fine; it's a double cas */
+ }
+ else {
+ /* it's some el-mutanto hybrid */
+ goto bad_cas;
+ }
+ /* check the address type */
+ tcExpr( bb, stmt, cas->addr, gWordTy );
+ if (typeOfIRExpr(tyenv, cas->addr) != gWordTy) goto bad_cas;
+ /* check types on the {old,expd,data}Lo components agree */
+ tyExpd = typeOfIRExpr(tyenv, cas->expdLo);
+ tyData = typeOfIRExpr(tyenv, cas->dataLo);
+ if (tyExpd != tyData) goto bad_cas;
+ if (tyExpd != typeOfIRTemp(tyenv, cas->oldLo))
+ goto bad_cas;
+ /* check the base element type is sane */
+ if (tyExpd == Ity_I8 || tyExpd == Ity_I16 || tyExpd == Ity_I32
+ || (gWordTy == Ity_I64 && tyExpd == Ity_I64)) {
+ /* fine */
+ } else {
+ goto bad_cas;
+ }
+ /* If it's a DCAS, check types on the {old,expd,data}Hi
+ components too */
+ if (cas->oldHi != IRTemp_INVALID) {
+ tyExpd = typeOfIRExpr(tyenv, cas->expdHi);
+ tyData = typeOfIRExpr(tyenv, cas->dataHi);
+ if (tyExpd != tyData) goto bad_cas;
+ if (tyExpd != typeOfIRTemp(tyenv, cas->oldHi))
+ goto bad_cas;
+ /* and finally check that oldLo and oldHi have the same
+ type. This forces equivalence amongst all 6 types. */
+ if (typeOfIRTemp(tyenv, cas->oldHi)
+ != typeOfIRTemp(tyenv, cas->oldLo))
+ goto bad_cas;
+ }
+ break;
+ bad_cas:
+ sanityCheckFail(bb,stmt,"IRStmt.CAS: ill-formed");
+ break;
case Ist_Dirty:
/* Mostly check for various kinds of ill-formed dirty calls. */
d = stmt->Ist.Dirty.details;
@@ -2540,6 +2677,7 @@
break;
bad_dirty:
sanityCheckFail(bb,stmt,"IRStmt.Dirty: ill-formed");
+ break;
case Ist_NoOp:
break;
case Ist_MBE:
@@ -2614,10 +2752,15 @@
def_counts[i] = 0;
for (i = 0; i < bb->stmts_used; i++) {
+ IRDirty* d;
+ IRCAS* cas;
stmt = bb->stmts[i];
+ /* Check any temps used by this statement. */
useBeforeDef_Stmt(bb,stmt,def_counts);
- if (stmt->tag == Ist_WrTmp) {
+ /* Now make note of any temps defd by this statement. */
+ switch (stmt->tag) {
+ case Ist_WrTmp:
if (stmt->Ist.WrTmp.tmp < 0 || stmt->Ist.WrTmp.tmp >= n_temps)
sanityCheckFail(bb, stmt,
"IRStmt.Tmp: destination tmp is out of range");
@@ -2625,19 +2768,43 @@
if (def_counts[stmt->Ist.WrTmp.tmp] > 1)
sanityCheckFail(bb, stmt,
"IRStmt.Tmp: destination tmp is assigned more than once");
+ break;
+ case Ist_Dirty:
+ if (stmt->Ist.Dirty.details->tmp != IRTemp_INVALID) {
+ d = stmt->Ist.Dirty.details;
+ if (d->tmp < 0 || d->tmp >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Dirty: destination tmp is out of range");
+ def_counts[d->tmp]++;
+ if (def_counts[d->tmp] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Dirty: destination tmp is assigned more than once");
+ }
+ break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+
+ if (cas->oldHi != IRTemp_INVALID) {
+ if (cas->oldHi < 0 || cas->oldHi >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpHi is out of range");
+ def_counts[cas->oldHi]++;
+ if (def_counts[cas->oldHi] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpHi is assigned more than once");
+ }
+ if (cas->oldLo < 0 || cas->oldLo >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpLo is out of range");
+ def_counts[cas->oldLo]++;
+ if (def_counts[cas->oldLo] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpLo is assigned more than once");
+ break;
+ default:
+ /* explicitly handle the rest, so as to keep gcc quiet */
+ break;
}
- else
- if (stmt->tag == Ist_Dirty
- && stmt->Ist.Dirty.details->tmp != IRTemp_INVALID) {
- IRDirty* d = stmt->Ist.Dirty.details;
- if (d->tmp < 0 || d->tmp >= n_temps)
- sanityCheckFail(bb, stmt,
- "IRStmt.Dirty: destination tmp is out of range");
- def_counts[d->tmp]++;
- if (def_counts[d->tmp] > 1)
- sanityCheckFail(bb, stmt,
- "IRStmt.Dirty: destination tmp is assigned more than once");
- }
}
/* Typecheck everything. */
Modified: branches/DCAS/priv/ir/iropt.c
===================================================================
--- branches/DCAS/priv/ir/iropt.c 2009-05-21 21:27:40 UTC (rev 1890)
+++ branches/DCAS/priv/ir/iropt.c 2009-05-21 21:31:49 UTC (rev 1891)
@@ -388,8 +388,9 @@
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
Int i;
- IRExpr *e1, *e2;
+ IRExpr *e1, *e2, *e3, *e4, *e5;
IRDirty *d, *d2;
+ IRCAS *cas, *cas2;
switch (st->tag) {
case Ist_Put:
if (isIRAtom(st->Ist.Put.data)) {
@@ -426,6 +427,17 @@
e2 = flatten_Expr(bb, st->Ist.Store.data);
addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
break;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ e1 = flatten_Expr(bb, cas->addr);
+ e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
+ e3 = flatten_Expr(bb, cas->expdLo);
+ e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
+ e5 = flatten_Expr(bb, cas->dataLo);
+ cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
+ e1, e2, e3, e4, e5 );
+ addStmtToIRSB(bb, IRStmt_CAS(cas2));
+ break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
d2 = emptyIRDirty();
@@ -710,13 +722,14 @@
enough do a lot better if needed. */
/* Probably also overly-conservative, but also dump everything
if we hit a memory bus event (fence, lock, unlock). Ditto
- AbiHints.*/
+ AbiHints and CASs. */
case Ist_AbiHint:
vassert(isIRAtom(st->Ist.AbiHint.base));
vassert(isIRAtom(st->Ist.AbiHint.nia));
/* fall through */
case Ist_MBE:
case Ist_Dirty:
+ case Ist_CAS:
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
break;
@@ -1751,6 +1764,25 @@
fold_Expr(subst_Expr(env, st->Ist.Store.data))
);
+ case Ist_CAS: {
+ IRCAS *cas, *cas2;
+ cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ cas2 = mkIRCAS(
+ cas->oldHi, cas->oldLo, cas->end,
+ fold_Expr(subst_Expr(env, cas->addr)),
+ cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
+ fold_Expr(subst_Expr(env, cas->expdLo)),
+ cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
+ fold_Expr(subst_Expr(env, cas->dataLo))
+ );
+ return IRStmt_CAS(cas2);
+ }
+
case Ist_Dirty: {
Int i;
IRDirty *d, *d2;
@@ -1956,6 +1988,7 @@
{
Int i;
IRDirty* d;
+ IRCAS* cas;
switch (st->tag) {
case Ist_AbiHint:
addUses_Expr(set, st->Ist.AbiHint.base);
@@ -1975,6 +2008,16 @@
addUses_Expr(set, st->Ist.Store.addr);
addUses_Expr(set, st->Ist.Store.data);
return;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ addUses_Expr(set, cas->addr);
+ if (cas->expdHi)
+ addUses_Expr(set, cas->expdHi);
+ addUses_Expr(set, cas->expdLo);
+ if (cas->dataHi)
+ addUses_Expr(set, cas->dataHi);
+ addUses_Expr(set, cas->dataLo);
+ return;
case Ist_Dirty:
d = st->Ist.Dirty.details;
if (d->mFx != Ifx_None)
@@ -2561,7 +2604,7 @@
to do the no-overlap assessments needed for Put/PutI.
*/
switch (st->tag) {
- case Ist_Dirty: case Ist_Store: case Ist_MBE:
+ case Ist_Dirty: case Ist_Store: case Ist_MBE: case Ist_CAS:
paranoia = 2; break;
case Ist_Put: case Ist_PutI:
paranoia = 1; break;
@@ -3248,6 +3291,18 @@
deltaIRExpr(st->Ist.Store.addr, delta);
deltaIRExpr(st->Ist.Store.data, delta);
break;
+ case Ist_CAS:
+ if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
+ st->Ist.CAS.details->oldHi += delta;
+ st->Ist.CAS.details->oldLo += delta;
+ deltaIRExpr(st->Ist.CAS.details->addr, delta);
+ if (st->Ist.CAS.details->expdHi)
+ deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
+ deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
+ if (st->Ist.CAS.details->dataHi)
+ deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
+ deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
+ break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
deltaIRExpr(d->guard, delta);
@@ -3682,6 +3737,7 @@
{
Int i;
IRDirty* d;
+ IRCAS* cas;
switch (st->tag) {
case Ist_AbiHint:
aoccCount_Expr(uses, st->Ist.AbiHint.base);
@@ -3701,6 +3757,16 @@
aoccCount_Expr(uses, st->Ist.Store.addr);
aoccCount_Expr(uses, st->Ist.Store.data);
return;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ aoccCount_Expr(uses, cas->addr);
+ if (cas->expdHi)
+ aoccCount_Expr(uses, cas->expdHi);
+ aoccCount_Expr(uses, cas->expdLo);
+ if (cas->dataHi)
+ aoccCount_Expr(uses, cas->dataHi);
+ aoccCount_Expr(uses, cas->dataLo);
+ return;
case Ist_Dirty:
d = st->Ist.Dirty.details;
if (d->mFx != Ifx_None)
@@ -3910,9 +3976,9 @@
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
- Int i;
- IRDirty* d;
- IRDirty* d2;
+ Int i;
+ IRDirty *d, *d2;
+ IRCAS *cas, *cas2;
switch (st->tag) {
case Ist_AbiHint:
return IRStmt_AbiHint(
@@ -3956,6 +4022,17 @@
return IRStmt_NoOp();
case Ist_MBE:
return IRStmt_MBE(st->Ist.MBE.event);
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ cas2 = mkIRCAS(
+ cas->oldHi, cas->oldLo, cas->end,
+ atbSubst_Expr(env, cas->addr),
+ cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
+ atbSubst_Expr(env, cas->expdLo),
+ cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
+ atbSubst_Expr(env, cas->dataLo)
+ );
+ return IRStmt_CAS(cas2);
case Ist_Dirty:
d = st->Ist.Dirty.details;
d2 = emptyIRDirty();
@@ -4239,9 +4316,10 @@
/*OUT*/Bool* hasVorFtemps,
IRSB* bb )
{
- Int i, j;
- IRStmt* st;
+ Int i, j;
+ IRStmt* st;
IRDirty* d;
+ IRCAS* cas;
*hasGetIorPutI = False;
*hasVorFtemps = False;
@@ -4277,6 +4355,14 @@
vassert(isIRAtom(st->Ist.Store.addr));
vassert(isIRAtom(st->Ist.Store.data));
break;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
vassert(isIRAtom(d->guard));
|
|
From: <sv...@va...> - 2009-05-21 21:27:49
|
Author: sewardj
Date: 2009-05-21 22:27:40 +0100 (Thu, 21 May 2009)
New Revision: 1890
Log:
Add a new IR statement kind, IRStmt_IRCAS, which represents single-
and double-word atomic compare-and-swap operations.
Modified:
branches/DCAS/pub/libvex_ir.h
Modified: branches/DCAS/pub/libvex_ir.h
===================================================================
--- branches/DCAS/pub/libvex_ir.h 2009-05-21 19:24:01 UTC (rev 1889)
+++ branches/DCAS/pub/libvex_ir.h 2009-05-21 21:27:40 UTC (rev 1890)
@@ -1358,6 +1358,93 @@
extern void ppIRMBusEvent ( IRMBusEvent );
+/* --------------- Compare and Swap --------------- */
+
+/* This denotes an atomic compare and swap operation, either
+ a single-element one or a double-element one.
+
+ In the single-element case:
+
+ .addr is the memory address.
+ .end is the endianness with which memory is accessed
+
+ If .addr contains the same value as .expdLo, then .dataLo is
+ written there, else there is no write. In both cases, the
+ original value at .addr is copied into .oldLo.
+
+ Types: .expdLo, .dataLo and .oldLo must all have the same type.
+ It may be any integral type, viz: I8, I16, I32 or, for 64-bit
+ guests, I64.
+
+ .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
+ be NULL.
+
+ In the double-element case:
+
+ .addr is the memory address.
+ .end is the endianness with which memory is accessed
+
+ The operation is the same:
+
+ If .addr contains the same value as .expdHi:.expdLo, then
+ .dataHi:.dataLo is written there, else there is no write. In
+ both cases the original value at .addr is copied into
+ .oldHi:.oldLo.
+
+ Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
+ all have the same type, which may be any integral type, viz: I8,
+ I16, I32 or, for 64-bit guests, I64.
+
+ The double-element case is complicated by the issue of
+ endianness. In all cases, the two elements are understood to be
+ located adjacently in memory, starting at the address .addr.
+
+ If .end is Iend_LE, then the .xxxLo component is at the lower
+ address and the .xxxHi component is at the higher address, and
+ each component is itself stored little-endianly.
+
+ If .end is Iend_BE, then the .xxxHi component is at the lower
+ address and the .xxxLo component is at the higher address, and
+ each component is itself stored big-endianly.
+
+ This allows representing more cases than most architectures can
+ handle. For example, x86 cannot do DCAS on 8- or 16-bit elements.
+
+ How to know if the CAS succeeded?
+
+ * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
+ then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
+ stored at .addr, and the original value there was .oldLo (resp
+ .oldHi:.oldLo).
+
+ * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
+ then the CAS failed, and the original value at .addr was .oldLo
+ (resp. .oldHi:.oldLo).
+
+ Hence it is easy to know whether or not the CAS succeeded.
+*/
+typedef
+ struct {
+ IRTemp oldHi; /* old value of *addr is written here */
+ IRTemp oldLo;
+ IREndness end; /* endianness of the data in memory */
+ IRExpr* addr; /* store address */
+ IRExpr* expdHi; /* expected old value at *addr */
+ IRExpr* expdLo;
+ IRExpr* dataHi; /* new value for *addr */
+ IRExpr* dataLo;
+ }
+ IRCAS;
+
+extern void ppIRCAS ( IRCAS* cas );
+
+extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
+ IREndness end, IRExpr* addr,
+ IRExpr* expdHi, IRExpr* expdLo,
+ IRExpr* dataHi, IRExpr* dataLo );
+
+extern IRCAS* deepCopyIRCAS ( IRCAS* );
+
/* ------------------ Statements ------------------ */
/* The different kinds of statements. Their meaning is explained
@@ -1379,6 +1466,7 @@
Ist_PutI,
Ist_WrTmp,
Ist_Store,
+ Ist_CAS,
Ist_Dirty,
Ist_MBE, /* META (maybe) */
Ist_Exit
@@ -1392,7 +1480,7 @@
'st.Ist.Store.<fieldname>'.
For each kind of statement, we show what it looks like when
- pretty-printed with ppIRExpr().
+ pretty-printed with ppIRStmt().
*/
typedef
struct _IRStmt {
@@ -1401,7 +1489,7 @@
/* A no-op (usually resulting from IR optimisation). Can be
omitted without any effect.
- ppIRExpr output: IR-NoOp
+ ppIRStmt output: IR-NoOp
*/
struct {
} NoOp;
@@ -1412,7 +1500,7 @@
the IRSB). Contains the address and length of the
instruction.
- ppIRExpr output: ------ IMark(<addr>, <len>) ------,
+ ppIRStmt output: ------ IMark(<addr>, <len>) ------,
eg. ------ IMark(0x4000792, 5) ------,
*/
struct {
@@ -1431,7 +1519,7 @@
next (dynamic) instruction that will be executed. This is
to help Memcheck to origin tracking.
- ppIRExpr output: ====== AbiHint(<base>, <len>, <nia>) ======
+ ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
eg. ====== AbiHint(t1, 16, t2) ======
*/
struct {
@@ -1441,7 +1529,7 @@
} AbiHint;
/* Write a guest register, at a fixed offset in the guest state.
- ppIRExpr output: PUT(<offset>) = <data>, eg. PUT(60) = t1
+ ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
*/
struct {
Int offset; /* Offset into the guest state */
@@ -1452,7 +1540,7 @@
state. See the comment for GetI expressions for more
information.
- ppIRExpr output: PUTI<descr>[<ix>,<bias>] = <data>,
+ ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
eg. PUTI(64:8xF64)[t5,0] = t1
*/
struct {
@@ -1467,7 +1555,7 @@
reject any block containing a temporary which is not assigned
to exactly once.
- ppIRExpr output: t<tmp> = <data>, eg. t1 = 3
+ ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
*/
struct {
IRTemp tmp; /* Temporary (LHS of assignment) */
@@ -1475,7 +1563,7 @@
} WrTmp;
/* Write a value to memory.
- ppIRExpr output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
+ ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
*/
struct {
IREndness end; /* Endianness of the store */
@@ -1483,11 +1571,29 @@
IRExpr* data; /* value to write */
} Store;
+ /* Do an atomic compare-and-swap operation. Semantics are
+ described above on a comment at the definition of IRCAS.
+
+ ppIRStmt output:
+ t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
+ eg
+ t1 = CASle(t2 :: t3->Add32(t3,1))
+ which denotes a 32-bit atomic increment
+ of a value at address t2
+
+ A double-element CAS may also be denoted, in which case <tmp>,
+ <expected> and <new> are all pairs of items, separated by
+ commas.
+ */
+ struct {
+ IRCAS* details;
+ } CAS;
+
/* Call (possibly conditionally) a C function that has side
effects (ie. is "dirty"). See the comments above the
IRDirty type declaration for more information.
- ppIRExpr output:
+ ppIRStmt output:
t<tmp> = DIRTY <guard> <effects>
::: <callee>(<args>)
eg.
@@ -1501,7 +1607,7 @@
/* A memory bus event - a fence, or acquisition/release of the
hardware bus lock. IR optimisation treats all these as fences
across which no memory references may be moved.
- ppIRExpr output: MBusEvent-Fence,
+ ppIRStmt output: MBusEvent-Fence,
MBusEvent-BusLock, MBusEvent-BusUnlock.
*/
struct {
@@ -1509,7 +1615,7 @@
} MBE;
/* Conditional exit from the middle of an IRSB.
- ppIRExpr output: if (<guard>) goto {<jk>} <dst>
+ ppIRStmt output: if (<guard>) goto {<jk>} <dst>
eg. if (t69) goto {Boring} 0x4000AAA:I32
*/
struct {
@@ -1530,6 +1636,7 @@
IRExpr* data );
extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data );
+extern IRStmt* IRStmt_CAS ( IRCAS* details );
extern IRStmt* IRStmt_Dirty ( IRDirty* details );
extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
|
|
From: <sv...@va...> - 2009-05-21 19:37:16
|
Author: sewardj Date: 2009-05-21 20:37:04 +0100 (Thu, 21 May 2009) New Revision: 10077 Log: Swizzle external. Modified: branches/DCAS/ Property changes on: branches/DCAS ___________________________________________________________________ Name: svn:externals - VEX svn://svn.valgrind.org/vex/trunk + VEX svn://svn.valgrind.org/vex/branches/DCAS |
|
From: <sv...@va...> - 2009-05-21 19:34:42
|
Author: sewardj Date: 2009-05-21 20:34:20 +0100 (Thu, 21 May 2009) New Revision: 10076 Log: Make a copy of trunk r10075, to experiment with genuinely atomic implementations of LOCK-prefixed x86/amd64 instructions. Added: branches/DCAS/ Copied: branches/DCAS (from rev 10075, trunk) Property changes on: branches/DCAS ___________________________________________________________________ Name: svn:ignore + acinclude.m4 aclocal.m4 autom4te-*.cache autom4te.cache bin cachegrind cachegrind.out.* compile config.guess config.h* config.log config.status config.sub configure default.supp glibc-2.X.supp depcomp include .in_place install-sh lib Makefile Makefile.in missing mkinstalldirs share stamp-h* svn-commit.tmp svn-commit.2.tmp valgrind valgrind.pc valgrind.spec valt_load_address*.lds vg_annotate vg_cachegen Name: svn:externals + VEX svn://svn.valgrind.org/vex/trunk Name: svn:mergeinfo + |
|
From: <sv...@va...> - 2009-05-21 19:24:09
|
Author: sewardj Date: 2009-05-21 20:24:01 +0100 (Thu, 21 May 2009) New Revision: 1889 Log: Make a copy of trunk r1888, to experiment with genuinely atomic implementations of LOCK-prefixed x86/amd64 instructions. Added: branches/DCAS/ Copied: branches/DCAS (from rev 1888, trunk) Property changes on: branches/DCAS ___________________________________________________________________ Name: svn:ignore + libvex*.a TAG_* Name: svn:mergeinfo + |
|
From: <sv...@va...> - 2009-05-21 15:33:43
|
Author: sewardj
Date: 2009-05-21 16:33:36 +0100 (Thu, 21 May 2009)
New Revision: 10075
Log:
Handle DW_CFA_{remember,restore}_state. This requires having a stack
of currently on-the-go register rules, rather than just one.
gcc doesn't appear to generate these (it's pretty darn obscure), but
they do turn up a piece of handwritten assembly somewhere in the
depths of Python-2.6 on amd64-linux.
Modified:
trunk/coregrind/m_debuginfo/readdwarf.c
Modified: trunk/coregrind/m_debuginfo/readdwarf.c
===================================================================
--- trunk/coregrind/m_debuginfo/readdwarf.c 2009-05-21 14:54:05 UTC (rev 10074)
+++ trunk/coregrind/m_debuginfo/readdwarf.c 2009-05-21 15:33:36 UTC (rev 10075)
@@ -1922,6 +1922,11 @@
}
+/* Size of the stack of register unwind rules. This is only
+ exceedingly rarely used, so a stack of size 1 should actually work
+ with almost all compiler-generated CFA. */
+#define N_RR_STACK 4
+
typedef
struct {
/* Read-only fields (set by the CIE) */
@@ -1938,8 +1943,12 @@
Int cfa_reg;
Int cfa_off; /* in bytes */
Int cfa_expr_ix; /* index into cfa_exprs */
- /* register unwind rules */
- RegRule reg[N_CFI_REGS];
+ /* A stack of register unwind rules. We need a stack of them,
+ rather than just one set of rules, in order to handle
+ DW_CFA_{remember,restore}_state. */
+ RegRule reg[N_RR_STACK][N_CFI_REGS];
+ Int reg_sp; /* 0 <= reg_sp < N_RR_STACK; points at the
+ currently-in-use rule set. */
/* array of CfiExpr, shared by reg[] and cfa_expr_ix */
XArray* exprs;
}
@@ -1947,7 +1956,7 @@
static void ppUnwindContext ( UnwindContext* ctx )
{
- Int i;
+ Int j, i;
VG_(printf)("0x%llx: ", (ULong)ctx->loc);
if (ctx->cfa_is_regoff) {
VG_(printf)("%d(r%d) ", ctx->cfa_off, ctx->cfa_reg);
@@ -1957,14 +1966,19 @@
ML_(ppCfiExpr)( ctx->exprs, ctx->cfa_expr_ix );
VG_(printf)("} ");
}
- for (i = 0; i < N_CFI_REGS; i++)
- ppRegRule(ctx->exprs, &ctx->reg[i]);
+ for (j = 0; j <= ctx->reg_sp; j++) {
+ VG_(printf)("%s[%d]={ ", j > 0 ? " " : "", j);
+ for (i = 0; i < N_CFI_REGS; i++)
+ ppRegRule(ctx->exprs, &ctx->reg[j][i]);
+ VG_(printf)("}");
+ }
VG_(printf)("\n");
}
static void initUnwindContext ( /*OUT*/UnwindContext* ctx )
{
- Int i;
+ Int j, i;
+ VG_(memset)(ctx, 0, sizeof(*ctx));
ctx->code_a_f = 0;
ctx->data_a_f = 0;
ctx->initloc = 0;
@@ -1975,9 +1989,12 @@
ctx->cfa_off = 0;
ctx->cfa_expr_ix = 0;
ctx->exprs = NULL;
- for (i = 0; i < N_CFI_REGS; i++) {
- ctx->reg[i].tag = RR_Undef;
- ctx->reg[i].arg = 0;
+ ctx->reg_sp = 0;
+ for (j = 0; j < N_RR_STACK; j++) {
+ for (i = 0; i < N_CFI_REGS; i++) {
+ ctx->reg[j][i].tag = RR_Undef;
+ ctx->reg[j][i].arg = 0;
+ }
}
}
@@ -2103,9 +2120,16 @@
why = 2; goto failed; /* otherwise give up */ \
}
- SUMMARISE_HOW(si->ra_how, si->ra_off, ctx->reg[ctx->ra_reg] );
- SUMMARISE_HOW(si->fp_how, si->fp_off, ctx->reg[FP_REG] );
+ /* Guard against obviously stupid settings of the reg-rule stack
+ pointer. */
+ if (ctx->reg_sp < 0) { why = 8; goto failed; }
+ if (ctx->reg_sp >= N_RR_STACK) { why = 9; goto failed; }
+ SUMMARISE_HOW(si->ra_how, si->ra_off,
+ ctx->reg[ctx->reg_sp][ctx->ra_reg] );
+ SUMMARISE_HOW(si->fp_how, si->fp_off,
+ ctx->reg[ctx->reg_sp][FP_REG] );
+
# undef SUMMARISE_HOW
/* on x86/amd64, it seems the old %{e,r}sp value before the call is
@@ -2115,7 +2139,7 @@
/* also, gcc says "Undef" for %{e,r}bp when it is unchanged. So
.. */
- if (ctx->reg[FP_REG].tag == RR_Undef)
+ if (ctx->reg[ctx->reg_sp][FP_REG].tag == RR_Undef)
si->fp_how = CFIR_SAME;
/* knock out some obviously stupid cases */
@@ -2214,10 +2238,10 @@
}
VG_(printf)("RA=");
- ppRegRule( ctx->exprs, &ctx->reg[ctx->ra_reg] );
+ ppRegRule( ctx->exprs, &ctx->reg[ctx->reg_sp][ctx->ra_reg] );
VG_(printf)("FP=");
- ppRegRule( ctx->exprs, &ctx->reg[FP_REG] );
+ ppRegRule( ctx->exprs, &ctx->reg[ctx->reg_sp][FP_REG] );
VG_(printf)("\n");
}
@@ -2663,6 +2687,9 @@
Addr printing_bias = ((Addr)ctx->initloc) - ((Addr)di->text_bias);
i++;
+ if (ctx->reg_sp < 0 || ctx->reg_sp >= N_RR_STACK)
+ return 0; /* bogus reg-rule stack pointer */
+
if (hi2 == DW_CFA_advance_loc) {
delta = (UInt)lo6;
ctx->loc += delta;
@@ -2679,12 +2706,13 @@
reg = (Int)lo6;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_CFAOff;
- ctx->reg[reg].arg = off * ctx->data_a_f;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_CFAOff;
+ ctx->reg[ctx->reg_sp][reg].arg = off * ctx->data_a_f;
if (di->ddump_frames)
VG_(printf)(" DW_CFA_offset: r%d at cfa%s%d\n",
- (Int)reg, ctx->reg[reg].arg < 0 ? "" : "+",
- (Int)ctx->reg[reg].arg );
+ (Int)reg,
+ ctx->reg[ctx->reg_sp][reg].arg < 0 ? "" : "+",
+ (Int)ctx->reg[ctx->reg_sp][reg].arg );
return i;
}
@@ -2694,7 +2722,7 @@
return 0; /* fail */
if (restore_ctx == NULL)
return 0; /* fail */
- ctx->reg[reg] = restore_ctx->reg[reg];
+ ctx->reg[ctx->reg_sp][reg] = restore_ctx->reg[ctx->reg_sp][reg];
if (di->ddump_frames)
VG_(printf)(" DW_CFA_restore: r%d\n", (Int)reg);
return i;
@@ -2780,8 +2808,8 @@
return 0; /* fail */
if (reg2 < 0 || reg2 >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_Reg;
- ctx->reg[reg].arg = reg2;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_Reg;
+ ctx->reg[ctx->reg_sp][reg].arg = reg2;
if (di->ddump_frames)
VG_(printf)(" DW_CFA_register: r%d in r%d\n",
(Int)reg, (Int)reg2);
@@ -2794,8 +2822,8 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_CFAOff;
- ctx->reg[reg].arg = off * ctx->data_a_f;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_CFAOff;
+ ctx->reg[ctx->reg_sp][reg].arg = off * ctx->data_a_f;
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_offset_extended\n");
break;
@@ -2807,12 +2835,13 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_CFAOff;
- ctx->reg[reg].arg = off * ctx->data_a_f;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_CFAOff;
+ ctx->reg[ctx->reg_sp][reg].arg = off * ctx->data_a_f;
if (di->ddump_frames)
VG_(printf)(" DW_CFA_offset_extended_sf: r%d at cfa%s%d\n",
- reg, ctx->reg[reg].arg < 0 ? "" : "+",
- (Int)ctx->reg[reg].arg);
+ reg,
+ ctx->reg[ctx->reg_sp][reg].arg < 0 ? "" : "+",
+ (Int)ctx->reg[ctx->reg_sp][reg].arg);
break;
case DW_CFA_GNU_negative_offset_extended:
@@ -2822,8 +2851,8 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_CFAOff;
- ctx->reg[reg].arg = (-off) * ctx->data_a_f;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_CFAOff;
+ ctx->reg[ctx->reg_sp][reg].arg = (-off) * ctx->data_a_f;
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_GNU_negative_offset_extended\n");
break;
@@ -2835,7 +2864,7 @@
return 0; /* fail */
if (restore_ctx == NULL)
return 0; /* fail */
- ctx->reg[reg] = restore_ctx->reg[reg];
+ ctx->reg[ctx->reg_sp][reg] = restore_ctx->reg[ctx->reg_sp][reg];
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_restore_extended\n");
break;
@@ -2847,8 +2876,8 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_CFAValOff;
- ctx->reg[reg].arg = off * ctx->data_a_f;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_CFAValOff;
+ ctx->reg[ctx->reg_sp][reg].arg = off * ctx->data_a_f;
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_val_offset\n");
break;
@@ -2860,8 +2889,8 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_CFAValOff;
- ctx->reg[reg].arg = off * ctx->data_a_f;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_CFAValOff;
+ ctx->reg[ctx->reg_sp][reg].arg = off * ctx->data_a_f;
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_val_offset_sf\n");
break;
@@ -2906,8 +2935,8 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_Undef;
- ctx->reg[reg].arg = 0;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_Undef;
+ ctx->reg[ctx->reg_sp][reg].arg = 0;
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_undefined\n");
break;
@@ -2917,8 +2946,8 @@
i += nleb;
if (reg < 0 || reg >= N_CFI_REGS)
return 0; /* fail */
- ctx->reg[reg].tag = RR_Same;
- ctx->reg[reg].arg = 0;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_Same;
+ ctx->reg[ctx->reg_sp][reg].arg = 0;
if (di->ddump_frames)
VG_(printf)(" rci:DW_CFA_same_value\n");
break;
@@ -2962,8 +2991,8 @@
return 0; /* fail */
/* Add an extra dereference */
j = ML_(CfiExpr_Deref)( ctx->exprs, j );
- ctx->reg[reg].tag = RR_ValExpr;
- ctx->reg[reg].arg = j;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_ValExpr;
+ ctx->reg[ctx->reg_sp][reg].arg = j;
break;
case DW_CFA_val_expression:
@@ -2991,8 +3020,8 @@
}
if (j == -1)
return 0; /* fail */
- ctx->reg[reg].tag = RR_ValExpr;
- ctx->reg[reg].arg = j;
+ ctx->reg[ctx->reg_sp][reg].tag = RR_ValExpr;
+ ctx->reg[ctx->reg_sp][reg].arg = j;
break;
case DW_CFA_def_cfa_expression:
@@ -3018,9 +3047,41 @@
/* Ignored. This appears to be sparc-specific; quite why it
turns up in SuSE-supplied x86 .so's beats me. */
if (di->ddump_frames)
- VG_(printf)("DW_CFA_GNU_window_save\n");
+ VG_(printf)(" DW_CFA_GNU_window_save\n");
break;
+ case DW_CFA_remember_state:
+ if (di->ddump_frames)
+ VG_(printf)(" DW_CFA_remember_state\n");
+ /* we just checked this at entry, so: */
+ vg_assert(ctx->reg_sp >= 0 && ctx->reg_sp < N_RR_STACK);
+ ctx->reg_sp++;
+ if (ctx->reg_sp == N_RR_STACK) {
+ /* stack overflow. We're hosed. */
+ VG_(message)(Vg_DebugMsg, "DWARF2 CFI reader: N_RR_STACK is "
+ "too low; increase and recompile.");
+ i = 0; /* indicate failure */
+ } else {
+ VG_(memcpy)(/*dst*/&ctx->reg[ctx->reg_sp],
+ /*src*/&ctx->reg[ctx->reg_sp - 1],
+ sizeof(ctx->reg[ctx->reg_sp]) );
+ }
+ break;
+
+ case DW_CFA_restore_state:
+ if (di->ddump_frames)
+ VG_(printf)(" DW_CFA_restore_state\n");
+ /* we just checked this at entry, so: */
+ vg_assert(ctx->reg_sp >= 0 && ctx->reg_sp < N_RR_STACK);
+ if (ctx->reg_sp == 0) {
+ /* stack underflow. Give up. */
+ i = 0; /* indicate failure */
+ } else {
+ /* simply fall back to previous entry */
+ ctx->reg_sp--;
+ }
+ break;
+
default:
VG_(message)(Vg_DebugMsg, "DWARF2 CFI reader: unhandled CFI "
"instruction 0:%d", (Int)lo6);
|
|
From: <sv...@va...> - 2009-05-21 14:54:13
|
Author: sewardj
Date: 2009-05-21 15:54:05 +0100 (Thu, 21 May 2009)
New Revision: 10074
Log:
When updating the constraint for a location following a race, make
sure the read-constraint <= the write-constraint. Failure to do this
leads to assertion failures later on. Fixes #181394.
Modified:
trunk/helgrind/libhb_core.c
Modified: trunk/helgrind/libhb_core.c
===================================================================
--- trunk/helgrind/libhb_core.c 2009-05-21 14:49:55 UTC (rev 10073)
+++ trunk/helgrind/libhb_core.c 2009-05-21 14:54:05 UTC (rev 10074)
@@ -3670,18 +3670,21 @@
/* assert on sanity of constraints. */
POrd ordxx = VtsID__getOrdering(rmini,wmini);
tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
- svNew = MSM_RACE2ERR
- ? SVal__mkE()
- /* see comments on corresponding fragment in
- msm_write for explanation. */
- /* aggressive setting: */
- /*
- : SVal__mkC( VtsID__join2(wmini,tviR),
- VtsID__join2(wmini,tviW) );
- */
- /* "consistent" setting: */
- : SVal__mkC( VtsID__join2(rmini,tviR),
- VtsID__join2(wmini,tviW) );
+ /* Compute svNew following the race. This isn't so
+ simple. */
+ /* see comments on corresponding fragment in
+ msm_write for explanation. */
+ if (MSM_RACE2ERR) {
+ /* XXX UNUSED; this should be deleted */
+ tl_assert(0);
+ svNew = SVal__mkE();
+ } else {
+ VtsID r_joined = VtsID__join2(rmini,tviR);
+ VtsID w_joined = VtsID__join2(wmini,tviW);
+ /* ensure that r_joined <= w_joined */
+ w_joined = VtsID__join2( w_joined, r_joined );
+ svNew = SVal__mkC( r_joined, w_joined );
+ }
record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ );
goto out;
}
@@ -3747,22 +3750,40 @@
/* assert on sanity of constraints. */
POrd ordxx = VtsID__getOrdering(rmini,wmini);
tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
- svNew = MSM_RACE2ERR
- ? SVal__mkE()
- /* One possibility is, after a race is seen, to
- set the location's constraints as aggressively
- (as far ahead) as possible. However, that just
- causes lots more races to be reported, which is
- very confusing. Hence don't do this. */
- /*
- : SVal__mkC( VtsID__join2(wmini,tviR),
- VtsID__join2(wmini,tviW) );
- */
- /* instead, re-set the constraints in a way which
- is consistent with (ie, as they would have been
- computed anyway) had no race been detected. */
- : SVal__mkC( VtsID__join2(rmini,tviR),
- VtsID__join2(wmini,tviW) );
+ /* Compute svNew following the race. This isn't so
+ simple. */
+ if (MSM_RACE2ERR) {
+ /* XXX UNUSED; this should be deleted */
+ tl_assert(0);
+ svNew = SVal__mkE();
+ /* One possibility is, after a race is seen, to
+ set the location's constraints as aggressively
+ (as far ahead) as possible. However, that just
+ causes lots more races to be reported, which is
+ very confusing. Hence don't do this. */
+ /*
+ = SVal__mkC( VtsID__join2(wmini,tviR),
+ VtsID__join2(wmini,tviW) );
+ */
+ } else {
+ /* instead, re-set the constraints in a way which is
+ consistent with the case where no race was detected
+ (ie, as they would have been computed anyway). */
+ VtsID r_joined = VtsID__join2(rmini,tviR);
+ VtsID w_joined = VtsID__join2(wmini,tviW);
+ /* Because of the race, the "normal" ordering constraint
+ wmini(constraint) <= tviW(actual access) no longer
+ holds. Hence it can be that the required constraint
+ (on SVal_Cs) r_joined <= w_joined does not hold either.
+ To fix this and guarantee we're not generating invalid
+ SVal_Cs, do w_joined = w_joined `join` r_joined, so as
+ to force r_joined <= w_joined in the arguments to
+ SVal__mkC. I think this is only important when we're
+ dealing with reader-writer locks.
+ */
+ w_joined = VtsID__join2( w_joined, r_joined );
+ svNew = SVal__mkC( r_joined, w_joined );
+ }
record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ );
goto out;
}
|