|
From: <sv...@va...> - 2007-09-16 11:04:32
|
Author: sewardj
Date: 2007-09-16 12:04:24 +0100 (Sun, 16 Sep 2007)
New Revision: 1789
Log:
Changes to facilitate passing x86/amd64 LOCK prefixes through to
Valgrind tools:
* Generalise IRStmt_MFence to IRStmt_MBE ("memory bus event"), so
the IR can carry not only notifications of memory fences, but also
of notional hardware bus locks / unlocks
* Generate these in the amd64->IR front end
* Generate these in the x86->IR front end, and tidy up messy handling
of instruction prefixes in general -- make it a bit more like how the
amd64->IR front end handles prefixes
No equivalent changes to the ppc->IR front ends since the lwarx/stwcx.
methodology for atomic memory changes does not fit this model.
Modified:
branches/THRCHECK/priv/guest-amd64/toIR.c
branches/THRCHECK/priv/guest-ppc/toIR.c
branches/THRCHECK/priv/guest-x86/toIR.c
branches/THRCHECK/priv/host-amd64/isel.c
branches/THRCHECK/priv/host-ppc/isel.c
branches/THRCHECK/priv/host-x86/isel.c
branches/THRCHECK/priv/ir/irdefs.c
branches/THRCHECK/priv/ir/iropt.c
branches/THRCHECK/pub/libvex_ir.h
Modified: branches/THRCHECK/priv/guest-amd64/toIR.c
===================================================================
--- branches/THRCHECK/priv/guest-amd64/toIR.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/guest-amd64/toIR.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -1963,7 +1963,7 @@
/*------------------------------------------------------------*/
static
-HChar* sorbTxt ( Prefix pfx )
+HChar* segRegTxt ( Prefix pfx )
{
if (pfx & PFX_CS) return "%cs:";
if (pfx & PFX_DS) return "%ds:";
@@ -2115,7 +2115,7 @@
case 0x00: case 0x01: case 0x02: case 0x03:
/* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
{ UChar rm = toUChar(mod_reg_rm & 7);
- DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
*len = 1;
return disAMode_copy2tmp(
handleAddrOverrides(pfx, getIRegRexB(8,pfx,rm)));
@@ -2129,9 +2129,9 @@
{ UChar rm = toUChar(mod_reg_rm & 7);
Long d = getSDisp8(delta);
if (d == 0) {
- DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
} else {
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx), d, nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
}
*len = 2;
return disAMode_copy2tmp(
@@ -2146,7 +2146,7 @@
/* ! 14 */ case 0x15: case 0x16: case 0x17:
{ UChar rm = toUChar(mod_reg_rm & 7);
Long d = getSDisp32(delta);
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx), d, nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
*len = 5;
return disAMode_copy2tmp(
handleAddrOverrides(pfx,
@@ -2164,7 +2164,7 @@
case 0x05:
{ Long d = getSDisp32(delta);
*len = 5;
- DIS(buf, "%s%lld(%%rip)", sorbTxt(pfx), d);
+ DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
/* We need to know the next instruction's start address.
Try and figure out what it is, record the guess, and ask
the top-level driver logic (bbToIR_AMD64) to check we
@@ -2207,11 +2207,11 @@
if ((!index_is_SP) && (!base_is_BPor13)) {
if (scale == 0) {
- DIS(buf, "%s(%s,%s)", sorbTxt(pfx),
+ DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r));
} else {
- DIS(buf, "%s(%s,%s,%d)", sorbTxt(pfx),
+ DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r), 1<<scale);
}
@@ -2227,7 +2227,7 @@
if ((!index_is_SP) && base_is_BPor13) {
Long d = getSDisp32(delta);
- DIS(buf, "%s%lld(,%s,%d)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
nameIReg64rexX(pfx,index_r), 1<<scale);
*len = 6;
return
@@ -2240,7 +2240,7 @@
}
if (index_is_SP && (!base_is_BPor13)) {
- DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,base_r));
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
*len = 2;
return disAMode_copy2tmp(
handleAddrOverrides(pfx, getIRegRexB(8,pfx,base_r)));
@@ -2248,7 +2248,7 @@
if (index_is_SP && base_is_BPor13) {
Long d = getSDisp32(delta);
- DIS(buf, "%s%lld", sorbTxt(pfx), d);
+ DIS(buf, "%s%lld", segRegTxt(pfx), d);
*len = 6;
return disAMode_copy2tmp(
handleAddrOverrides(pfx, mkU64(d)));
@@ -2274,7 +2274,7 @@
Long d = getSDisp8(delta+1);
if (index_r == R_RSP && 0==getRexX(pfx)) {
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx),
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
d, nameIRegRexB(8,pfx,base_r));
*len = 3;
return disAMode_copy2tmp(
@@ -2282,11 +2282,11 @@
binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
} else {
if (scale == 0) {
- DIS(buf, "%s%lld(%s,%s)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r));
} else {
- DIS(buf, "%s%lld(%s,%s,%d)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r), 1<<scale);
}
@@ -2321,7 +2321,7 @@
Long d = getSDisp32(delta+1);
if (index_r == R_RSP && 0==getRexX(pfx)) {
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx),
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
d, nameIRegRexB(8,pfx,base_r));
*len = 6;
return disAMode_copy2tmp(
@@ -2329,11 +2329,11 @@
binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
} else {
if (scale == 0) {
- DIS(buf, "%s%lld(%s,%s)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r));
} else {
- DIS(buf, "%s%lld(%s,%s,%d)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r), 1<<scale);
}
@@ -8299,6 +8299,96 @@
}
+/* Helper for deciding whether a given insn (starting at the opcode
+ byte) may validly be used with a LOCK prefix. The following insns
+ may be used with LOCK when their destination operand is in memory.
+ Note, this is slightly too permissive. Oh well. Note also, AFAICS
+ this is exactly the same for both 32-bit and 64-bit mode.
+
+ ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
+ OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
+ ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
+ SBB 80 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
+ AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
+ SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
+ XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+
+ DEC FE /1, FF /1
+ INC FE /0, FF /0
+
+ NEG F6 /3, F7 /3
+ NOT F6 /2, F7 /2
+
+ XCHG 86, 87
+
+ BTC 0F BB, 0F BA /7
+ BTR 0F B3, 0F BA /6
+ BTS 0F AB, 0F BA /5
+
+ CMPXCHG 0F B0, 0F B1
+ CMPXCHG8B 0F C7 /1
+
+ XADD 0F C0, 0F C1
+*/
+static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
+{
+ switch (opc[0]) {
+ case 0x00: case 0x01: case 0x02: case 0x03: return True;
+ case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
+ case 0x10: case 0x11: case 0x12: case 0x13: return True;
+ case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
+ case 0x20: case 0x21: case 0x22: case 0x23: return True;
+ case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
+ case 0x30: case 0x31: case 0x32: case 0x33: return True;
+
+ case 0x80: case 0x81: case 0x83:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6)
+ return True;
+ break;
+
+ case 0xFE: case 0xFF:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1)
+ return True;
+ break;
+
+ case 0xF6: case 0xF7:
+ if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3)
+ return True;
+ break;
+
+ case 0x86: case 0x87:
+ return True;
+
+ case 0x0F: {
+ switch (opc[1]) {
+ case 0xBB: case 0xB3: case 0xAB:
+ return True;
+ case 0xBA:
+ if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7)
+ return True;
+ break;
+ case 0xB0: case 0xB1:
+ return True;
+ case 0xC7:
+ if (gregLO3ofRM(opc[2]) == 1)
+ return True;
+ break;
+ case 0xC0: case 0xC1:
+ return True;
+ default:
+ break;
+ } /* switch (opc[1]) */
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (opc[0]) */
+
+ return False;
+}
+
+
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
@@ -8341,6 +8431,9 @@
/* pfx holds the summary of prefixes. */
Prefix pfx = PFX_EMPTY;
+ /* do we need to follow the insn with MBusEvent(BusUnlock) ? */
+ Bool unlock_bus_after_insn = False;
+
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 0;
@@ -8477,17 +8570,40 @@
/* Kludge re LOCK prefixes. We assume here that all code generated
by Vex is going to be run in a single-threaded context, in other
words that concurrent executions of Vex-generated translations
- will not happen. That is certainly the case for how the
- Valgrind-3.0 code line uses Vex. Given that assumption, it
- seems safe to ignore LOCK prefixes since there will never be any
- other thread running at the same time as this one. However, at
- least emit a memory fence on the basis that it would at least be
- prudent to flush any memory transactions from this thread as far
- as possible down the memory hierarchy. */
+ will not happen. So we don't need to worry too much about
+ preserving atomicity. However, mark the fact that the notional
+ hardware bus lock is being acquired (and, after the insn,
+ released), so that thread checking tools know this is a locked
+ insn.
+
+ We check for, and immediately reject, (most) inappropriate uses
+ of the LOCK prefix. Later (at decode_failure: and
+ decode_success:), if we've added a BusLock event, then we will
+ follow up with a BusUnlock event. How do we know execution will
+ actually ever get to the BusUnlock event? Because
+ can_be_used_with_LOCK_prefix rejects all control-flow changing
+ instructions.
+
+ One loophole, though: if a LOCK prefix insn (seg)faults, then
+ the BusUnlock event will never be reached. This could cause
+ tools which track bus hardware lock to lose track. Really, we
+ should explicitly release the lock after every insn, but that's
+ obviously way too expensive. Really, any tool which tracks the
+ state of the bus lock needs to ask V's core/tool interface to
+ notify it of signal deliveries. On delivery of SIGSEGV to the
+ guest, the tool will be notified, in which case it should
+ release the bus hardware lock if it is held.
+
+ Note, guest-x86/toIR.c contains identical logic.
+ */
if (pfx & PFX_LOCK) {
- /* vex_printf("vex amd64->IR: ignoring LOCK prefix on: ");
- insn_verbose = True; */
- stmt( IRStmt_MFence() );
+ if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
+ stmt( IRStmt_MBE(Imbe_BusLock) );
+ unlock_bus_after_insn = True;
+ DIP("lock ");
+ } else {
+ goto decode_failure;
+ }
}
@@ -9536,7 +9652,7 @@
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("sfence\n");
goto decode_success;
}
@@ -10313,7 +10429,7 @@
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m");
goto decode_success;
}
@@ -12763,7 +12879,7 @@
assign( addr, handleAddrOverrides(pfx, mkU64(d64)) );
putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
- sorbTxt(pfx), d64,
+ segRegTxt(pfx), d64,
nameIRegRAX(sz));
break;
@@ -12781,7 +12897,7 @@
assign( addr, handleAddrOverrides(pfx, mkU64(d64)) );
storeLE( mkexpr(addr), getIRegRAX(sz) );
DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
- sorbTxt(pfx), d64);
+ segRegTxt(pfx), d64);
break;
/* XXXX be careful here with moves to AH/BH/CH/DH */
@@ -14136,7 +14252,7 @@
stmt( IRStmt_Dirty(d) );
/* CPUID is a serialising insn. So, just in case someone is
using it as a memory fence ... */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("cpuid\n");
break;
}
@@ -14500,6 +14616,8 @@
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
@@ -14510,6 +14628,8 @@
decode_success:
/* All decode successes end up here. */
DIP("\n");
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
dres.len = (Int)toUInt(delta - delta_start);
return dres;
}
Modified: branches/THRCHECK/priv/guest-ppc/toIR.c
===================================================================
--- branches/THRCHECK/priv/guest-ppc/toIR.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/guest-ppc/toIR.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -4816,7 +4816,7 @@
return False;
}
DIP("isync\n");
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
break;
/* X-Form */
@@ -4829,7 +4829,7 @@
}
DIP("eieio\n");
/* Insert a memory fence, just to be on the safe side. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
break;
case 0x014: // lwarx (Load Word and Reserve Indexed, PPC32 p458)
@@ -4918,7 +4918,7 @@
DIP("%ssync\n", flag_L == 1 ? "lw" : "");
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
break;
/* 64bit Memsync */
@@ -5662,7 +5662,7 @@
putGST( PPC_GST_TILEN, mkSzImm(ty, lineszB) );
/* be paranoid ... */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
irsb->jumpkind = Ijk_TInval;
irsb->next = mkSzImm(ty, nextInsnAddr());
Modified: branches/THRCHECK/priv/guest-x86/toIR.c
===================================================================
--- branches/THRCHECK/priv/guest-x86/toIR.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/guest-x86/toIR.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -7136,6 +7136,96 @@
}
+/* Helper for deciding whether a given insn (starting at the opcode
+ byte) may validly be used with a LOCK prefix. The following insns
+ may be used with LOCK when their destination operand is in memory.
+ Note, this is slightly too permissive. Oh well. Note also, AFAICS
+ this is exactly the same for both 32-bit and 64-bit mode.
+
+ ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
+ OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
+ ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
+ SBB 80 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
+ AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
+ SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
+ XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+
+ DEC FE /1, FF /1
+ INC FE /0, FF /0
+
+ NEG F6 /3, F7 /3
+ NOT F6 /2, F7 /2
+
+ XCHG 86, 87
+
+ BTC 0F BB, 0F BA /7
+ BTR 0F B3, 0F BA /6
+ BTS 0F AB, 0F BA /5
+
+ CMPXCHG 0F B0, 0F B1
+ CMPXCHG8B 0F C7 /1
+
+ XADD 0F C0, 0F C1
+*/
+static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
+{
+ switch (opc[0]) {
+ case 0x00: case 0x01: case 0x02: case 0x03: return True;
+ case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
+ case 0x10: case 0x11: case 0x12: case 0x13: return True;
+ case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
+ case 0x20: case 0x21: case 0x22: case 0x23: return True;
+ case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
+ case 0x30: case 0x31: case 0x32: case 0x33: return True;
+
+ case 0x80: case 0x81: case 0x83:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6)
+ return True;
+ break;
+
+ case 0xFE: case 0xFF:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1)
+ return True;
+ break;
+
+ case 0xF6: case 0xF7:
+ if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3)
+ return True;
+ break;
+
+ case 0x86: case 0x87:
+ return True;
+
+ case 0x0F: {
+ switch (opc[1]) {
+ case 0xBB: case 0xB3: case 0xAB:
+ return True;
+ case 0xBA:
+ if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7)
+ return True;
+ break;
+ case 0xB0: case 0xB1:
+ return True;
+ case 0xC7:
+ if (gregOfRM(opc[2]) == 1)
+ return True;
+ break;
+ case 0xC0: case 0xC1:
+ return True;
+ default:
+ break;
+ } /* switch (opc[1]) */
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (opc[0]) */
+
+ return False;
+}
+
+
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
@@ -7155,10 +7245,10 @@
IRType ty;
IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
Int alen;
- UChar opc, modrm, abyte;
+ UChar opc, modrm, abyte, pre;
UInt d32;
HChar dis_buf[50];
- Int am_sz, d_sz;
+ Int am_sz, d_sz, n_prefixes;
DisResult dres;
UChar* insn; /* used in SSE decoders */
@@ -7178,6 +7268,12 @@
indicating the prefix. */
UChar sorb = 0;
+ /* Gets set to True if a LOCK prefix is seen. */
+ Bool pfx_lock = False;
+
+ /* do we need to follow the insn with MBusEvent(BusUnlock) ? */
+ Bool unlock_bus_after_insn = False;
+
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 0;
@@ -7242,103 +7338,129 @@
}
}
- /* Deal with prefixes. */
- /* Skip a LOCK prefix. */
- /* 2005 Jan 06: the following insns are observed to sometimes
- have a LOCK prefix:
- cmpxchgl %ecx,(%edx)
- cmpxchgl %edx,0x278(%ebx) etc
- xchgl %eax, (%ecx)
- xaddl %eax, (%ecx)
- We need to catch any such which appear to be being used as
- a memory barrier, for example lock addl $0,0(%esp)
- and emit an IR MFence construct.
- */
- if (getIByte(delta) == 0xF0) {
-
+ /* Handle a couple of weird-ass NOPs that have been observed in the
+ wild. */
+ {
UChar* code = (UChar*)(guest_code + delta);
-
- /* Various bits of kernel headers use the following as a memory
- barrier. Hence, first emit an MFence and then let the insn
- go through as usual. */
- /* F08344240000: lock addl $0, 0(%esp) */
- if (code[0] == 0xF0 && code[1] == 0x83 && code[2] == 0x44 &&
- code[3] == 0x24 && code[4] == 0x00 && code[5] == 0x00) {
- stmt( IRStmt_MFence() );
+ /* Sun's JVM 1.5.0 uses the following as a NOP:
+ 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
+ if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
+ && code[3] == 0x65 && code[4] == 0x90) {
+ DIP("%%es:%%cs:%%fs:%%gs:nop\n");
+ delta += 5;
+ goto decode_success;
}
- else
- if (0) {
- vex_printf("vex x86->IR: ignoring LOCK prefix on: ");
- /* insn_verbose = True; */
+ /* don't barf on recent binutils padding
+ 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1) */
+ if (code[0] == 0x66
+ && code[1] == 0x2E && code[2] == 0x0F && code[3] == 0x1F
+ && code[4] == 0x84 && code[5] == 0x00 && code[6] == 0x00
+ && code[7] == 0x00 && code[8] == 0x00 && code[9] == 0x00 ) {
+ DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
+ delta += 10;
+ goto decode_success;
}
+ }
- /* In any case, skip the prefix. */
- delta++;
- }
+ /* Normal instruction handling starts here. */
- /* Detect operand-size overrides. It is possible for more than one
- 0x66 to appear. */
- while (getIByte(delta) == 0x66) { sz = 2; delta++; };
-
- /* segment override prefixes come after the operand-size override,
- it seems */
- switch (getIByte(delta)) {
- case 0x3E: /* %DS: */
- case 0x26: /* %ES: */
- /* Sun's JVM 1.5.0 uses the following as a NOP:
- 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
- {
- UChar* code = (UChar*)(guest_code + delta);
- if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
- && code[3] == 0x65 && code[4] == 0x90) {
- DIP("%%es:%%cs:%%fs:%%gs:nop\n");
- delta += 5;
- goto decode_success;
- }
- /* else fall through */
- }
- case 0x64: /* %FS: */
- case 0x65: /* %GS: */
- sorb = getIByte(delta); delta++;
- break;
- case 0x2E: /* %CS: */
- /* 2E prefix on a conditional branch instruction is a
- branch-prediction hint, which can safely be ignored. */
- {
+ /* Deal with some but not all prefixes:
+ 66(oso)
+ F0(lock)
+ 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
+ Not dealt with (left in place):
+ F2 F3
+ */
+ n_prefixes = 0;
+ while (True) {
+ if (n_prefixes > 7) goto decode_failure;
+ pre = getUChar(delta);
+ switch (pre) {
+ case 0x66:
+ sz = 2;
+ break;
+ case 0xF0:
+ pfx_lock = True;
+ break;
+ case 0x3E: /* %DS: */
+ case 0x26: /* %ES: */
+ case 0x64: /* %FS: */
+ case 0x65: /* %GS: */
+ if (sorb != 0)
+ goto decode_failure; /* only one seg override allowed */
+ sorb = pre;
+ break;
+ case 0x2E: { /* %CS: */
+ /* 2E prefix on a conditional branch instruction is a
+ branch-prediction hint, which can safely be ignored. */
UChar op1 = getIByte(delta+1);
UChar op2 = getIByte(delta+2);
if ((op1 >= 0x70 && op1 <= 0x7F)
|| (op1 == 0xE3)
|| (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
- sorb = getIByte(delta); delta++;
- break;
+ } else {
+ /* All other CS override cases are not handled */
+ goto decode_failure;
}
+ break;
}
- /* don't barf on recent binutils padding
- 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1)
- */
- {
- UChar* code = (UChar*)(guest_code + delta);
- if (sz == 2
- && code[-1] == 0x66
- && code[0] == 0x2E && code[1] == 0x0F && code[2] == 0x1F
- && code[3] == 0x84 && code[4] == 0x00 && code[5] == 0x00
- && code[6] == 0x00 && code[7] == 0x00 && code[8] == 0x00 ) {
- DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
- delta += 9;
- goto decode_success;
- }
- }
- /* All other CS override cases are not handled */
+ case 0x36: /* %SS: */
+ /* SS override cases are not handled */
+ goto decode_failure;
+ default:
+ goto not_a_prefix;
+ }
+ n_prefixes++;
+ delta++;
+ }
+
+ not_a_prefix:
+
+ /* Now we should be looking at the primary opcode byte or the
+ leading F2 or F3. Check that any LOCK prefix is actually
+ allowed. */
+
+ /* Kludge re LOCK prefixes. We assume here that all code generated
+ by Vex is going to be run in a single-threaded context, in other
+ words that concurrent executions of Vex-generated translations
+ will not happen. So we don't need to worry too much about
+ preserving atomicity. However, mark the fact that the notional
+ hardware bus lock is being acquired (and, after the insn,
+ released), so that thread checking tools know this is a locked
+ insn.
+
+ We check for, and immediately reject, (most) inappropriate uses
+ of the LOCK prefix. Later (at decode_failure: and
+ decode_success:), if we've added a BusLock event, then we will
+ follow up with a BusUnlock event. How do we know execution will
+ actually ever get to the BusUnlock event? Because
+ can_be_used_with_LOCK_prefix rejects all control-flow changing
+ instructions.
+
+ One loophole, though: if a LOCK prefix insn (seg)faults, then
+ the BusUnlock event will never be reached. This could cause
+ tools which track bus hardware lock to lose track. Really, we
+ should explicitly release the lock after every insn, but that's
+ obviously way too expensive. Really, any tool which tracks the
+ state of the bus lock needs to ask V's core/tool interface to
+ notify it of signal deliveries. On delivery of SIGSEGV to the
+ guest, the tool will be notified, in which case it should
+ release the bus hardware lock if it is held.
+
+ Note, guest-amd64/toIR.c contains identical logic.
+ */
+ if (pfx_lock) {
+ if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
+ stmt( IRStmt_MBE(Imbe_BusLock) );
+ unlock_bus_after_insn = True;
+ DIP("lock ");
+ } else {
goto decode_failure;
- case 0x36: /* %SS: */
- /* SS override cases are not handled */
- goto decode_failure;
- default:
- break;
+ }
}
+
/* ---------------------------------------------------- */
/* --- The SSE decoder. --- */
/* ---------------------------------------------------- */
@@ -8324,7 +8446,7 @@
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("sfence\n");
goto decode_success;
}
@@ -9104,7 +9226,7 @@
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
goto decode_success;
}
@@ -12699,7 +12821,7 @@
stmt( IRStmt_Dirty(d) );
/* CPUID is a serialising insn. So, just in case someone is
using it as a memory fence ... */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("cpuid\n");
break;
}
@@ -13086,6 +13208,8 @@
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
@@ -13096,7 +13220,8 @@
decode_success:
/* All decode successes end up here. */
DIP("\n");
-
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
dres.len = delta - delta_start;
return dres;
}
Modified: branches/THRCHECK/priv/host-amd64/isel.c
===================================================================
--- branches/THRCHECK/priv/host-amd64/isel.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/host-amd64/isel.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -3763,9 +3763,18 @@
}
/* --------- MEM FENCE --------- */
- case Ist_MFence:
- addInstr(env, AMD64Instr_MFence());
- return;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, AMD64Instr_MFence());
+ return;
+ case Imbe_BusLock:
+ case Imbe_BusUnlock:
+ return;
+ default:
+ break;
+ }
+ break;
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
Modified: branches/THRCHECK/priv/host-ppc/isel.c
===================================================================
--- branches/THRCHECK/priv/host-ppc/isel.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/host-ppc/isel.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -3866,9 +3866,18 @@
}
/* --------- MEM FENCE --------- */
- case Ist_MFence:
- addInstr(env, PPCInstr_MFence());
- return;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, PPCInstr_MFence());
+ return;
+ case Imbe_BusLock:
+ case Imbe_BusUnlock:
+ return;
+ default:
+ break;
+ }
+ break;
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
Modified: branches/THRCHECK/priv/host-x86/isel.c
===================================================================
--- branches/THRCHECK/priv/host-x86/isel.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/host-x86/isel.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -3802,9 +3802,18 @@
}
/* --------- MEM FENCE --------- */
- case Ist_MFence:
- addInstr(env, X86Instr_MFence(env->hwcaps));
- return;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, X86Instr_MFence(env->hwcaps));
+ return;
+ case Imbe_BusLock:
+ case Imbe_BusUnlock:
+ return;
+ default:
+ break;
+ }
+ break;
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
Modified: branches/THRCHECK/priv/ir/irdefs.c
===================================================================
--- branches/THRCHECK/priv/ir/irdefs.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/ir/irdefs.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -736,6 +736,16 @@
}
}
+void ppIRMBusEvent ( IRMBusEvent event )
+{
+ switch (event) {
+ case Imbe_Fence: vex_printf("Fence"); break;
+ case Imbe_BusLock: vex_printf("BusLock"); break;
+ case Imbe_BusUnlock: vex_printf("BusUnlock"); break;
+ default: vpanic("ppIRMBusEvent");
+ }
+}
+
void ppIRStmt ( IRStmt* s )
{
if (!s) {
@@ -781,8 +791,9 @@
case Ist_Dirty:
ppIRDirty(s->Ist.Dirty.details);
break;
- case Ist_MFence:
- vex_printf("IR-MFence");
+ case Ist_MBE:
+ vex_printf("IR-");
+ ppIRMBusEvent(s->Ist.MBE.event);
break;
case Ist_Exit:
vex_printf( "if (" );
@@ -1186,12 +1197,12 @@
s->Ist.Dirty.details = d;
return s;
}
-IRStmt* IRStmt_MFence ( void )
+IRStmt* IRStmt_MBE ( IRMBusEvent event )
{
- /* Just use a single static closure. */
- static IRStmt static_closure;
- static_closure.tag = Ist_MFence;
- return &static_closure;
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_MBE;
+ s->Ist.MBE.event = event;
+ return s;
}
IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) {
IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
@@ -1387,8 +1398,8 @@
deepCopyIRExpr(s->Ist.Store.data));
case Ist_Dirty:
return IRStmt_Dirty(deepCopyIRDirty(s->Ist.Dirty.details));
- case Ist_MFence:
- return IRStmt_MFence();
+ case Ist_MBE:
+ return IRStmt_MBE(s->Ist.MBE.event);
case Ist_Exit:
return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard),
s->Ist.Exit.jk,
@@ -2021,7 +2032,7 @@
return True;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
return True;
case Ist_Exit:
return isIRAtom(st->Ist.Exit.guard);
@@ -2196,7 +2207,7 @@
useBeforeDef_Expr(bb,stmt,d->mAddr,def_counts);
break;
case Ist_NoOp:
- case Ist_MFence:
+ case Ist_MBE:
break;
case Ist_Exit:
useBeforeDef_Expr(bb,stmt,stmt->Ist.Exit.guard,def_counts);
@@ -2500,8 +2511,15 @@
bad_dirty:
sanityCheckFail(bb,stmt,"IRStmt.Dirty: ill-formed");
case Ist_NoOp:
- case Ist_MFence:
break;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence: case Imbe_BusLock: case Imbe_BusUnlock:
+ break;
+ default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown");
+ break;
+ }
+ break;
case Ist_Exit:
tcExpr( bb, stmt, stmt->Ist.Exit.guard, gWordTy );
if (typeOfIRExpr(tyenv,stmt->Ist.Exit.guard) != Ity_I1)
Modified: branches/THRCHECK/priv/ir/iropt.c
===================================================================
--- branches/THRCHECK/priv/ir/iropt.c 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/priv/ir/iropt.c 2007-09-16 11:04:24 UTC (rev 1789)
@@ -442,7 +442,7 @@
addStmtToIRSB(bb, IRStmt_Dirty(d2));
break;
case Ist_NoOp:
- case Ist_MFence:
+ case Ist_MBE:
case Ist_IMark:
addStmtToIRSB(bb, st);
break;
@@ -708,11 +708,12 @@
crude solution is just to flush everything; we could easily
enough do a lot better if needed. */
/* Probably also overly-conservative, but also dump everything
- if we hit a memory fence. Ditto AbiHints.*/
+ if we hit a memory bus event (fence, lock, unlock). Ditto
+ AbiHints.*/
case Ist_AbiHint:
vassert(isIRAtom(st->Ist.AbiHint.base));
/* fall through */
- case Ist_MFence:
+ case Ist_MBE:
case Ist_Dirty:
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
@@ -1760,8 +1761,8 @@
case Ist_NoOp:
return IRStmt_NoOp();
- case Ist_MFence:
- return IRStmt_MFence();
+ case Ist_MBE:
+ return IRStmt_MBE(st->Ist.MBE.event);
case Ist_Exit: {
IRExpr* fcond;
@@ -1967,7 +1968,7 @@
return;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
return;
case Ist_Exit:
addUses_Expr(set, st->Ist.Exit.guard);
@@ -2535,7 +2536,7 @@
/* ------ BEGIN invalidate aenv bindings ------ */
/* This is critical: remove from aenv any E' -> .. bindings
which might be invalidated by this statement. The only
- vulnerable kind of bindings are the GetIt kind.
+ vulnerable kind of bindings are the GetI kind.
Dirty call - dump (paranoia level -> 2)
Store - dump (ditto)
Put, PutI - dump unless no-overlap is proven (.. -> 1)
@@ -2543,12 +2544,12 @@
to do the no-overlap assessments needed for Put/PutI.
*/
switch (st->tag) {
- case Ist_Dirty: case Ist_Store:
+ case Ist_Dirty: case Ist_Store: case Ist_MBE:
paranoia = 2; break;
case Ist_Put: case Ist_PutI:
paranoia = 1; break;
case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
- case Ist_WrTmp: case Ist_MFence: case Ist_Exit:
+ case Ist_WrTmp: case Ist_Exit:
paranoia = 0; break;
default:
vpanic("do_cse_BB(1)");
@@ -2963,7 +2964,7 @@
case Ist_IMark:
return False;
- case Ist_MFence:
+ case Ist_MBE:
case Ist_AbiHint:
/* just be paranoid ... these should be rare. */
return True;
@@ -3206,7 +3207,7 @@
switch (st->tag) {
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
break;
case Ist_AbiHint:
deltaIRExpr(st->Ist.AbiHint.base, delta);
@@ -3691,7 +3692,7 @@
return;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
return;
case Ist_Exit:
aoccCount_Expr(uses, st->Ist.Exit.guard);
@@ -3933,8 +3934,8 @@
return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
case Ist_NoOp:
return IRStmt_NoOp();
- case Ist_MFence:
- return IRStmt_MFence();
+ case Ist_MBE:
+ return IRStmt_MBE(st->Ist.MBE.event);
case Ist_Dirty:
d = st->Ist.Dirty.details;
d2 = emptyIRDirty();
@@ -4093,11 +4094,11 @@
question is marked as requiring precise
exceptions. */
|| (env[k].doesLoad && stmtPuts)
- /* probably overly conservative: a memory fence
+ /* probably overly conservative: a memory bus event
invalidates absolutely everything, so that all
computation prior to it is forced to complete before
- proceeding with the fence. */
- || st->tag == Ist_MFence
+ proceeding with the event (fence,lock,unlock). */
+ || st->tag == Ist_MBE
/* also be (probably overly) paranoid re AbiHints */
|| st->tag == Ist_AbiHint
);
@@ -4265,7 +4266,7 @@
break;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
break;
case Ist_Exit:
vassert(isIRAtom(st->Ist.Exit.guard));
Modified: branches/THRCHECK/pub/libvex_ir.h
===================================================================
--- branches/THRCHECK/pub/libvex_ir.h 2007-09-09 19:38:48 UTC (rev 1788)
+++ branches/THRCHECK/pub/libvex_ir.h 2007-09-16 11:04:24 UTC (rev 1789)
@@ -229,8 +229,8 @@
float, or a vector (SIMD) value. */
typedef
enum {
- Ity_INVALID=0x10FFF,
- Ity_I1=0x11000,
+ Ity_INVALID=0x11000,
+ Ity_I1,
Ity_I8,
Ity_I16,
Ity_I32,
@@ -254,8 +254,8 @@
/* IREndness is used in load IRExprs and store IRStmts. */
typedef
enum {
- Iend_LE=22, /* little endian */
- Iend_BE=33 /* big endian */
+ Iend_LE=0x12000, /* little endian */
+ Iend_BE /* big endian */
}
IREndness;
@@ -267,7 +267,7 @@
/* The various kinds of constant. */
typedef
enum {
- Ico_U1=0x12000,
+ Ico_U1=0x13000,
Ico_U8,
Ico_U16,
Ico_U32,
@@ -406,7 +406,7 @@
/* -- Do not change this ordering. The IR generators rely on
(eg) Iop_Add64 == IopAdd8 + 3. -- */
- Iop_INVALID=0x13000,
+ Iop_INVALID=0x14000,
Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64,
Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64,
/* Signless mul. MullS/MullU is elsewhere. */
@@ -884,7 +884,7 @@
in the comments for IRExpr. */
typedef
enum {
- Iex_Binder,
+ Iex_Binder=0x15000,
Iex_Get,
Iex_GetI,
Iex_RdTmp,
@@ -1181,7 +1181,7 @@
*/
typedef
enum {
- Ijk_Boring=0x14000, /* not interesting; just goto next */
+ Ijk_Boring=0x16000, /* not interesting; just goto next */
Ijk_Call, /* guest is doing a call */
Ijk_Ret, /* guest is doing a return */
Ijk_ClientReq, /* do guest client req before continuing */
@@ -1254,7 +1254,7 @@
/* Effects on resources (eg. registers, memory locations) */
typedef
enum {
- Ifx_None = 0x15000, /* no effect */
+ Ifx_None = 0x17000, /* no effect */
Ifx_Read, /* reads the resource */
Ifx_Write, /* writes the resource */
Ifx_Modify, /* modifies the resource */
@@ -1316,6 +1316,19 @@
IRExpr** args );
+/* --------------- Memory Bus Events --------------- */
+
+typedef
+ enum {
+ Imbe_Fence=0x18000,
+ Imbe_BusLock,
+ Imbe_BusUnlock
+ }
+ IRMBusEvent;
+
+extern void ppIRMBusEvent ( IRMBusEvent );
+
+
/* ------------------ Statements ------------------ */
/* The different kinds of statements. Their meaning is explained
@@ -1327,9 +1340,10 @@
they are required by some IR consumers such as tools that
instrument the code.
*/
+
typedef
enum {
- Ist_NoOp,
+ Ist_NoOp=0x19000,
Ist_IMark, /* META */
Ist_AbiHint, /* META */
Ist_Put,
@@ -1337,7 +1351,7 @@
Ist_WrTmp,
Ist_Store,
Ist_Dirty,
- Ist_MFence,
+ Ist_MBE, /* META (maybe) */
Ist_Exit
}
IRStmtTag;
@@ -1452,11 +1466,15 @@
IRDirty* details;
} Dirty;
- /* A memory fence.
- ppIRExpr output: IR-MFence
+ /* A memory bus event - a fence, or acquisition/release of the
+ hardware bus lock. IR optimisation treats all these as fences
+ across which no memory references may be moved.
+ ppIRExpr output: IR-Fence,
+ IR-BusLock, IR-BusUnlock.
*/
struct {
- } MFence;
+ IRMBusEvent event;
+ } MBE;
/* Conditional exit from the middle of an IRSB.
ppIRExpr output: if (<guard>) goto {<jk>} <dst>
@@ -1481,7 +1499,7 @@
extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_Dirty ( IRDirty* details );
-extern IRStmt* IRStmt_MFence ( void );
+extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
/* Deep-copy an IRStmt. */
|