|
From: <sv...@va...> - 2010-07-29 15:39:13
|
Author: sewardj
Date: 2010-07-29 16:39:05 +0100 (Thu, 29 Jul 2010)
New Revision: 1995
Log:
Only decode LZCNT if the host supports it, since otherwise we risk
confusing it with BSR. Followup to #212335.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/guest_x86_toIR.c
trunk/priv/host_amd64_isel.c
trunk/priv/host_x86_defs.c
trunk/priv/host_x86_isel.c
trunk/priv/main_main.c
trunk/pub/libvex.h
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/priv/guest_amd64_toIR.c 2010-07-29 15:39:05 UTC (rev 1995)
@@ -15099,14 +15099,13 @@
goto decode_success;
}
- /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, but
- fortunately occupying opcode space which AFAICS is not occupied
- by anything else, even in Intel land. NB: 0F BD is BSR, but
- that's decoded below here, and we reject it if there's an F3
- prefix. Hence there is no possibility of confusion with this
- one. */
+ /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
+ which we can only decode if we're sure this is an AMD cpu that
+ supports LZCNT, since otherwise it's BSR, which behaves
+ differently. */
if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
- && insn[0] == 0x0F && insn[1] == 0xBD) {
+ && insn[0] == 0x0F && insn[1] == 0xBD
+ && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
vassert(sz == 2 || sz == 4 || sz == 8);
/*IRType*/ ty = szToITy(sz);
IRTemp src = newTemp(ty);
Modified: trunk/priv/guest_x86_toIR.c
===================================================================
--- trunk/priv/guest_x86_toIR.c 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/priv/guest_x86_toIR.c 2010-07-29 15:39:05 UTC (rev 1995)
@@ -12613,13 +12613,12 @@
goto decode_success;
}
- /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, but
- fortunately occupying opcode space which AFAICS is not occupied
- by anything else, even in Intel land. NB: 0F BD is BSR, but
- that's decoded below here, and it won't match there's an F3
- prefix. Hence there is no possibility of confusion with this
- one. */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD) {
+ /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
+ which we can only decode if we're sure this is an AMD cpu that
+ supports LZCNT, since otherwise it's BSR, which behaves
+ differently. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
+ && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
vassert(sz == 2 || sz == 4);
/*IRType*/ ty = szToITy(sz);
IRTemp src = newTemp(ty);
Modified: trunk/priv/host_amd64_isel.c
===================================================================
--- trunk/priv/host_amd64_isel.c 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/priv/host_amd64_isel.c 2010-07-29 15:39:05 UTC (rev 1995)
@@ -4122,8 +4122,10 @@
/* sanity ... */
vassert(arch_host == VexArchAMD64);
- vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3
- |VEX_HWCAPS_AMD64_CX16)));
+ vassert(0 == (hwcaps_host
+ & ~(VEX_HWCAPS_AMD64_SSE3
+ | VEX_HWCAPS_AMD64_CX16
+ | VEX_HWCAPS_AMD64_LZCNT)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
Modified: trunk/priv/host_x86_defs.c
===================================================================
--- trunk/priv/host_x86_defs.c 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/priv/host_x86_defs.c 2010-07-29 15:39:05 UTC (rev 1995)
@@ -703,8 +703,10 @@
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_MFence;
i->Xin.MFence.hwcaps = hwcaps;
- vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1|VEX_HWCAPS_X86_SSE2
- |VEX_HWCAPS_X86_SSE3)));
+ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
+ |VEX_HWCAPS_X86_SSE2
+ |VEX_HWCAPS_X86_SSE3
+ |VEX_HWCAPS_X86_LZCNT)));
return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
Modified: trunk/priv/host_x86_isel.c
===================================================================
--- trunk/priv/host_x86_isel.c 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/priv/host_x86_isel.c 2010-07-29 15:39:05 UTC (rev 1995)
@@ -4000,9 +4000,11 @@
/* sanity ... */
vassert(arch_host == VexArchX86);
- vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_X86_SSE1
- |VEX_HWCAPS_X86_SSE2
- |VEX_HWCAPS_X86_SSE3)));
+ vassert(0 == (hwcaps_host
+ & ~(VEX_HWCAPS_X86_SSE1
+ | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3
+ | VEX_HWCAPS_X86_LZCNT)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
Modified: trunk/priv/main_main.c
===================================================================
--- trunk/priv/main_main.c 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/priv/main_main.c 2010-07-29 15:39:05 UTC (rev 1995)
@@ -754,32 +754,53 @@
static HChar* show_hwcaps_x86 ( UInt hwcaps )
{
/* Monotonic, SSE3 > SSE2 > SSE1 > baseline. */
- if (hwcaps == 0)
- return "x86-sse0";
- if (hwcaps == VEX_HWCAPS_X86_SSE1)
- return "x86-sse1";
- if (hwcaps == (VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2))
- return "x86-sse1-sse2";
- if (hwcaps == (VEX_HWCAPS_X86_SSE1
- | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3))
- return "x86-sse1-sse2-sse3";
-
- return NULL;
+ switch (hwcaps) {
+ case 0:
+ return "x86-sse0";
+ case VEX_HWCAPS_X86_SSE1:
+ return "x86-sse1";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
+ return "x86-sse1-sse2";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_LZCNT:
+ return "x86-sse1-sse2-lzcnt";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3:
+ return "x86-sse1-sse2-sse3";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT:
+ return "x86-sse1-sse2-sse3-lzcnt";
+ default:
+ return NULL;
+ }
}
static HChar* show_hwcaps_amd64 ( UInt hwcaps )
{
/* SSE3 and CX16 are orthogonal and > baseline, although we really
don't expect to come across anything which can do SSE3 but can't
- do CX16. Still, we can handle that case. */
- const UInt SSE3 = VEX_HWCAPS_AMD64_SSE3;
- const UInt CX16 = VEX_HWCAPS_AMD64_CX16;
- UInt c = hwcaps;
- if (c == 0) return "amd64-sse2";
- if (c == SSE3) return "amd64-sse3";
- if (c == CX16) return "amd64-sse2-cx16";
- if (c == (SSE3|CX16)) return "amd64-sse3-cx16";
- return NULL;
+ do CX16. Still, we can handle that case. LZCNT is similarly
+ orthogonal. */
+ switch (hwcaps) {
+ case 0:
+ return "amd64-sse2";
+ case VEX_HWCAPS_AMD64_SSE3:
+ return "amd64-sse3";
+ case VEX_HWCAPS_AMD64_CX16:
+ return "amd64-sse2-cx16";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16:
+ return "amd64-sse3-cx16";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse3-lzcnt";
+ case VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse2-cx16-lzcnt";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16
+ | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse3-cx16-lzcnt";
+
+ default:
+ return NULL;
+ }
}
static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
Modified: trunk/pub/libvex.h
===================================================================
--- trunk/pub/libvex.h 2010-07-29 11:34:38 UTC (rev 1994)
+++ trunk/pub/libvex.h 2010-07-29 15:39:05 UTC (rev 1995)
@@ -69,27 +69,29 @@
/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
cmpxchg8b. */
-#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
-#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
-#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
+#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
+#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
+#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
+#define VEX_HWCAPS_X86_LZCNT (1<<4) /* SSE4a LZCNT insn */
/* amd64: baseline capability is SSE2, with cmpxchg8b but not
cmpxchg16b. */
-#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */
-#define VEX_HWCAPS_AMD64_CX16 (1<<5) /* cmpxchg16b support */
+#define VEX_HWCAPS_AMD64_SSE3 (1<<5) /* SSE3 support */
+#define VEX_HWCAPS_AMD64_CX16 (1<<6) /* cmpxchg16b support */
+#define VEX_HWCAPS_AMD64_LZCNT (1<<7) /* SSE4a LZCNT insn */
/* ppc32: baseline capability is integer only */
-#define VEX_HWCAPS_PPC32_F (1<<6) /* basic (non-optional) FP */
-#define VEX_HWCAPS_PPC32_V (1<<7) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC32_FX (1<<8) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC32_GX (1<<9) /* Graphics extns
- (fres,frsqrte,fsel,stfiwx) */
+#define VEX_HWCAPS_PPC32_F (1<<8) /* basic (non-optional) FP */
+#define VEX_HWCAPS_PPC32_V (1<<9) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC32_FX (1<<10) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC32_GX (1<<11) /* Graphics extns
+ (fres,frsqrte,fsel,stfiwx) */
/* ppc64: baseline capability is integer and basic FP insns */
-#define VEX_HWCAPS_PPC64_V (1<<10) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC64_FX (1<<11) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC64_GX (1<<12) /* Graphics extns
- (fres,frsqrte,fsel,stfiwx) */
+#define VEX_HWCAPS_PPC64_V (1<<12) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC64_FX (1<<13) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC64_GX (1<<14) /* Graphics extns
+ (fres,frsqrte,fsel,stfiwx) */
/* arm: baseline capability is ARMv4 */
/* No extra capabilities */
|