From: <man...@us...> - 2015-06-27 14:42:01
|
Revision: 5369 http://sourceforge.net/p/modplug/code/5369 Author: manxorist Date: 2015-06-27 14:41:53 +0000 (Sat, 27 Jun 2015) Log Message: ----------- [Fix] Be more defensive with CPUID detection code. In particular, only query AMD extended feature flags (cpuid(eax=0x80000000)) if the function is actually documented for a CPU that old. It's not known whether this is actually required by real silicon; the docs are ambiguous and contradictory. Anyway, the more defensive check surely will not hurt any known CPU. (affected CPUs are AMD K5 and AMD K6-1) (not tested because I have no hardware available) [Fix] In the last round of CPUID cleanups, when we introduced AMD vendor string checking, 3DNow! detection on other vendors' CPUs was lost. Reintroduce it for all those that we know about and also only query the function on CPUs new enough to have it actually documented (supposedly fixes 3DNow! detection on IDT WinChip 2, Cyrix MediaGXm, VIA Cyrix III, VIA C3 and National Semiconductor Geode GX2) (not tested because I have no hardware available) [Fix] The extended feature flags are actually different from vendor to vendor, in particular the ones that AMD uses for MMXEXT and 3DNOWEXT have different meanings on other vendors. We should only ever check the one single bit for 3DNOW, and only on the other vendors' CPUs that support it. (not tested because I have no hardware available) [Ref] Add TSC, CMOV and SSE4 detection while at it. Modified Paths: -------------- trunk/OpenMPT/common/BuildSettings.h trunk/OpenMPT/common/mptCPU.cpp trunk/OpenMPT/common/mptCPU.h trunk/OpenMPT/mptrack/AboutDialog.cpp Modified: trunk/OpenMPT/common/BuildSettings.h =================================================================== --- trunk/OpenMPT/common/BuildSettings.h 2015-06-27 14:14:12 UTC (rev 5368) +++ trunk/OpenMPT/common/BuildSettings.h 2015-06-27 14:41:53 UTC (rev 5369) @@ -50,6 +50,9 @@ // Generate inline assembly using SSE3 instructions (only used when the CPU supports it). 
#define ENABLE_SSE3 +// Generate inline assembly using SSE4 instructions (only used when the CPU supports it). +#define ENABLE_SSE4 + // Generate inline assembly using AMD specific instruction set extensions (only used when the CPU supports it). #define ENABLE_X86_AMD @@ -64,6 +67,9 @@ // Generate inline assembly using SSE3 instructions (only used when the CPU supports it). #define ENABLE_SSE3 +// Generate inline assembly using SSE4 instructions (only used when the CPU supports it). +#define ENABLE_SSE4 + #endif #endif // ENABLE_ASM Modified: trunk/OpenMPT/common/mptCPU.cpp =================================================================== --- trunk/OpenMPT/common/mptCPU.cpp 2015-06-27 14:14:12 UTC (rev 5368) +++ trunk/OpenMPT/common/mptCPU.cpp 2015-06-27 14:41:53 UTC (rev 5369) @@ -19,6 +19,10 @@ uint32 ProcSupport = 0; +char ProcVendorID[16+1] = ""; +uint16 ProcFamily = 0; +uint8 ProcModel = 0; +uint8 ProcStepping = 0; #if MPT_COMPILER_MSVC && (defined(ENABLE_X86) || defined(ENABLE_X64)) @@ -69,46 +73,197 @@ } -static bool has_cpuid() -//--------------------- +static cpuid_result cpuidex(uint32 function_a, uint32 function_c) +//--------------------------------------------------------------- { - const size_t eflags_cpuid = 1 << 21; - size_t old_eflags = __readeflags(); - __writeeflags(old_eflags ^ eflags_cpuid); - bool result = ((__readeflags() ^ old_eflags) & eflags_cpuid) != 0; - __writeeflags(old_eflags); + cpuid_result result; + #if MPT_MSVC_AT_LEAST(2010,0) + int CPUInfo[4]; + __cpuidex(CPUInfo, function_a, function_c); + result.a = CPUInfo[0]; + result.b = CPUInfo[1]; + result.c = CPUInfo[2]; + result.d = CPUInfo[3]; + #else + // just do not test modern cpuid features with older compiler + result.a = 0; + result.b = 0; + result.c = 0; + result.d = 0; + #endif return result; } +static MPT_NOINLINE bool has_cpuid() +//---------------------------------- +{ + const size_t eflags_cpuid = 1 << 21; + size_t eflags_old = __readeflags(); + size_t eflags_flipped = 
eflags_old ^ eflags_cpuid; + __writeeflags(eflags_flipped); + size_t eflags_testchanged = __readeflags(); + __writeeflags(eflags_old); + return ((eflags_testchanged ^ eflags_old) & eflags_cpuid) != 0; +} + + void InitProcSupport() //-------------------- { ProcSupport = 0; + MemsetZero(ProcVendorID); + ProcFamily = 0; + ProcModel = 0; + ProcStepping = 0; if(has_cpuid()) { + + ProcSupport |= PROCSUPPORT_CPUID; + cpuid_result VendorString = cpuid(0x00000000u); + std::strcpy(ProcVendorID, VendorString.as_string().c_str()); - cpuid_result StandardFeatureFlags = cpuid(0x00000001u); - if(StandardFeatureFlags.d & (1<<23)) ProcSupport |= PROCSUPPORT_MMX; - if(StandardFeatureFlags.d & (1<<25)) ProcSupport |= PROCSUPPORT_SSE; - if(StandardFeatureFlags.d & (1<<26)) ProcSupport |= PROCSUPPORT_SSE2; - if(StandardFeatureFlags.c & (1<< 0)) ProcSupport |= PROCSUPPORT_SSE3; - - if(VendorString.as_string() == "AuthenticAMD") + // Cyrix 6x86 and 6x86MX do not specify the value returned in eax. + // They both support 0x00000001u however. 
+ if((VendorString.as_string() == "CyrixInstead") || (VendorString.a >= 0x00000001u)) { - cpuid_result ExtendedVendorString = cpuid(0x80000000u); - if(ExtendedVendorString.a >= 0x80000001u) + cpuid_result StandardFeatureFlags = cpuid(0x00000001u); + uint32 Stepping = (StandardFeatureFlags.a >> 0) & 0x0f; + uint32 BaseModel = (StandardFeatureFlags.a >> 4) & 0x0f; + uint32 BaseFamily = (StandardFeatureFlags.a >> 8) & 0x0f; + uint32 ExtModel = (StandardFeatureFlags.a >> 16) & 0x0f; + uint32 ExtFamily = (StandardFeatureFlags.a >> 20) & 0xff; + if(BaseFamily < 0xf) { - cpuid_result ExtendedFeatureFlags = cpuid(0x80000001u); - if(ExtendedFeatureFlags.d & (1<<22)) ProcSupport |= PROCSUPPORT_AMD_MMXEXT; - if(ExtendedFeatureFlags.d & (1<<31)) ProcSupport |= PROCSUPPORT_AMD_3DNOW; - if(ExtendedFeatureFlags.d & (1<<30)) ProcSupport |= PROCSUPPORT_AMD_3DNOW2; + ProcFamily = static_cast<uint16>(BaseFamily); + ProcModel = static_cast<uint8>(BaseModel); + ProcStepping = static_cast<uint8>(Stepping); + } else + { + ProcFamily = static_cast<uint16>(ExtFamily + BaseFamily); + ProcModel = static_cast<uint8>((ExtModel << 4) | (BaseModel << 0)); + ProcStepping = static_cast<uint8>(Stepping); } + if(StandardFeatureFlags.d & (1<< 4)) ProcSupport |= PROCSUPPORT_TSC; + if(StandardFeatureFlags.d & (1<<15)) ProcSupport |= PROCSUPPORT_CMOV; + if(StandardFeatureFlags.d & (1<< 0)) ProcSupport |= PROCSUPPORT_FPU; + if(StandardFeatureFlags.d & (1<<23)) ProcSupport |= PROCSUPPORT_MMX; + if(StandardFeatureFlags.d & (1<<25)) ProcSupport |= PROCSUPPORT_SSE; + if(StandardFeatureFlags.d & (1<<26)) ProcSupport |= PROCSUPPORT_SSE2; + if(StandardFeatureFlags.c & (1<< 0)) ProcSupport |= PROCSUPPORT_SSE3; + if(StandardFeatureFlags.c & (1<< 9)) ProcSupport |= PROCSUPPORT_SSSE3; + if(StandardFeatureFlags.c & (1<<19)) ProcSupport |= PROCSUPPORT_SSE4_1; + if(StandardFeatureFlags.c & (1<<20)) ProcSupport |= PROCSUPPORT_SSE4_2; } + + // 3DNow! manual recommends to just execute 0x80000000u. 
+ // It is totally unknown how earlier CPUs from other vendors + // would behave. + // Thus we only execute 0x80000000u on other vendors CPUs for the earliest + // that we found it documented for and that actually supports 3DNow!. + // We only need 0x80000000u in order to detect 3DNow!. + // Thus, this is enough for us. + if((VendorString.as_string() == "AuthenticAMD") || (VendorString.as_string() == "AMDisbetter!")) + { // AMD + + if((ProcFamily > 5) || ((ProcFamily == 5) && (ProcModel >= 8))) + { // >= K6-2 (K6 = Family 5, K6-2 = Model 8) + // Not sure if earlier AMD CPUs support 0x80000000u. + // AMD 5k86 and AMD K5 manuals do not mention it. + cpuid_result ExtendedVendorString = cpuid(0x80000000u); + if(ExtendedVendorString.a >= 0x80000001u) + { + cpuid_result ExtendedFeatureFlags = cpuid(0x80000001u); + if(ExtendedFeatureFlags.d & (1<< 4)) ProcSupport |= PROCSUPPORT_TSC; + if(ExtendedFeatureFlags.d & (1<<15)) ProcSupport |= PROCSUPPORT_CMOV; + if(ExtendedFeatureFlags.d & (1<< 0)) ProcSupport |= PROCSUPPORT_FPU; + if(ExtendedFeatureFlags.d & (1<<23)) ProcSupport |= PROCSUPPORT_MMX; + if(ExtendedFeatureFlags.d & (1<<22)) ProcSupport |= PROCSUPPORT_AMD_MMXEXT; + if(ExtendedFeatureFlags.d & (1<<31)) ProcSupport |= PROCSUPPORT_AMD_3DNOW; + if(ExtendedFeatureFlags.d & (1<<30)) ProcSupport |= PROCSUPPORT_AMD_3DNOWEXT; + } + } + + } else if(VendorString.as_string() == "CentaurHauls") + { // Centaur (IDT WinChip or VIA C3) + + if(ProcFamily == 5) + { // IDT + + if(ProcModel >= 8) + { // >= WinChip 2 + cpuid_result ExtendedVendorString = cpuid(0x80000000u); + if(ExtendedVendorString.a >= 0x80000001u) + { + cpuid_result ExtendedFeatureFlags = cpuid(0x80000001u); + if(ExtendedFeatureFlags.d & (1<<31)) ProcSupport |= PROCSUPPORT_AMD_3DNOW; + } + } + + } else if(ProcFamily >= 6) + { // VIA + + if((ProcFamily >= 7) || ((ProcFamily == 6) && (ProcModel >= 7))) + { // >= C3 Samuel 2 + cpuid_result ExtendedVendorString = cpuid(0x80000000u); + if(ExtendedVendorString.a >= 
0x80000001u) + { + cpuid_result ExtendedFeatureFlags = cpuid(0x80000001u); + if(ExtendedFeatureFlags.d & (1<<31)) ProcSupport |= PROCSUPPORT_AMD_3DNOW; + } + } + + } + + } else if(VendorString.as_string() == "CyrixInstead") + { // Cyrix + + // 6x86 : 5.2.x + // 6x86L : 5.2.x + // MediaGX : 4.4.x + // 6x86MX : 6.0.x + // MII : 6.0.x + // MediaGXm: 5.4.x + // well, doh ... + + if((ProcFamily == 5) && (ProcModel >= 4)) + { // Cyrix MediaGXm + cpuid_result ExtendedVendorString = cpuid(0x80000000u); + if(ExtendedVendorString.a >= 0x80000001u) + { + cpuid_result ExtendedFeatureFlags = cpuid(0x80000001u); + if(ExtendedFeatureFlags.d & (1<<31)) ProcSupport |= PROCSUPPORT_AMD_3DNOW; + } + } + + } else if(VendorString.as_string() == "Geode by NSC") + { // National Semiconductor + + if((ProcFamily > 5) || ((ProcFamily == 5) && (ProcModel >= 5))) + { // >= Geode GX2 + cpuid_result ExtendedVendorString = cpuid(0x80000000u); + if(ExtendedVendorString.a >= 0x80000001u) + { + cpuid_result ExtendedFeatureFlags = cpuid(0x80000001u); + if(ExtendedFeatureFlags.d & (1<<31)) ProcSupport |= PROCSUPPORT_AMD_3DNOW; + } + } + + } + + } else + { + + ProcSupport |= PROCSUPPORT_FPU; // We assume FPU because we require it. + } + + // We do not have to check if SSE got enabled by the OS because we only do + // support Windows >= 98 SE which will always enable SSE if available. 
+ } Modified: trunk/OpenMPT/common/mptCPU.h =================================================================== --- trunk/OpenMPT/common/mptCPU.h 2015-06-27 14:14:12 UTC (rev 5368) +++ trunk/OpenMPT/common/mptCPU.h 2015-06-27 14:41:53 UTC (rev 5369) @@ -15,14 +15,25 @@ #ifdef ENABLE_ASM -#define PROCSUPPORT_MMX 0x00001 // Processor supports MMX instructions -#define PROCSUPPORT_SSE 0x00010 // Processor supports SSE instructions -#define PROCSUPPORT_SSE2 0x00020 // Processor supports SSE2 instructions -#define PROCSUPPORT_SSE3 0x00040 // Processor supports SSE3 instructions -#define PROCSUPPORT_AMD_MMXEXT 0x10000 // Processor supports AMD MMX extensions -#define PROCSUPPORT_AMD_3DNOW 0x20000 // Processor supports AMD 3DNow! instructions -#define PROCSUPPORT_AMD_3DNOW2 0x40000 // Processor supports AMD 3DNow!2 instructions +#define PROCSUPPORT_CPUID 0x00001 // Processor supports CPUID instruction (i586) +#define PROCSUPPORT_TSC 0x00002 // Processor supports RDTSC instruction (i586) +#define PROCSUPPORT_CMOV 0x00004 // Processor supports conditional move instructions (i686) +#define PROCSUPPORT_FPU 0x00008 // Processor supports x87 instructions +#define PROCSUPPORT_MMX 0x00010 // Processor supports MMX instructions +#define PROCSUPPORT_AMD_MMXEXT 0x00020 // Processor supports AMD MMX extensions +#define PROCSUPPORT_AMD_3DNOW 0x00040 // Processor supports AMD 3DNow! 
instructions +#define PROCSUPPORT_AMD_3DNOWEXT 0x00080 // Processor supports AMD 3DNow!2 instructions +#define PROCSUPPORT_SSE 0x00100 // Processor supports SSE instructions +#define PROCSUPPORT_SSE2 0x00200 // Processor supports SSE2 instructions +#define PROCSUPPORT_SSE3 0x00400 // Processor supports SSE3 instructions +#define PROCSUPPORT_SSSE3 0x00800 // Processor supports SSSE3 instructions +#define PROCSUPPORT_SSE4_1 0x01000 // Processor supports SSE4.1 instructions +#define PROCSUPPORT_SSE4_2 0x02000 // Processor supports SSE4.2 instructions extern uint32 ProcSupport; +extern char ProcVendorID[16+1]; +extern uint16 ProcFamily; +extern uint8 ProcModel; +extern uint8 ProcStepping; void InitProcSupport(); static inline uint32 GetProcSupport() { Modified: trunk/OpenMPT/mptrack/AboutDialog.cpp =================================================================== --- trunk/OpenMPT/mptrack/AboutDialog.cpp 2015-06-27 14:14:12 UTC (rev 5368) +++ trunk/OpenMPT/mptrack/AboutDialog.cpp 2015-06-27 14:41:53 UTC (rev 5369) @@ -364,11 +364,12 @@ { text += MPT_USTRING("Required CPU features: "); std::vector<mpt::ustring> features; - if(GetMinimumSSEVersion() <= 0 && GetMinimumAVXVersion() <= 0 ) features.push_back(MPT_USTRING("FPU")); - if(GetMinimumSSEVersion() >= 1) features.push_back(MPT_USTRING("SSE")); - if(GetMinimumSSEVersion() >= 2) features.push_back(MPT_USTRING("SSE2")); - if(GetMinimumAVXVersion() >= 1) features.push_back(MPT_USTRING("AVX")); - if(GetMinimumAVXVersion() >= 2) features.push_back(MPT_USTRING("AVX2")); + if(GetMinimumSSEVersion() <= 0 && GetMinimumAVXVersion() <= 0 ) features.push_back(MPT_USTRING("fpu")); + if(GetMinimumSSEVersion() >= 1) features.push_back(MPT_USTRING("cmov")); + if(GetMinimumSSEVersion() >= 1) features.push_back(MPT_USTRING("sse")); + if(GetMinimumSSEVersion() >= 2) features.push_back(MPT_USTRING("sse2")); + if(GetMinimumAVXVersion() >= 1) features.push_back(MPT_USTRING("avx")); + if(GetMinimumAVXVersion() >= 2) 
features.push_back(MPT_USTRING("avx2")); text += mpt::String::Combine(features, MPT_USTRING(" ")); text += lf; } @@ -377,33 +378,52 @@ std::vector<mpt::ustring> features; #if MPT_COMPILER_MSVC && defined(ENABLE_ASM) #if defined(ENABLE_X86) - features.push_back(MPT_USTRING("x86-32")); + features.push_back(MPT_USTRING("x86")); + if(GetProcSupport() & PROCSUPPORT_FPU) features.push_back(MPT_USTRING("fpu")); + if(GetProcSupport() & PROCSUPPORT_CMOV) features.push_back(MPT_USTRING("cmov")); #endif #if defined(ENABLE_X64) features.push_back(MPT_USTRING("x86-64")); #endif #if defined(ENABLE_MMX) - if(GetProcSupport() & PROCSUPPORT_MMX) features.push_back(MPT_USTRING("MMX")); + if(GetProcSupport() & PROCSUPPORT_MMX) features.push_back(MPT_USTRING("mmx")); #endif #if defined(ENABLE_SSE) - if(GetProcSupport() & PROCSUPPORT_SSE) features.push_back(MPT_USTRING("SSE")); + if(GetProcSupport() & PROCSUPPORT_SSE) features.push_back(MPT_USTRING("sse")); #endif #if defined(ENABLE_SSE2) - if(GetProcSupport() & PROCSUPPORT_SSE2) features.push_back(MPT_USTRING("SSE2")); + if(GetProcSupport() & PROCSUPPORT_SSE2) features.push_back(MPT_USTRING("sse2")); #endif #if defined(ENABLE_SSE3) - if(GetProcSupport() & PROCSUPPORT_SSE3) features.push_back(MPT_USTRING("SSE3")); + if(GetProcSupport() & PROCSUPPORT_SSE3) features.push_back(MPT_USTRING("sse3")); + if(GetProcSupport() & PROCSUPPORT_SSSE3) features.push_back(MPT_USTRING("ssse3")); #endif + #if defined(ENABLE_SSE4) + if(GetProcSupport() & PROCSUPPORT_SSE4_1) features.push_back(MPT_USTRING("sse4.1")); + if(GetProcSupport() & PROCSUPPORT_SSE4_2) features.push_back(MPT_USTRING("sse4.2")); + #endif #if defined(ENABLE_X86_AMD) - if(GetProcSupport() & PROCSUPPORT_AMD_MMXEXT) features.push_back(MPT_USTRING("AMD-MMXEXT")); - if(GetProcSupport() & PROCSUPPORT_AMD_3DNOW) features.push_back(MPT_USTRING("AMD-3DNOW")); - if(GetProcSupport() & PROCSUPPORT_AMD_3DNOW2) features.push_back(MPT_USTRING("AMD-3DNOW2")); + if(GetProcSupport() & 
PROCSUPPORT_AMD_MMXEXT) features.push_back(MPT_USTRING("mmxext")); + if(GetProcSupport() & PROCSUPPORT_AMD_3DNOW) features.push_back(MPT_USTRING("3dnow")); + if(GetProcSupport() & PROCSUPPORT_AMD_3DNOWEXT) features.push_back(MPT_USTRING("3dnowext")); #endif #endif text += mpt::String::Combine(features, MPT_USTRING(" ")); text += lf; } text += lf; + if(GetProcSupport() & PROCSUPPORT_CPUID) + { + text += MPT_UFORMAT("CPU: %1, Family %2, Model %3, Stepping %4" + , mpt::ToUnicode(mpt::CharsetASCII, (std::strlen(ProcVendorID) > 0) ? std::string(ProcVendorID) : std::string("Generic")) + , ProcFamily + , ProcModel + , ProcStepping + ) + lf; + } else + { + text += MPT_USTRING("Generic without CPUID") + lf; + } text += MPT_UFORMAT("Operating System: %1", mpt::Windows::Version::GetName()) + lf; text += lf; text += MPT_UFORMAT("OpenMPT Path%2: %1", theApp.GetAppDirPath(), theApp.IsPortableMode() ? MPT_USTRING(" (portable)") : MPT_USTRING("")) + lf; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |