|
From: <sv...@va...> - 2005-11-13 00:30:28
|
Author: sewardj
Date: 2005-11-13 00:30:22 +0000 (Sun, 13 Nov 2005)
New Revision: 5108
Log:
Hook the ppc32 stuff up to the revised CPU detection machinery, and
add a bunch of code to detect what the cpu can do at startup by
catching SIGILLs. Shame PPC doesn't offer any sane mechanism for
finding out what instruction subsets the CPU is capable of (a la
x86/amd64 cpuid).
Modified:
trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
trunk/coregrind/m_machine.c
trunk/coregrind/m_main.c
trunk/coregrind/m_transtab.c
Modified: trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
===================================================================
--- trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-11-13 00:09:49 UTC (rev 5107)
+++ trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-11-13 00:30:22 UTC (rev 5108)
@@ -178,8 +178,8 @@
=20
/* set host AltiVec control word to the default mode expected=20
by VEX-generated code. */
- lis 3,VG_(have_altivec_ppc32)@ha
- lwz 3,VG_(have_altivec_ppc32)@l(3)
+ lis 3,VG_(machine_ppc32_has_VMX)@ha
+ lwz 3,VG_(machine_ppc32_has_VMX)@l(3)
cmplwi 3,0
beq L1
/* generate vector {0x0,0x0,0x0,0x00010000} */
Modified: trunk/coregrind/m_machine.c
===================================================================
--- trunk/coregrind/m_machine.c 2005-11-13 00:09:49 UTC (rev 5107)
+++ trunk/coregrind/m_machine.c 2005-11-13 00:30:22 UTC (rev 5108)
@@ -34,7 +34,9 @@
#include "pub_core_libcbase.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
+#include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL
=20
+
#define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
@@ -262,6 +264,12 @@
record it. To be called once at system startup. Returns False if
this a CPU incapable of running Valgrind. */
=20
+#if defined(VGA_ppc32)
+#include <setjmp.h> // For jmp_buf
+static jmp_buf env_sigill;
+static void handler_sigill ( Int x ) { __builtin_longjmp(env_sigill,1); =
}
+#endif
+
Bool VG_(machine_get_hwcaps)( void )
{
vg_assert(hwcaps_done =3D=3D False);
@@ -317,25 +325,108 @@
return True;
=20
#elif defined(VGA_ppc32)
- va =3D VexArchPPC32;
- vai.subarch =3D VexSubArchPPC32_AV;
- /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
- called before we're ready to go. */
- return True;
+ { /* ppc32 doesn't seem to have a sane way to find out what insn
+ sets the CPU supports. So we have to arse around with
+ SIGILLs. Yuck. */
+ vki_sigset_t saved_set, tmp_set;
+ struct vki_sigaction saved_act, tmp_act;
=20
+ Bool have_fp, have_vmx;
+
+ VG_(sigemptyset)(&tmp_set);
+ VG_(sigaddset)(&tmp_set, VKI_SIGILL);
+
+ VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
+
+ VG_(sigaction)(VKI_SIGILL, NULL, &saved_act);
+ tmp_act =3D saved_act;
+
+ tmp_act.sa_flags &=3D ~VKI_SA_RESETHAND;
+ tmp_act.sa_flags &=3D ~VKI_SA_SIGINFO;
+
+ tmp_act.ksa_handler =3D handler_sigill;
+ VG_(sigaction)(VKI_SIGILL, &tmp_act, NULL);
+
+ have_fp =3D True;
+ if (__builtin_setjmp(env_sigill)) {
+ have_fp =3D False;
+ } else {
+ __asm__ __volatile__("fmr 0,0");
+ }
+
+ tmp_act.ksa_handler =3D handler_sigill;
+ VG_(sigaction)(VKI_SIGILL, &tmp_act, NULL);
+
+ have_vmx =3D True;
+ if (__builtin_setjmp(env_sigill)) {
+ have_vmx =3D False;
+ } else {
+ __asm__ __volatile__("vor 0,0,0");
+ }
+
+ VG_(sigaction)(VKI_SIGILL, &saved_act, NULL);
+ VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
+
+ /* VG_(printf)("FP %d VMX %d\n", (Int)have_fp, (Int)have_vmx); */
+
+ /* We can only support 3 cases, not 4 (vmx but no fp). So make
+ fp a prerequisite for vmx. */
+ if (have_vmx && !have_fp)
+ have_vmx =3D False;
+
+ VG_(machine_ppc32_has_FPU) =3D have_fp ? 1 : 0;
+ VG_(machine_ppc32_has_VMX) =3D have_vmx ? 1 : 0;
+
+ va =3D VexArchPPC32;
+
+ if (have_fp =3D=3D False && have_vmx =3D=3D False) {
+ vai.subarch =3D VexSubArchPPC32_noAV; // _I
+ }
+ else if (have_fp =3D=3D True && have_vmx =3D=3D False) {
+ vai.subarch =3D VexSubArchPPC32_noAV; // _FI
+ }
+ else if (have_fp =3D=3D True && have_vmx =3D=3D True) {
+ vai.subarch =3D VexSubArchPPC32_AV; // _VFI
+ } else {
+ /* this can't happen. */
+ vg_assert2(0, "VG_(machine_get_hwcaps)(ppc32)");
+ }
+
+ /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
+ called before we're ready to go. */
+ return True;
+ }
+
#else
# error "Unknown arch"
#endif
}
=20
+/* Notify host cpu cache line size, as per above comment. */
+#if defined(VGA_ppc32)
+void VG_(machine_ppc32_set_clszB)( Int szB )
+{
+ vg_assert(hwcaps_done);
=20
+ /* Either the value must not have been set yet (zero) or we can
+ tolerate it being set to the same value multiple times, as the
+ stack scanning logic in m_main is a bit stupid. */
+ vg_assert(vai.ppc32_cache_line_szB =3D=3D 0
+ || vai.ppc32_cache_line_szB =3D=3D szB);
+
+ vg_assert(szB =3D=3D 32 || szB =3D=3D 128);
+ vai.ppc32_cache_line_szB =3D szB;
+}
+#endif
+
+
/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
/*OUT*/VexArchInfo* pVai )
{
vg_assert(hwcaps_done);
- *pVa =3D va;
- *pVai =3D vai;
+ if (pVa) *pVa =3D va;
+ if (pVai) *pVai =3D vai;
}
=20
=20
Modified: trunk/coregrind/m_main.c
===================================================================
--- trunk/coregrind/m_main.c 2005-11-13 00:09:49 UTC (rev 5107)
+++ trunk/coregrind/m_main.c 2005-11-13 00:30:22 UTC (rev 5108)
@@ -545,15 +545,6 @@
break;
=20
case AT_HWCAP:
-# if defined(VGP_ppc32_linux)
- /* Acquire altivecness info */
- VG_(debugLog)(2, "main", "PPC32 hwcaps: 0x%x\n",=20
- (UInt)auxv->u.a_val);
- if (auxv->u.a_val & 0x10000000)
- VG_(have_altivec_ppc32) =3D 1;
- VG_(debugLog)(2, "main", "PPC32 AltiVec support: %u\n",=20
- VG_(have_altivec_ppc32));
-# endif
break;
=20
case AT_DCACHEBSIZE:
@@ -562,7 +553,7 @@
# if defined(VGP_ppc32_linux)
/* acquire cache info */
if (auxv->u.a_val > 0) {
- VG_(cache_line_size_ppc32) =3D auxv->u.a_val;
+ VG_(machine_ppc32_set_clszB)( auxv->u.a_val );
VG_(debugLog)(2, "main",=20
"PPC32 cache line size %u (type %u)\n",=20
(UInt)auxv->u.a_val, (UInt)auxv->a_type =
);
Modified: trunk/coregrind/m_transtab.c
===================================================================
--- trunk/coregrind/m_transtab.c 2005-11-13 00:09:49 UTC (rev 5107)
+++ trunk/coregrind/m_transtab.c 2005-11-13 00:30:22 UTC (rev 5108)
@@ -737,9 +737,13 @@
# if defined(VGA_ppc32)
Addr startaddr =3D (Addr) ptr;
Addr endaddr =3D startaddr + nbytes;
- Addr cls =3D VG_(cache_line_size_ppc32);
+ Addr cls;
Addr addr;
+ VexArchInfo vai;
=20
+ VG_(machine_get_VexArchInfo)( NULL, &vai );
+ cls =3D vai.ppc32_cache_line_szB;
+
/* Stay sane .. */
vg_assert(cls =3D=3D 32 || cls =3D=3D 128);
=20
|
|
From: Paul M. <pa...@sa...> - 2005-11-13 00:54:39
|
sv...@va... writes:
> Hook the ppc32 stuff up to the revised CPU detection machinery, and
> add a bunch of code to detect what the cpu can do at startup by
> catching SIGILLs. Shame PPC doesn't offer any sane mechanism for
> finding out what instruction subsets the CPU is capable of (a la
> x86/amd64 cpuid).

What do you need that the AT_HWCAP aux table entry doesn't tell you?

Paul.
|
|
From: Julian S. <js...@ac...> - 2005-11-13 02:57:34
|
> What do you need that the AT_HWCAP aux table entry doesn't tell you?

Well, I'd like to know whether (1) vmx is supported, and (2) FP is
supported. (1) is doable from AT_HWCAP, but from some previous email
we had I got the impression (2) isn't (perhaps erroneously). And this
has to work for 2.4 as well as 2.6.

J
|
|
From: Paul M. <pa...@sa...> - 2005-11-13 04:54:45
|
Julian Seward writes:
> Well, I'd like to know whether (1) vmx is supported, and (2) FP is
> supported. (1) is doable from AT_HWCAP, but from some previous email
> we had I got the impression (2) isn't (perhaps erroneously). And this
> has to work for 2.4 as well as 2.6.

The PPC_FEATURE_HAS_FPU and PPC_FEATURE_HAS_ALTIVEC bits in the
AT_HWCAP entry tell you what you need to know. They have been there
for ages; certainly all 2.4 kernels should have them IIRC (I checked
at 2.4.7 kernel and it does).

Paul.
|