|
From: <sv...@va...> - 2005-12-22 15:53:21
|
Author: cerion
Date: 2005-12-22 15:53:12 +0000 (Thu, 22 Dec 2005)
New Revision: 5405
Log:
fixed up ppc64 assembly with .opd sections
do_syscall_for_client_WRK() needed a bigger stack to avoid the linkage area.
always use dot_prefix for label calls
not wrapping assembly with
.section ".text"
...
.previous
- ppc64 doesn't like it... seems we can't 'stack' more than one section to pop off with .previous ?
Modified:
trunk/coregrind/m_libcassert.c
trunk/coregrind/m_machine.c
trunk/coregrind/m_main.c
trunk/coregrind/m_signals.c
trunk/coregrind/m_syscall.c
trunk/coregrind/m_syswrap/syscall-ppc64-linux.S
trunk/coregrind/m_syswrap/syswrap-ppc64-linux.c
trunk/coregrind/m_trampoline.S
trunk/coregrind/vki_unistd-ppc64-linux.h
trunk/docs/internals/performance.txt
Modified: trunk/coregrind/m_libcassert.c
===================================================================
--- trunk/coregrind/m_libcassert.c 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_libcassert.c 2005-12-22 15:53:12 UTC (rev 5405)
@@ -78,8 +78,8 @@
#elif defined(VGP_ppc64_linux)
# define GET_REAL_PC_SP_AND_FP(pc, sp, fp) \
asm("mflr 0;" /* r0 =3D lr */ \
- "bl m_libcassert_get_ip;" /* lr =3D pc */ \
- "m_libcassert_get_ip:\n" \
+ "bl .m_libcassert_get_ip;" /* lr =3D pc */ \
+ ".m_libcassert_get_ip:\n" \
"mflr %0;" \
"mtlr 0;" /* restore lr */ \
"mr %1,1;" \
Modified: trunk/coregrind/m_machine.c
===================================================================
--- trunk/coregrind/m_machine.c 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_machine.c 2005-12-22 15:53:12 UTC (rev 5405)
@@ -373,7 +373,7 @@
/* VG_(printf)("FP %d VMX %d\n", (Int)have_fp, (Int)have_vmx); */
=20
/* We can only support 3 cases, not 4 (vmx but no fp). So make
- fp a prerequisite for vmx. */
+ fp a prerequisite for vmx. */
if (have_vmx && !have_fp)
have_vmx =3D False;
=20
Modified: trunk/coregrind/m_main.c
===================================================================
--- trunk/coregrind/m_main.c 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_main.c 2005-12-22 15:53:12 UTC (rev 5405)
@@ -2850,9 +2850,9 @@
);
#elif defined(VGP_ppc64_linux)
asm("\n"
- ".text\n"
/* PPC64 ELF ABI says '_start' points to a function descriptor.
So we must have one, and that is what goes into the .opd section.=
*/
+ "\t.align 2\n"
"\t.global _start\n"
"\t.section \".opd\",\"aw\"\n"
"\t.align 3\n"
@@ -2886,7 +2886,6 @@
"\tbl ._start_in_C\n"
"\tnop\n"
"\ttrap\n"
- ".previous\n"
);
#else
#error "_start: needs implementation on this platform"
Modified: trunk/coregrind/m_signals.c
===================================================================
--- trunk/coregrind/m_signals.c 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_signals.c 2005-12-22 15:53:12 UTC (rev 5405)
@@ -477,11 +477,18 @@
".previous\n"
#elif defined(VGP_ppc64_linux)
# define _MYSIG(name) \
- ".text\n" \
+ ".align 2\n" \
+ ".globl my_sigreturn\n" \
+ ".section \".opd\",\"aw\"\n" \
+ ".align 3\n" \
"my_sigreturn:\n" \
+ ".quad .my_sigreturn,.TOC.@tocbase,0\n" \
+ ".previous\n" \
+ ".type .my_sigreturn,@function\n" \
+ ".globl .my_sigreturn\n" \
+ ".my_sigreturn:\n" \
" li 0, " #name "\n" \
- " sc\n" \
- ".previous\n"
+ " sc\n"
#else
# error Unknown platform
#endif
Modified: trunk/coregrind/m_syscall.c
===================================================================
--- trunk/coregrind/m_syscall.c 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_syscall.c 2005-12-22 15:53:12 UTC (rev 5405)
@@ -218,7 +218,15 @@
bottom but of [1]. */
extern void do_syscall_WRK ( ULong* argblock );
asm(
-".text\n"
+".align 2\n"
+".globl do_syscall_WRK\n"
+".section \".opd\",\"aw\"\n"
+".align 3\n"
+"do_syscall_WRK:\n"
+".quad .do_syscall_WRK,.TOC.@tocbase,0\n"
+".previous\n"
+".type .do_syscall_WRK,@function\n"
+".globl .do_syscall_WRK\n"
".do_syscall_WRK:\n"
" std 3,-16(1)\n" /* stash arg */
" ld 8, 48(3)\n" /* sc arg 6 */
@@ -236,7 +244,6 @@
" andi. 3,3,1\n"
" std 3,8(5)\n" /* argblock[1] =3D cr0.s0 & 1 */
" blr\n"
-".previous\n"
);
#else
# error Unknown platform
Modified: trunk/coregrind/m_syswrap/syscall-ppc64-linux.S
===================================================================
--- trunk/coregrind/m_syswrap/syscall-ppc64-linux.S 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_syswrap/syscall-ppc64-linux.S 2005-12-22 15:53:12 UTC (rev 5405)
@@ -70,14 +70,22 @@
/* from vki_arch.h */
#define VKI_SIG_SETMASK 2
=20
+.align 2
+.globl ML_(do_syscall_for_client_WRK)
+.section ".opd","aw"
+.align 3
+ML_(do_syscall_for_client_WRK):=09
+.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
+.previous
+.type .ML_(do_syscall_for_client_WRK),@function
.globl .ML_(do_syscall_for_client_WRK)
.ML_(do_syscall_for_client_WRK):
/* make a stack frame */
- stdu 1,-64(1)
- std 31,56(1)
- std 30,48(1)
- std 29,40(1)
- std 28,32(1)
+ stdu 1,-80(1)
+ std 31,72(1)
+ std 30,64(1)
+ std 29,56(1)
+ std 28,48(1)
mr 31,3 /* syscall number */
mr 30,4 /* guest_state */
mr 29,6 /* postmask */
@@ -122,11 +130,11 @@
/* now safe from signals */
=20
/* pop off stack frame */
-5: ld 28,32(1)
- ld 29,40(1)
- ld 30,48(1)
- ld 31,56(1)
- addi 1,1,64
+5: ld 28,48(1)
+ ld 29,56(1)
+ ld 30,64(1)
+ ld 31,72(1)
+ addi 1,1,80
blr
=20
/* failure: return -ve error code */
@@ -149,8 +157,7 @@
ML_(blksys_committed): .long 4b
ML_(blksys_finished): .long 5b
=20
-.previous
- =09
+
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
=20
Modified: trunk/coregrind/m_syswrap/syswrap-ppc64-linux.c
===================================================================
--- trunk/coregrind/m_syswrap/syswrap-ppc64-linux.c 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_syswrap/syswrap-ppc64-linux.c 2005-12-22 15:53:12 UTC (rev 5405)
@@ -74,7 +74,7 @@
address, the second word is the TOC ptr (r2), and the third word is
the static chain value. */
asm(
-".text\n"
+" .align 2\n"
" .globl vgModuleLocal_call_on_new_stack_0_1\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
@@ -122,7 +122,6 @@
" mtcr 0\n\t" // CAB: Need this?
" bctr\n\t" // jump to dst
" trap\n" // should never get here
-".previous\n"
);
=20
=20
@@ -166,7 +165,15 @@
Int* parent_tid,=20
void/*vki_modify_ldt_t*/ * );
asm(
-".text\n"
+" .align 2\n"
+" .globl do_syscall_clone_ppc64_linux\n"
+" .section \".opd\",\"aw\"\n"
+" .align 3\n"
+"do_syscall_clone_ppc64_linux:\n"
+" .quad .do_syscall_clone_ppc64_linux,.TOC.@tocbase,0\n"
+" .previous\n"
+" .type .do_syscall_clone_ppc64_linux,@function\n"
+" .globl .do_syscall_clone_ppc64_linux\n"
".do_syscall_clone_ppc64_linux:\n"
" stdu 1,-64(1)\n"
" std 29,40(1)\n"
@@ -229,7 +236,6 @@
" ld 31,56(1)\n"
" addi 1,1,64\n"
" blr\n"
-".previous\n"
);
=20
#undef __NR_CLONE
Modified: trunk/coregrind/m_trampoline.S
===================================================================
--- trunk/coregrind/m_trampoline.S 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/m_trampoline.S 2005-12-22 15:53:12 UTC (rev 5405)
@@ -296,11 +296,28 @@
/* a leading page of unexecutable code */
UD2_PAGE
=20
+.align 2
.global VG_(trampoline_stuff_start)
+.section ".opd","aw"
+.align 3
VG_(trampoline_stuff_start):
+.quad .VG_(trampoline_stuff_start),.TOC.@tocbase,0
+.previous
+.type .VG_(trampoline_stuff_start),@function
+.global .VG_(trampoline_stuff_start)
+.VG_(trampoline_stuff_start):
=09
+
+.align 2
.global VG_(trampoline_stuff_end)
+.section ".opd","aw"
+.align 3
VG_(trampoline_stuff_end):
+.quad .VG_(trampoline_stuff_end),.TOC.@tocbase,0
+.previous
+.type .VG_(trampoline_stuff_end),@function
+.global .VG_(trampoline_stuff_end)
+.VG_(trampoline_stuff_end):
=20
# undef UD2_16
# undef UD2_64
Modified: trunk/coregrind/vki_unistd-ppc64-linux.h
===================================================================
--- trunk/coregrind/vki_unistd-ppc64-linux.h 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/coregrind/vki_unistd-ppc64-linux.h 2005-12-22 15:53:12 UTC (rev 5405)
@@ -309,4 +309,4 @@
#define __NR_inotify_rm_watch 277
=20
=20
-#endif /* __VKI_UNISTD_PPC32_LINUX_H */
+#endif /* __VKI_UNISTD_PPC64_LINUX_H */
Modified: trunk/docs/internals/performance.txt
===================================================================
--- trunk/docs/internals/performance.txt 2005-12-22 15:16:43 UTC (rev 5404)
+++ trunk/docs/internals/performance.txt 2005-12-22 15:53:12 UTC (rev 5405)
@@ -16,7 +16,7 @@
Saved 1--3% on a few programs.
- r5345,r5346,r5352: Julian improved the dispatcher so that x86 and
AMD64 use jumps instead of call/return for calling translations.
- Also, on x86, amd64 and ppc32, --profile-flags style profiling was
+ Also, on x86, amd64, ppc32 and ppc64, --profile-flags style profiling was
removed from the despatch loop unless --profile-flags is being used.
Improved Nulgrind performance typically by 10--20%, and Memcheck
performance typically by 2--20%.
|