From: Michael A F. <maf...@al...> - 2011-04-29 04:13:23
|
strace on x86_64 sometimes gets confused by interrupted system calls. This happens routinely when tracing a large tree of processes. You get things like this:

1405 vfork( <unfinished ...>
<various output from other processes omitted>
1405 <... vfork resumed> ) = 1406
1405 close(6) = 0
1405 read(5, <unfinished ...>
<various output from other processes omitted>
1405 <... close resumed> ) = 38

Note the strange "unfinished read" which is "resumed" as a close() syscall! What's going on? Here's a big hint: In 32-bit mode, the "read" syscall is syscall #3. In 64-bit mode, the "close" syscall is syscall #3. Process 1405 was a 32-bit process. In between the "unfinished read" and the "resumed close" there was other output, some of which was the handling of a 64-bit process's syscall. That caused the syscall "personality" in strace to switch to 64-bit, and nothing ever switched it back. The fix is below...

# HG changeset patch
# User Michael A Fetterman <maf...@nv...>
# Date 1304049559 14400
# Node ID be916b3ff18e37fab58f1ecef1dae435a626d324
# Parent 0b241c7fcff465a1067ea2f85be7a46921e9a3e6
Remember current 32/64-bit personality of each process when initiating a system call, so that if/when we need to handle the return of that syscall, we can switch back to the correct personality, if/as needed.
Signed-off: Michael A Fetterman <maf...@nv...> defs.h | 1 + syscall.c | 53 +++++++++--------------------- ----------------------- 2 files changed, 10 insertions(+), 44 deletions(-) diff --git a/defs.h b/defs.h --- a/defs.h +++ b/defs.h @@ -344,6 +344,7 @@ short flags; /* See below for TCB_ values */ int pid; /* Process Id of this entry */ long scno; /* System call number */ + int currpers; /* personality when scno was polled */ int u_nargs; /* System call arguments */ long u_arg[MAX_ARGS]; /* System call arguments */ #if defined (LINUX_MIPSN32) diff --git a/syscall.c b/syscall.c --- a/syscall.c +++ b/syscall.c @@ -897,9 +897,7 @@ return -1; if (!(tcp->flags & TCB_INSYSCALL)) { - static int currpers = -1; long val; - int pid = tcp->pid; /* Check CS register value. On x86-64 linux it is: * 0x33 for long mode (64 bit) @@ -910,53 +908,20 @@ if (upeek(tcp, 8*CS, &val) < 0) return -1; switch (val) { - case 0x23: currpers = 1; break; - case 0x33: currpers = 0; break; + case 0x23: tcp->currpers = 1; break; + case 0x33: tcp->currpers = 0; break; default: fprintf(stderr, "Unknown value CS=0x%02X while " "detecting personality of process " - "PID=%d\n", (int)val, pid); - currpers = current_personality; + "PID=%d\n", (int)val, tcp->pid); break; } -# if 0 - /* This version analyzes the opcode of a syscall instruction. - * (int 0x80 on i386 vs. syscall on x86-64) - * It works, but is too complicated. 
- */ - unsigned long val, rip, i; - - if (upeek(tcp, 8*RIP, &rip) < 0) - perror("upeek(RIP)"); - - /* sizeof(syscall) == sizeof(int 0x80) == 2 */ - rip -= 2; - errno = 0; - - call = ptrace(PTRACE_PEEKTEXT, pid, (char *)rip, (char *)0); - if (errno) - fprintf(stderr, "ptrace_peektext failed: %s\n", - strerror(errno)); - switch (call & 0xffff) { - /* x86-64: syscall = 0x0f 0x05 */ - case 0x050f: currpers = 0; break; - /* i386: int 0x80 = 0xcd 0x80 */ - case 0x80cd: currpers = 1; break; - default: - currpers = current_personality; - fprintf(stderr, - "Unknown syscall opcode (0x%04X) while " - "detecting personality of process " - "PID=%d\n", (int)call, pid); - break; - } -# endif - if (currpers != current_personality) { - static const char *const names[] = {"64 bit", "32 bit"}; - set_personality(currpers); - fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n", - pid, names[current_personality]); - } + } + if (tcp->currpers != current_personality) { + static const char *const names[] = {"64 bit", "32 bit"}; + set_personality(tcp->currpers); + fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n", + tcp->pid, names[current_personality]); } # elif defined(IA64) # define IA64_PSR_IS ((long)1 << 34) |