Naveen Agnihotri <agni@MIT.EDU> reported problems running
AOLserver 3.3.1+ad13 on his/her Debian Linux machine running
on parisc architecture.
The symptom is that immediately after connecting to the HTTP
port, AOLserver would immediately return a HTTP 400 Bad
Request without even waiting for input from the socket.
strace output shows this happening:
28068 accept(3, {sin_family=AF_INET, sin_port=htons(48344),
sin_addr=inet_addr("64.8.232.50")}}, [16]) = 13
28068 fcntl(13, F_GETFD) = 0
28068 fcntl(13, F_SETFD, FD_CLOEXEC) = 0
28068 ioctl(13, 0x5421, [1]) = 0
28068 gettimeofday({1054331619, 453952}, {240, 0}) = 0
28068 time(NULL) = 1054331619
28068 rt_sigprocmask(SIG_SETMASK, NULL, [HUP INT PIPE
TERM RTMIN], 8) = 0
28068 write(5, "@/\200\0\0\0\0\0@/\215\360\0\26hV\0005\240\240
\0\0P\3\0"..., 148) = 148
28068 rt_sigprocmask(SIG_SETMASK, NULL, [HUP INT PIPE
TERM RTMIN], 8) = 0
28068 rt_sigsuspend([HUP INT PIPE TERM] <unfinished ...>
28065 <... poll resumed> [{fd=4, events=POLLIN,
revents=POLLIN}], 1, 2000) = 1
28065 getppid() = 28064
28065 read(4, "@/\200\0\0\0\0\0@/\215\360\0\26hV\0005\240\240
\0\0P\3\0"..., 148) = 148
28065 mmap(NULL, 69632,
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40309000
28065 mprotect(0x40319000, 4096, PROT_NONE) = 0
28065 clone(child_stack=0x40309460,
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAN
D|0x26) = 28069
28065 kill(28068, SIGRTMIN <unfinished ...>
28068 --- SIGRTMIN (Real-time signal 0) ---
28068 <... rt_sigsuspend resumed> ) = -1 EINTR (Interrupted
system call)
28068 rt_sigreturn(0x25) = -1 EINTR (Interrupted system
call)
28068 sched_yield() = 0
28068 _newselect(0xa, 0x402f85f0, 0, 0, 0 <unfinished ...>
28065 <... kill resumed> ) = 0
28065 poll( <unfinished ...>
28069 brk(0x398000) = 0x398000
28069 brk(0x399000) = 0x399000
28069 brk(0x39a000) = 0x39a000
28069 brk(0x39b000) = 0x39b000
28069 brk(0x39c000) = 0x39c000
28069 brk(0x39d000) = 0x39d000
28069 brk(0x39e000) = 0x39e000
28069 brk(0x39f000) = 0x39f000
28069 brk(0x3a0000) = 0x3a0000
28069 brk(0x3a1000) = 0x3a1000
28069 brk(0x3a2000) = 0x3a2000
28069 brk(0x3a3000) = 0x3a3000
28069 brk(0x3a4000) = 0x3a4000
28069 brk(0x3a5000) = 0x3a5000
28069 brk(0x3a6000) = 0x3a6000
28069 brk(0x3a7000) = 0x3a7000
28069 brk(0x3a8000) = 0x3a8000
28069 gettimeofday({1054331619, 561819}, {240, 0}) = 0
28069 gettimeofday({1054331619, 563088}, {240, 0}) = 0
*** 28069 recv(13, 0x3561d4, 16000, 0) = -1 EAGAIN (Resource
temporarily unavailable)
28069 time(NULL) = 1054331619
28069 send(13, "HTTP/1.0 400 Bad Request\r\nConten"..., 199, 0)
= 199
28069 send(13, "<!DOCTYPE HTML PUBLIC \&quot;-//IETF//"..., 538, 0)
= 538
28069 send(10, "\0", 1, 0) = 1
28068 <... _newselect resumed> ) = 1
28068 recv(9, "\0", 1, 0) = 1
28068 time([1054331619]) = 1054331619
28068 close(13) = 0
The line starting with "***" is the culprit.
I suggest the following change in nsd/sock.c, in function
Ns_SockRecv(), from:
int
Ns_SockRecv(SOCKET sock, void *buf, int toread, int timeout)
{
int nread;
nread = recv(sock, buf, toread, 0);
if (nread == -1
&& ns_sockerrno == EWOULDBLOCK
&& Ns_SockWait(sock, NS_SOCK_READ, timeout) ==
NS_OK) {
nread = recv(sock, buf, toread, 0);
}
return nread;
}
to:
int
Ns_SockRecv(SOCKET sock, void *buf, int toread, int timeout)
{
int nread;
do {
nread = recv(sock, buf, toread, 0);
} while (nread == -1
&& (ns_sockerrno == EWOULDBLOCK || errno == EAGAIN)
&& Ns_SockWait(sock, NS_SOCK_READ, timeout) ==
NS_OK);
return nread;
}
I'm attaching a unified diff patch to this bug for this change.
After this change, strace output looks like this:
28943 accept(3, {sin_family=AF_INET, sin_port=htons(48641),
sin_addr=inet_addr("64.8.232.50")}}, [16]) = 13
28943 fcntl(13, F_GETFD) = 0
28943 fcntl(13, F_SETFD, FD_CLOEXEC) = 0
28943 ioctl(13, 0x5421, [1]) = 0
28943 gettimeofday({1054333525, 868865}, {240, 0}) = 0
28943 time(NULL) = 1054333525
28943 rt_sigprocmask(SIG_SETMASK, NULL, [HUP INT PIPE
TERM RTMIN], 8) = 0
28943 write(5, "@/\200\0\0\0\0\0@/\215\360\0\26hN\0005\240\300
\0\0P\3\0"..., 148) = 148
28943 rt_sigprocmask(SIG_SETMASK, NULL, [HUP INT PIPE
TERM RTMIN], 8) = 0
28943 rt_sigsuspend([HUP INT PIPE TERM] <unfinished ...>
28940 <... poll resumed> [{fd=4, events=POLLIN,
revents=POLLIN}], 1, 2000) = 1
28940 getppid() = 28939
28940 read(4, "@/\200\0\0\0\0\0@/\215\360\0\26hN\0005\240\300
\0\0P\3\0"..., 148) = 148
28940 mmap(NULL, 69632,
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40309000
28940 mprotect(0x40319000, 4096, PROT_NONE) = 0
28940 clone(child_stack=0x40309460,
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAN
D|0x26) = 28944
28940 kill(28943, SIGRTMIN <unfinished ...>
28943 --- SIGRTMIN (Real-time signal 0) ---
28943 <... rt_sigsuspend resumed> ) = -1 EINTR (Interrupted
system call)
28943 rt_sigreturn(0x25) = -1 EINTR (Interrupted system
call)
28943 sched_yield() = 0
28943 _newselect(0xa, 0x402f85f0, 0, 0, 0 <unfinished ...>
28940 <... kill resumed> ) = 0
28940 poll( <unfinished ...>
28944 brk(0x398000) = 0x398000
28944 brk(0x399000) = 0x399000
28944 brk(0x39a000) = 0x39a000
28944 brk(0x39b000) = 0x39b000
28944 brk(0x39c000) = 0x39c000
28944 brk(0x39d000) = 0x39d000
28944 brk(0x39e000) = 0x39e000
28944 brk(0x39f000) = 0x39f000
28944 brk(0x3a0000) = 0x3a0000
28944 brk(0x3a1000) = 0x3a1000
28944 brk(0x3a2000) = 0x3a2000
28944 brk(0x3a3000) = 0x3a3000
28944 brk(0x3a4000) = 0x3a4000
28944 brk(0x3a5000) = 0x3a5000
28944 brk(0x3a6000) = 0x3a6000
28944 brk(0x3a7000) = 0x3a7000
28944 brk(0x3a8000) = 0x3a8000
28944 gettimeofday({1054333525, 976599}, {240, 0}) = 0
28944 gettimeofday({1054333525, 977858}, {240, 0}) = 0
*** 28944 recv(13, 0x3561f4, 16000, 0) = -1 EAGAIN (Resource
temporarily unavailable)
28944 _newselect(0xe, 0x40309bb0, 0, 0, 0x40309c30
<unfinished ...>
28940 <... poll resumed> [{fd=4, events=POLLIN}], 1, 2000) = 0
28940 getppid() = 28939
28940 poll( <unfinished ...>
28944 <... _newselect resumed> ) = 1
*** 28944 recv(13, "GET / HTTP/1.0\r\n", 16000, 0) = 16
*** 28944 recv(13, 0x3561f4, 16000, 0) = -1 EAGAIN (Resource
temporarily unavailable)
28944 _newselect(0xe, 0x40309e30, 0, 0, 0x40309eb0
<unfinished ...>
28940 <... poll resumed> [{fd=4, events=POLLIN}], 1, 2000) = 0
28940 getppid() = 28939
28940 poll([{fd=4, events=POLLIN}], 1, 2000) = 0
28940 getppid() = 28939
28940 poll( <unfinished ...>
28944 <... _newselect resumed> ) = 1
*** 28944 recv(13, "\r\n", 16000, 0) = 2
28944 newstat("[censored]/servers/server1/pages",
{st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
28944 newstat("[censored]/servers/server1/pages/index.adp",
0x40309d30) = -1 ENOENT (No such file or directory)
28944 newstat("[censored]/servers/server1/pages/index.html",
{st_mode=S_IFREG|0644, st_size=1706, ...}) = 0
28944 newstat("[censored]/servers/server1/pages/index.html",
{st_mode=S_IFREG|0644, st_size=1706, ...}) = 0
28944 time(NULL) = 1054333533
28944 open("[censored]/servers/server1/pages/index.html",
O_RDONLY) = 14
28944 read(14, "<!--\nThis is the default page th"..., 1706) = 1706
28944 close(14) = 0
28944 time(NULL) = 1054333533
28944 time(NULL) = 1054333533
28944 send(13, "HTTP/1.0 200 OK\r\nLast-Modified: "..., 237, 0) =
237
28944 send(13, "<!--\nThis is the default page th"..., 1706, 0) = 1706
28944 send(10, "\0", 1, 0) = 1
28944 gettimeofday({1054333533, 386092}, {240, 0}) = 0
28944 time(NULL) = 1054333533
28944 write(8, "64.8.232.50 - - [30/May/2003:18:"..., 77) = 77
28944 gettimeofday({1054333533, 389859}, {240, 0}) = 0
28944 time(NULL) = 1054333533
28944 gettimeofday({1054333533, 391583}, {240, 0}) = 0
28944 rt_sigprocmask(SIG_BLOCK, NULL, [HUP INT PIPE
TERM RTMIN], 8) = 0
28944 rt_sigprocmask(SIG_UNBLOCK, [RTMIN], [HUP INT PIPE
TERM RTMIN], 8) = 0
28944 gettimeofday({1054333533, 394976}, NULL) = 0
28944 nanosleep({119, 996607000}, <unfinished ...>
28943 <... _newselect resumed> ) = 1
28943 recv(9, "\0", 1, 0) = 1
28943 time([1054333533]) = 1054333533
28943 shutdown(13, 1 /* send */) = 0
The interesting lines are again prefixed with "***".
-- Dossy
Patch to nsd/sock.c to fix this bug