From: Gilles E. <ges...@us...> - 2008-09-13 21:22:48
|
Update of /cvsroot/ipcop/ipcop/src/patches In directory sc8-pr-cvs2.sourceforge.net:/tmp/cvs-serv21062/src/patches Added Files: Tag: IPCOP_v1_4_0 linux-2.4.36_tcp_input.patch Log Message: backport from 2.6 Test condition: 2.4.36 kernel using this iptables configuration: iptables -N SLOWLO iptables -A SLOWLO -m limit --limit 2/sec --limit-burst 1 -j ACCEPT iptables -A SLOWLO -j DROP iptables -A OUTPUT -o lo -p tcp --dport 12000 -j SLOWLO Borrowed ss from iproute2-2.4.7-now-ss020116-try.tar.gz; I had the same result on 2.4.36.7 as Eric Dumazet on 2.6.25 without the patch with his test program. --- NEW FILE: linux-2.4.36_tcp_input.patch --- tcp: Clear probes_out more aggressively in tcp_ack(). From David S. Miller's commit log message: This is based upon an excellent bug report from Eric Dumazet. tcp_ack() should clear ->icsk_probes_out even if there are packets outstanding. Otherwise if we get a sequence of ACKs while we do have packets outstanding over and over again, we'll never clear the probes_out value and eventually think the connection is too sick and we'll reset it. This appears to be some "optimization" added to tcp_ack() in the 2.4.x timeframe. In 2.2.x, probes_out is pretty much always cleared by tcp_ack(). Here is Eric's original report: ---------------------------------------- Apparently, we can in some situations reset TCP connections in a couple of seconds when some frames are lost. In order to reproduce the problem, please try the following program on linux-2.6.25.* Setup some iptables rules to allow two frames per second sent on loopback interface to tcp destination port 12000 ... 
Then run the attached program and see the output : ./test_tcp-input State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,1) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,3) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,5) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,7) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,9) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,11) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,13) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:32769 127.0.0.1:12000 timer:(persist,180ms,15) write(): Connection timed out wrote 880 bytes but was interrupted after 10 seconds ESTAB 0 0 127.0.0.1:12000 127.0.0.1:32769 Exiting read() because no data available (4000 ms timeout). read 860 bytes While this tcp session makes progress (sending frames with 50 bytes of payload, every 500ms), linux tcp stack decides to reset it, when tcp_retries2 is reached (default value : 15) ... Source of program : /* * small producer/consumer program. 
* setup a listener on 127.0.0.1:12000 * Forks a child * child connect to 127.0.0.1, and sends 10 bytes on this tcp socket every 100 ms * Father accepts connection, and read all data */ #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include <unistd.h> #include <stdio.h> #include <time.h> #include <sys/poll.h> int port = 12000; char buffer[4096]; int main(int argc, char *argv[]) { int lfd = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in socket_address; time_t t0, t1; int on = 1, sfd, res; unsigned long total = 0; socklen_t alen = sizeof(socket_address); pid_t pid; time(&t0); socket_address.sin_family = AF_INET; socket_address.sin_port = htons(port); socket_address.sin_addr.s_addr = htonl(INADDR_LOOPBACK); if (lfd == -1) { perror("socket()"); return 1; } setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)); if (bind(lfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) { perror("bind"); close(lfd); return 1; } if (listen(lfd, 1) == -1) { perror("listen()"); close(lfd); return 1; } pid = fork(); if (pid == 0) { int i, cfd = socket(AF_INET, SOCK_STREAM, 0); close(lfd); if (connect(cfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) { perror("connect()"); return 1; } for (i = 0 ; ;) { res = write(cfd, "blablabla\n", 10); if (res > 0) total += res; else if (res == -1) { perror("write()"); break; } else break; usleep(100000); if (++i == 10) { system("ss -on dst 127.0.0.1:12000"); i = 0; } } time(&t1); fprintf(stderr, "wrote %lu bytes but was interrupted after %g seconds\n", total, difftime(t1, t0)); system("ss -on | grep 127.0.0.1:12000"); close(cfd); return 0; } sfd = accept(lfd, (struct sockaddr *)&socket_address, &alen); if (sfd == -1) { perror("accept"); return 1; } close(lfd); while (1) { struct pollfd pfd[1]; pfd[0].fd = sfd; pfd[0].events = POLLIN; if (poll(pfd, 1, 4000) == 0) { fprintf(stderr, "Exiting read() because no data available (4000 ms timeout).\n"); break; } res = read(sfd, buffer, 
sizeof(buffer)); if (res > 0) total += res; else if (res == 0) break; else perror("read()"); } fprintf(stderr, "read %lu bytes\n", total); close(sfd); return 0; } diff -Nur linux-2.4.36.old/net/ipv4/tcp_input.c linux-2.4.36/net/ipv4/tcp_input.c --- linux-2.4.36.old/net/ipv4/tcp_input.c 2008-01-01 13:06:40.000000000 +0100 +++ linux-2.4.36/net/ipv4/tcp_input.c 2008-08-21 00:42:55.000000000 +0200 @@ -2814,6 +2814,7 @@ * log. Something worked... */ sk->err_soft = 0; + tp->probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; if ((prior_packets = tp->packets_out) == 0) goto no_queue; @@ -2845,8 +2846,6 @@ return 1; no_queue: - tp->probes_out = 0; - /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. |