From: Mark H. <ma...@os...> - 2004-04-19 22:37:19
|
I found a race using tipc_queue_size. Multiple processors can be accessing the variable at the same time causing it to become corrupted. I was seeing places where the tipc_queue_size was already zero but being decremented. This caused all kinds of strange things to happen. Anyway, I converted tipc_queue_size to an atomic_t to protect access to the variable. I can now run the tipc benchmark client/server test without hanging the network and/or computer. Index: socket.c =================================================================== RCS file: /cvsroot/tipc/source/unstable/net/tipc/linux-2.6/socket.c,v retrieving revision 1.3 diff -u -r1.3 socket.c --- socket.c 15 Apr 2004 18:06:37 -0000 1.3 +++ socket.c 19 Apr 2004 21:50:19 -0000 @@ -99,6 +99,7 @@ #include <linux/version.h> #include <asm/semaphore.h> #include <asm/string.h> +#include <asm/atomic.h> #include <linux/fcntl.h> #include "tipc_adapt.h" #include <tipc_msg.h> @@ -133,7 +134,7 @@ int buf_rest; }; -static uint tipc_queue_size = 0; +static atomic_t tipc_queue_size = ATOMIC_INIT(0); extern wait_queue_head_t tipc_signal_wq; static uint dispatch(struct tipc_port*,struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *port); @@ -193,8 +194,8 @@ pmask |= pollmask(tsock->queue_head); tsock->pollmask = pmask; tsock->queue_size--; - tipc_queue_size--; spin_unlock_bh(tsock->p->lock); + atomic_dec(&tipc_queue_size); if (unlikely(pmask & POLLHUP)) tipc_disconnect(tsock->p->ref); } @@ -284,7 +285,7 @@ struct sk_buff *next = buf_next(buf); tipc_reject_msg(buf, TIPC_ERR_NO_PORT); buf = next; - tipc_queue_size--; + atomic_dec(&tipc_queue_size); } while (tsock->ev_head) { @@ -371,7 +372,7 @@ /* Remove for next poll/read */ tsock->pollmask &= ~MSG_ERROR; /* Empty error msg? 
*/ - if (!(pmask & TIPC_MSG_PENDING)) + if (!(pmask & TIPC_MSG_PENDING)) advance_queue(tsock); } return pmask; @@ -760,14 +761,16 @@ uint pmask = 0; uint res = TIPC_OK;; - if (unlikely(tipc_queue_size > OVERLOAD_LIMIT_BASE)) { - if (overload(tipc_queue_size, OVERLOAD_LIMIT_BASE, msg)){ + if (unlikely((uint)atomic_read(&tipc_queue_size) > + OVERLOAD_LIMIT_BASE)) { + if (overload(atomic_read(&tipc_queue_size), + OVERLOAD_LIMIT_BASE, msg)){ res = TIPC_ERR_OVERLOAD; goto error; } } if (unlikely(tsock->queue_size > OVERLOAD_LIMIT_BASE / 2)) { - if (overload(tsock->queue_size, OVERLOAD_LIMIT_BASE / 2, msg)){ + if (overload(tsock->queue_size, OVERLOAD_LIMIT_BASE / 2, msg)) { res = TIPC_ERR_OVERLOAD; goto error; } @@ -779,7 +782,7 @@ } } tsock->queue_size += 1; - tipc_queue_size += 1; + atomic_inc(&tipc_queue_size); msg_dbg(msg,"<DISP<: "); buf_set_next(buf, 0); if (tsock->queue_head) { -- Mark Haverkamp <ma...@os...> |
From: Jon M. <jon...@er...> - 2004-04-20 00:32:29
|
It looks ok. You will find that it will collide with version 1.4 that I just checked in, though, so a merger will be needed. Cheers /Jon Mark Haverkamp wrote: >I found a race using tipc_queue_size. Multiple processors can be >accessing the variable at the same time causing it to become corrupted. >I was seeing places where the tipc_queue_size was already zero but being >decremented. This caused all kinds of strange things to happen. Anyway, >I converted tipc_queue_size to an atomic_t to protect access to the >variable. I can now run the tipc benchmark client/server test without >hanging the network and/or computer. > >Index: socket.c >=================================================================== >RCS file: /cvsroot/tipc/source/unstable/net/tipc/linux-2.6/socket.c,v >retrieving revision 1.3 >diff -u -r1.3 socket.c >--- socket.c 15 Apr 2004 18:06:37 -0000 1.3 >+++ socket.c 19 Apr 2004 21:50:19 -0000 >@@ -99,6 +99,7 @@ > #include <linux/version.h> > #include <asm/semaphore.h> > #include <asm/string.h> >+#include <asm/atomic.h> > #include <linux/fcntl.h> > #include "tipc_adapt.h" > #include <tipc_msg.h> >@@ -133,7 +134,7 @@ > int buf_rest; > }; > >-static uint tipc_queue_size = 0; >+static atomic_t tipc_queue_size = ATOMIC_INIT(0); > extern wait_queue_head_t tipc_signal_wq; > static uint dispatch(struct tipc_port*,struct sk_buff *buf); > static void wakeupdispatch(struct tipc_port *port); >@@ -193,8 +194,8 @@ > pmask |= pollmask(tsock->queue_head); > tsock->pollmask = pmask; > tsock->queue_size--; >- tipc_queue_size--; > spin_unlock_bh(tsock->p->lock); >+ atomic_dec(&tipc_queue_size); > if (unlikely(pmask & POLLHUP)) > tipc_disconnect(tsock->p->ref); > } >@@ -284,7 +285,7 @@ > struct sk_buff *next = buf_next(buf); > tipc_reject_msg(buf, TIPC_ERR_NO_PORT); > buf = next; >- tipc_queue_size--; >+ atomic_dec(&tipc_queue_size); > } > > while (tsock->ev_head) { >@@ -371,7 +372,7 @@ > /* Remove for next poll/read */ > tsock->pollmask &= ~MSG_ERROR; > /* Empty error msg? 
*/ >- if (!(pmask & TIPC_MSG_PENDING)) >+ if (!(pmask & TIPC_MSG_PENDING)) > advance_queue(tsock); > } > return pmask; >@@ -760,14 +761,16 @@ > uint pmask = 0; > uint res = TIPC_OK;; > >- if (unlikely(tipc_queue_size > OVERLOAD_LIMIT_BASE)) { >- if (overload(tipc_queue_size, OVERLOAD_LIMIT_BASE, msg)){ >+ if (unlikely((uint)atomic_read(&tipc_queue_size) > >+ OVERLOAD_LIMIT_BASE)) { >+ if (overload(atomic_read(&tipc_queue_size), >+ OVERLOAD_LIMIT_BASE, msg)){ > res = TIPC_ERR_OVERLOAD; > goto error; > } > } > if (unlikely(tsock->queue_size > OVERLOAD_LIMIT_BASE / 2)) { >- if (overload(tsock->queue_size, OVERLOAD_LIMIT_BASE / 2, msg)){ >+ if (overload(tsock->queue_size, OVERLOAD_LIMIT_BASE / 2, msg)) { > res = TIPC_ERR_OVERLOAD; > goto error; > } >@@ -779,7 +782,7 @@ > } > } > tsock->queue_size += 1; >- tipc_queue_size += 1; >+ atomic_inc(&tipc_queue_size); > msg_dbg(msg,"<DISP<: "); > buf_set_next(buf, 0); > if (tsock->queue_head) { > > > > |
From: Mark H. <ma...@os...> - 2004-04-20 15:38:20
|
On Mon, 2004-04-19 at 17:32, Jon Maloy wrote: > It looks ok. You will find that it will collide with version 1.4 that > I just checked in, though, so a merge will be needed. > OK, I merged with your updates and checked it in. cvs automatically did the merge when I updated the file from the main view. Mark. -- Mark Haverkamp <ma...@os...> |