Update of /cvsroot/sbcl/sbcl/src/code
In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv14154/src/code
Modified Files:
target-thread.lisp
Log Message:
1.0.37.34: Fix lost wakeup bug between CONDITION-WAIT and CONDITION-NOTIFY.
(Patch and write up below are actually due to Nikodemus.)
Use the waitqueue object as the data item in CONDITION-NOTIFY, and the
thread in CONDITION-WAIT.
It doesn't matter if multiple notifies use the same object -- single
thread notifying multiple times has the same meaning as multiple
threads sending one notification each, but multiple waiters must use
different objects as per following scenario:
1) Thread A calls CONDITION-WAIT on a condition variable CVAR.
Thread A sets CVAR's waitqueue-data to token A.
Thread A is preempted after the (RELEASE-MUTEX), before the
FUTEX-WAIT.
2) Thread B is run. Thread B invokes CONDITION-NOTIFY.
Thread B sets CVAR's waitqueue-data to a token B.
Thread B invokes FUTEX-WAKE which is NO-OP because no thread is
yet waiting on the futex.
3) Thread C is run. Thread C invokes CONDITION-WAIT.
Thread C sets CVAR's waitqueue-data to token C.
4) Thread A is finally run again. The call to FUTEX-WAIT compares
CVAR's waitqueue-data with token A. If token A == token C, the
wakeup is lost.
Similarly, if step 3 does not happen, and token A == token B, the
wakeup is lost.
Furthermore, consider if thread A == thread B, and step 3 does not
happen. (This can occur due to an interrupt.) Hence CONDITION-NOTIFY
cannot use the thread object as the token.
Index: target-thread.lisp
===================================================================
RCS file: /cvsroot/sbcl/sbcl/src/code/target-thread.lisp,v
retrieving revision 1.130
retrieving revision 1.131
diff -u -d -r1.130 -r1.131
--- target-thread.lisp 3 Apr 2010 16:46:09 -0000 1.130
+++ target-thread.lisp 3 Apr 2010 18:06:38 -0000 1.131
@@ -588,51 +588,50 @@
;; Need to disable interrupts so that we don't miss grabbing the
;; mutex on our way out.
(without-interrupts
- (let ((me nil))
- ;; This setf becomes visible to other CPUS due to the usual
- ;; memory barrier semantics of lock acquire/release. This must
- ;; not be moved into the loop else wakeups may be lost upon
- ;; continuing after a deadline or EINTR.
- (setf (waitqueue-data queue) me)
- (loop
- (multiple-value-bind (to-sec to-usec)
- (allow-with-interrupts (decode-timeout nil))
- (case (unwind-protect
- (with-pinned-objects (queue me)
- ;; RELEASE-MUTEX is purposefully as close to
- ;; FUTEX-WAIT as possible to reduce the size
- ;; of the window where WAITQUEUE-DATA may be
- ;; set by a notifier.
- (release-mutex mutex)
- ;; Now we go to sleep using futex-wait. If
- ;; anyone else manages to grab MUTEX and call
- ;; CONDITION-NOTIFY during this comment, it
- ;; will change queue->data, and so futex-wait
- ;; returns immediately instead of sleeping.
- ;; Ergo, no lost wakeup. We may get spurious
- ;; wakeups, but that's ok.
- (allow-with-interrupts
- (futex-wait (waitqueue-data-address queue)
- (get-lisp-obj-address me)
- ;; our way of saying "no
- ;; timeout":
- (or to-sec -1)
- (or to-usec 0))))
- ;; If we are interrupted while waiting, we should
- ;; do these things before returning. Ideally, in
- ;; the case of an unhandled signal, we should do
- ;; them before entering the debugger, but this is
- ;; better than nothing.
- (allow-with-interrupts (get-mutex mutex)))
- ;; ETIMEDOUT; we know it was a timeout, yet we cannot
- ;; signal a deadline unconditionally here because the
- ;; call to GET-MUTEX may already have signaled it.
- ((1))
- ;; EINTR
- ((2))
- ;; EWOULDBLOCK, -1 here, is the possible spurious wakeup
- ;; case. 0 is the normal wakeup.
- (otherwise (return)))))))))
+ ;; This setf becomes visible to other CPUS due to the usual
+ ;; memory barrier semantics of lock acquire/release. This must
+ ;; not be moved into the loop else wakeups may be lost upon
+ ;; continuing after a deadline or EINTR.
+ (setf (waitqueue-data queue) me)
+ (loop
+ (multiple-value-bind (to-sec to-usec)
+ (allow-with-interrupts (decode-timeout nil))
+ (case (unwind-protect
+ (with-pinned-objects (queue me)
+ ;; RELEASE-MUTEX is purposefully as close to
+ ;; FUTEX-WAIT as possible to reduce the size
+ ;; of the window where WAITQUEUE-DATA may be
+ ;; set by a notifier.
+ (release-mutex mutex)
+ ;; Now we go to sleep using futex-wait. If
+ ;; anyone else manages to grab MUTEX and call
+ ;; CONDITION-NOTIFY during this comment, it
+ ;; will change queue->data, and so futex-wait
+ ;; returns immediately instead of sleeping.
+ ;; Ergo, no lost wakeup. We may get spurious
+ ;; wakeups, but that's ok.
+ (allow-with-interrupts
+ (futex-wait (waitqueue-data-address queue)
+ (get-lisp-obj-address me)
+ ;; our way of saying "no
+ ;; timeout":
+ (or to-sec -1)
+ (or to-usec 0))))
+ ;; If we are interrupted while waiting, we should
+ ;; do these things before returning. Ideally, in
+ ;; the case of an unhandled signal, we should do
+ ;; them before entering the debugger, but this is
+ ;; better than nothing.
+ (allow-with-interrupts (get-mutex mutex)))
+ ;; ETIMEDOUT; we know it was a timeout, yet we cannot
+ ;; signal a deadline unconditionally here because the
+ ;; call to GET-MUTEX may already have signaled it.
+ ((1))
+ ;; EINTR
+ ((2))
+ ;; EWOULDBLOCK, -1 here, is the possible spurious wakeup
+ ;; case. 0 is the normal wakeup.
+ (otherwise (return))))))))
(defun condition-notify (queue &optional (n 1))
#!+sb-doc
@@ -649,17 +648,19 @@
#!+sb-lutex
(with-lutex-address (lutex (waitqueue-lutex queue))
(%lutex-wake lutex n))
- ;; no problem if >1 thread notifies during the comment in
- ;; condition-wait: as long as the value in queue-data isn't the
- ;; waiting thread's id, it matters not what it is
+ ;; No problem if >1 thread notifies during the comment in condition-wait:
+ ;; as long as the value in queue-data isn't the waiting thread's id, it
+ ;; matters not what it is -- using the queue object itself is handy.
+ ;;
;; XXX we should do something to ensure that the result of this setf
- ;; is visible to all CPUs
+ ;; is visible to all CPUs.
+ ;;
+ ;; ^-- surely futex_wake() involves a memory barrier?
#!-sb-lutex
- (let ((me *current-thread*))
- (progn
- (setf (waitqueue-data queue) me)
- (with-pinned-objects (queue)
- (futex-wake (waitqueue-data-address queue) n))))))
+ (progn
+ (setf (waitqueue-data queue) queue)
+ (with-pinned-objects (queue)
+ (futex-wake (waitqueue-data-address queue) n)))))
(defun condition-broadcast (queue)
#!+sb-doc
|