[Substrate-commits] SF.net SVN: substrate: [287] trunk/Foundation
Brought to you by:
landonf
|
From: <la...@us...> - 2006-09-01 00:08:27
|
Revision: 287
http://svn.sourceforge.net/substrate/?rev=287&view=rev
Author: landonf
Date: 2006-08-31 17:08:24 -0700 (Thu, 31 Aug 2006)
Log Message:
-----------
Merge in latest spinlock implementation from Postgres
Modified Paths:
--------------
trunk/Foundation/s_lock.c
trunk/Foundation/s_lock.h
Modified: trunk/Foundation/s_lock.c
===================================================================
--- trunk/Foundation/s_lock.c 2006-08-31 22:59:15 UTC (rev 286)
+++ trunk/Foundation/s_lock.c 2006-09-01 00:08:24 UTC (rev 287)
@@ -30,7 +30,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/s_lock.c,v 1.35 2004/12/31 22:01:05 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/s_lock.c,v 1.40.2.2 2006/05/11 21:58:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -68,6 +68,7 @@
#include <Foundation/s_lock.h>
#include <Foundation/port.h>
+/* A few postgresql defines and macros */
/*
* The random() function is expected to yield values between 0 and
* MAX_RANDOM_VALUE. Currently, all known implementations yield
@@ -78,7 +79,12 @@
* considerably inferior to --- random().
*/
#define MAX_RANDOM_VALUE (0x7FFFFFFF)
+#define Min(x, y) ((x) < (y) ? (x) : (y))
+#define Max(x, y) ((x) > (y) ? (x) : (y))
+
+static int spins_per_delay = DEFAULT_SPINS_PER_DELAY;
+
/*
* s_lock_stuck() - complain about a stuck spinlock
*/
@@ -104,44 +110,52 @@
lf_s_lock(volatile slock_t *lock, const char *file, int line)
{
/*
- * We loop tightly for awhile, then delay using pg_usleep() and try
- * again. Preferably, "awhile" should be a small multiple of the
- * maximum time we expect a spinlock to be held. 100 iterations seems
- * about right. In most multi-CPU scenarios, the spinlock is probably
- * held by a process on another CPU and will be released before we
- * finish 100 iterations. However, on a uniprocessor, the tight loop
- * is just a waste of cycles, so don't iterate thousands of times.
+ * We loop tightly for awhile, then delay using pg_usleep() and try again.
+ * Preferably, "awhile" should be a small multiple of the maximum time we
+ * expect a spinlock to be held. 100 iterations seems about right as an
+ * initial guess. However, on a uniprocessor the loop is a waste of
+ * cycles, while in a multi-CPU scenario it's usually better to spin a bit
+ * longer than to call the kernel, so we try to adapt the spin loop count
+ * depending on whether we seem to be in a uniprocessor or multiprocessor.
*
+ * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
+ * be wrong; there are platforms where that can result in a "stuck
+ * spinlock" failure. This has been seen particularly on Alphas; it seems
+ * that the first TAS after returning from kernel space will always fail
+ * on that hardware.
+ *
* Once we do decide to block, we use randomly increasing pg_usleep()
- * delays. The first delay is 10 msec, then the delay randomly
- * increases to about one second, after which we reset to 10 msec and
- * start again. The idea here is that in the presence of heavy
- * contention we need to increase the delay, else the spinlock holder
- * may never get to run and release the lock. (Consider situation
- * where spinlock holder has been nice'd down in priority by the
- * scheduler --- it will not get scheduled until all would-be
- * acquirers are sleeping, so if we always use a 10-msec sleep, there
- * is a real possibility of starvation.) But we can't just clamp the
- * delay to an upper bound, else it would take a long time to make a
- * reasonable number of tries.
+ * delays. The first delay is 1 msec, then the delay randomly increases to
+ * about one second, after which we reset to 1 msec and start again. The
+ * idea here is that in the presence of heavy contention we need to
+ * increase the delay, else the spinlock holder may never get to run and
+ * release the lock. (Consider situation where spinlock holder has been
+ * nice'd down in priority by the scheduler --- it will not get scheduled
+ * until all would-be acquirers are sleeping, so if we always use a 1-msec
+ * sleep, there is a real possibility of starvation.) But we can't just
+ * clamp the delay to an upper bound, else it would take a long time to
+ * make a reasonable number of tries.
*
* We time out and declare error after NUM_DELAYS delays (thus, exactly
- * that many tries). With the given settings, this will usually take
- * 3 or so minutes. It seems better to fix the total number of tries
- * (and thus the probability of unintended failure) than to fix the
- * total time spent.
+ * that many tries). With the given settings, this will usually take 2 or
+ * so minutes. It seems better to fix the total number of tries (and thus
+ * the probability of unintended failure) than to fix the total time
+ * spent.
*
- * The pg_usleep() delays are measured in centiseconds (0.01 sec) because
- * 10 msec is a common resolution limit at the OS level.
+ * The pg_usleep() delays are measured in milliseconds because 1 msec is a
+ * common resolution limit at the OS level for newer platforms. On older
+ * platforms the resolution limit is usually 10 msec, in which case the
+ * total delay before timeout will be a bit more.
*/
-#define SPINS_PER_DELAY 100
+#define MIN_SPINS_PER_DELAY 10
+#define MAX_SPINS_PER_DELAY 1000
#define NUM_DELAYS 1000
-#define MIN_DELAY_CSEC 1
-#define MAX_DELAY_CSEC 100
+#define MIN_DELAY_MSEC 1
+#define MAX_DELAY_MSEC 1000
int spins = 0;
int delays = 0;
- int cur_delay = MIN_DELAY_CSEC;
+ int cur_delay = 0;
#ifndef WIN32
struct timeval delay;
#endif
@@ -151,21 +165,21 @@
/* CPU-specific delay each time through the loop */
SPIN_DELAY();
- /* Block the process every SPINS_PER_DELAY tries */
- if (++spins > SPINS_PER_DELAY)
+ /* Block the process every spins_per_delay tries */
+ if (++spins >= spins_per_delay)
{
if (++delays > NUM_DELAYS)
s_lock_stuck(lock, file, line);
- if (cur_delay > 0) {
+ if (cur_delay == 0) /* first time to delay? */
+ cur_delay = MIN_DELAY_MSEC;
#ifndef WIN32
- delay.tv_sec = (cur_delay * 10000L) / 1000000L;
- delay.tv_usec = (cur_delay * 10000L) % 1000000L;
- (void) select(0, NULL, NULL, NULL, &delay);
+ delay.tv_sec = 0;
+ delay.tv_usec = cur_delay * 1000L;
+ (void) select(0, NULL, NULL, NULL, &delay);
#else
- SleepEx(((cur_delay * 10000L) < 500 ? 1 : ((cur_delay * 10000L) + 500) / 1000), FALSE);
+ SleepEx(cur_delay, FALSE);
#endif
- }
#if defined(S_LOCK_TEST)
fprintf(stdout, "*");
@@ -174,17 +188,78 @@
/* increase delay by a random fraction between 1X and 2X */
cur_delay += (int) (cur_delay *
- (((double) random()) / ((double) MAX_RANDOM_VALUE)) + 0.5);
+ (((double) random()) / ((double) MAX_RANDOM_VALUE)) + 0.5);
/* wrap back to minimum delay when max is exceeded */
- if (cur_delay > MAX_DELAY_CSEC)
- cur_delay = MIN_DELAY_CSEC;
+ if (cur_delay > MAX_DELAY_MSEC)
+ cur_delay = MIN_DELAY_MSEC;
spins = 0;
}
}
+
+ /*
+ * If we were able to acquire the lock without delaying, it's a good
+ * indication we are in a multiprocessor. If we had to delay, it's a sign
+ * (but not a sure thing) that we are in a uniprocessor. Hence, we
+ * decrement spins_per_delay slowly when we had to delay, and increase it
+ * rapidly when we didn't. It's expected that spins_per_delay will
+ * converge to the minimum value on a uniprocessor and to the maximum
+ * value on a multiprocessor.
+ *
+ * Note: spins_per_delay is local within our current process. We want to
+ * average these observations across multiple backends, since it's
+ * relatively rare for this function to even get entered, and so a single
+ * backend might not live long enough to converge on a good value. That
+ * is handled by the two routines below.
+ */
+ if (cur_delay == 0)
+ {
+ /* we never had to delay */
+ if (spins_per_delay < MAX_SPINS_PER_DELAY)
+ spins_per_delay = Min (spins_per_delay + 100, MAX_SPINS_PER_DELAY);
+ }
+ else
+ {
+ if (spins_per_delay > MIN_SPINS_PER_DELAY)
+ spins_per_delay = Max (spins_per_delay - 1, MIN_SPINS_PER_DELAY);
+ }
}
+
/*
+ * Set local copy of spins_per_delay during backend startup.
+ *
+ * NB: this has to be pretty fast as it is called while holding a spinlock
+ */
+LF_PRIVATE void
+set_spins_per_delay(int shared_spins_per_delay)
+{
+ spins_per_delay = shared_spins_per_delay;
+}
+
+/*
+ * Update shared estimate of spins_per_delay during backend exit.
+ *
+ * NB: this has to be pretty fast as it is called while holding a spinlock
+ */
+LF_PRIVATE int
+update_spins_per_delay(int shared_spins_per_delay)
+{
+ /*
+ * We use an exponential moving average with a relatively slow adaption
+ * rate, so that noise in any one backend's result won't affect the shared
+ * value too much. As long as both inputs are within the allowed range,
+ * the result must be too, so we need not worry about clamping the result.
+ *
+ * We deliberately truncate rather than rounding; this is so that single
+ * adjustments inside a backend can affect the shared estimate (see the
+ * asymmetric adjustment rules above).
+ */
+ return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
+}
+
+
+/*
* Various TAS implementations that cannot live in s_lock.h as no inline
* definition exists (yet).
* In the future, get rid of tas.[cso] and fold it into this file.
@@ -204,7 +279,12 @@
*/
-#if defined(__m68k__)
+/*
+ * Note: all the if-tests here probably ought to be testing gcc version
+ * rather than platform, but I don't have adequate info to know what to
+ * write. Ideally we'd flush all this in favor of the inline version.
+ */
+#if defined(__m68k__) && !defined(__linux__)
/* really means: extern int tas(slock_t* **lock); */
static void
tas_dummy()
@@ -212,7 +292,7 @@
__asm__ __volatile__(
#if defined(__NetBSD__) && defined(__ELF__)
/* no underscore for label and % for registers */
- "\
+ "\
.global tas \n\
tas: \n\
movel %sp@(0x4),%a0 \n\
@@ -224,7 +304,7 @@
moveq #0,%d0 \n\
rts \n"
#else
- "\
+ "\
.global _tas \n\
_tas: \n\
movel sp@(0x4),a0 \n\
@@ -236,39 +316,9 @@
moveq #0,d0 \n\
rts \n"
#endif /* __NetBSD__ && __ELF__ */
-);
+ );
}
-#endif /* __m68k__ */
-
-
-#if defined(__mips__) && !defined(__sgi)
-static void
-tas_dummy()
-{
- __asm__ __volatile__(
- "\
-.global tas \n\
-tas: \n\
- .frame $sp, 0, $31 \n\
- .set push \n\
- .set mips2 \n\
- ll $14, 0($4) \n\
- or $15, $14, 1 \n\
- sc $15, 0($4) \n\
- .set pop \n\
- beq $15, 0, fail\n\
- bne $14, 0, fail\n\
- li $2, 0 \n\
- .livereg 0x2000FF0E,0x00000FFF \n\
- j $31 \n\
-fail: \n\
- li $2, 1 \n\
- j $31 \n\
-");
-}
-#endif /* __mips__ && !__sgi */
-
-
+#endif /* __m68k__ && !__linux__ */
#else /* not __GNUC__ */
/*
@@ -309,15 +359,6 @@
tas_dummy() /* really means: extern int tas(slock_t
* *lock); */
{
-
-#ifdef SUNOS4_CC
- asm(".seg \"data\"");
- asm(".seg \"text\"");
-#else
- asm(".section \"data\"");
- asm(".section \"text\"");
-#endif
-
asm("_tas:");
/*
Modified: trunk/Foundation/s_lock.h
===================================================================
--- trunk/Foundation/s_lock.h 2006-08-31 22:59:15 UTC (rev 286)
+++ trunk/Foundation/s_lock.h 2006-09-01 00:08:24 UTC (rev 287)
@@ -86,7 +86,7 @@
* ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
- * $PostgreSQL: pgsql/src/include/storage/s_lock.h,v 1.133 2004/12/31 22:03:42 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/s_lock.h,v 1.142 2005/10/11 20:41:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -123,7 +123,7 @@
*/
-#if defined(__i386__) || defined(__x86_64__) /* AMD Opteron */
+#ifdef __i386__
#define HAS_TEST_AND_SET
typedef unsigned char slock_t;
@@ -135,7 +135,11 @@
{
register slock_t _res = 1;
- /* Use a non-locking test before asserting the bus lock */
+ /*
+ * Use a non-locking test before asserting the bus lock. Note that the
+ * extra test appears to be a small loss on some x86 platforms and a small
+ * win on others; it's by no means clear that we should keep it.
+ */
__asm__ __volatile__(
" cmpb $0,%1 \n"
" jne 1f \n"
@@ -180,10 +184,52 @@
" rep; nop \n");
}
-#endif /* __i386__ || __x86_64__ */
+#endif /* __i386__ */
-#if defined(__ia64__) || defined(__ia64) /* __ia64 used by ICC compiler? */
+#ifdef __x86_64__ /* AMD Opteron, Intel EM64T */
+#define HAS_TEST_AND_SET
+
+typedef unsigned char slock_t;
+
+#define TAS(lock) tas(lock)
+
+static __inline__ int
+tas(volatile slock_t *lock)
+{
+ register slock_t _res = 1;
+
+ /*
+ * On Opteron, using a non-locking test before the locking instruction
+ * is a huge loss. On EM64T, it appears to be a wash or small loss,
+ * so we needn't bother to try to distinguish the sub-architectures.
+ */
+ __asm__ __volatile__(
+ " lock \n"
+ " xchgb %0,%1 \n"
+: "+q"(_res), "+m"(*lock)
+:
+: "memory", "cc");
+ return (int) _res;
+}
+
+#define SPIN_DELAY() spin_delay()
+
+static __inline__ void
+spin_delay(void)
+{
+ /*
+ * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
+ * Opteron, but it may be of some use on EM64T, so we keep it.
+ */
+ __asm__ __volatile__(
+ " rep; nop \n");
+}
+
+#endif /* __x86_64__ */
+
+
+#if defined(__ia64__) || defined(__ia64)
/* Intel Itanium */
#define HAS_TEST_AND_SET
@@ -191,6 +237,8 @@
#define TAS(lock) tas(lock)
+#ifndef __INTEL_COMPILER
+
static __inline__ int
tas(volatile slock_t *lock)
{
@@ -204,6 +252,19 @@
return (int) ret;
}
+#else /* __INTEL_COMPILER */
+
+static __inline__ int
+tas(volatile slock_t *lock)
+{
+ int ret;
+
+ ret = _InterlockedExchange(lock,1); /* this is a xchg asm macro */
+
+ return ret;
+}
+
+#endif /* __INTEL_COMPILER */
#endif /* __ia64__ || __ia64 */
@@ -328,7 +389,7 @@
#endif /* powerpc */
-#if defined(__mc68000__) && defined(__linux__)
+#if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__)
#define HAS_TEST_AND_SET
typedef unsigned char slock_t;
@@ -350,7 +411,7 @@
return rv;
}
-#endif /* defined(__mc68000__) && defined(__linux__) */
+#endif /* (__mc68000__ || __m68k__) && __linux__ */
#if defined(__vax__)
@@ -453,20 +514,64 @@
#endif /* __alpha || __alpha__ */
-/* These live in s_lock.c, but only for gcc */
+#if defined(__mips__) && !defined(__sgi)
+/* Note: on SGI we use the OS' mutex ABI, see below */
+/* Note: R10000 processors require a separate SYNC */
+#define HAS_TEST_AND_SET
+typedef unsigned int slock_t;
-#if defined(__m68k__)
-#define HAS_TEST_AND_SET
+#define TAS(lock) tas(lock)
-typedef unsigned char slock_t;
-#endif
+static __inline__ int
+tas(volatile slock_t *lock)
+{
+ register volatile slock_t *_l = lock;
+ register int _res;
+ register int _tmp;
+ __asm__ __volatile__(
+ " .set push \n"
+ " .set mips2 \n"
+ " .set noreorder \n"
+ " .set nomacro \n"
+ " ll %0, %2 \n"
+ " or %1, %0, 1 \n"
+ " sc %1, %2 \n"
+ " xori %1, 1 \n"
+ " or %0, %0, %1 \n"
+ " sync \n"
+ " .set pop "
+: "=&r" (_res), "=&r" (_tmp), "+R" (*_l)
+:
+: "memory");
+ return _res;
+}
-#if defined(__mips__) && !defined(__sgi)
+/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
+#define S_UNLOCK(lock) \
+do \
+{ \
+ __asm__ __volatile__( \
+ " .set push \n" \
+ " .set mips2 \n" \
+ " .set noreorder \n" \
+ " .set nomacro \n" \
+ " sync \n" \
+ " .set pop "); \
+ *((volatile slock_t *) (lock)) = 0; \
+} while (0)
+
+#endif /* __mips__ && !__sgi */
+
+
+/* These live in s_lock.c, but only for gcc */
+
+
+#if defined(__m68k__) && !defined(__linux__)
#define HAS_TEST_AND_SET
-typedef unsigned int slock_t;
+typedef unsigned char slock_t;
#endif
@@ -492,9 +597,9 @@
tas(volatile slock_t *s_lock)
{
/* UNIVEL wants %mem in column 1, so we don't pg_indent this file */
-%mem lf_s_lock
+%mem s_lock
pushl %ebx
- movl lf_s_lock, %ebx
+ movl s_lock, %ebx
movl $255, %eax
lock
xchgb %al, (%ebx)
@@ -713,7 +818,7 @@
/* Blow up if we didn't have any way to do spinlocks */
#ifndef HAS_TEST_AND_SET
-#error libFoundation does not have native spinlock support on this platform. Please report this to lib...@op....
+#error Substrate does not have native spinlock support on this platform. Please report this to sub...@li....
#endif
/*
@@ -757,4 +862,10 @@
*/
extern void lf_s_lock(volatile slock_t *lock, const char *file, int line);
+/* Support for dynamic adjustment of spins_per_delay */
+#define DEFAULT_SPINS_PER_DELAY 100
+
+extern void set_spins_per_delay(int shared_spins_per_delay);
+extern int update_spins_per_delay(int shared_spins_per_delay);
+
#endif /* S_LOCK_H */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|