|
From: <sv...@va...> - 2011-03-20 12:51:01
|
Author: sewardj
Date: 2011-03-20 12:50:48 +0000 (Sun, 20 Mar 2011)
New Revision: 11658
Log:
Initial implementation of VTS pruning. This removes VTS entries for
threads that have exited, thereby limiting the size of the VTSes to
O(# currently live threads) rather than O(# threads that have ever
lived). This drastically improves behaviour on programs that create
many threads over their lifetime, but do not have many live threads at
any given time. Downside is some (hopefully small) risk of missing
races. Controlled by new flag --vts-pruning=never|auto|always [auto].
Since it requires visiting each VtsID in the system, this gives for
free an opportunity for cross-checking the refcounts of the VtsIDs.
TODO: add documentation about this
TODO: more principled way of deciding when a thread can be pruned
TODO: better heuristics for deciding when to do pruning
Also rearrange data decls in libhb_core.c; move the decls up to the
top of the file, away from their associated functions.
Modified:
branches/HGDEV2/coregrind/m_xarray.c
branches/HGDEV2/helgrind/hg_basics.c
branches/HGDEV2/helgrind/hg_basics.h
branches/HGDEV2/helgrind/hg_lock_n_thread.h
branches/HGDEV2/helgrind/hg_main.c
branches/HGDEV2/helgrind/libhb.h
branches/HGDEV2/helgrind/libhb_core.c
branches/HGDEV2/include/pub_tool_xarray.h
Modified: branches/HGDEV2/coregrind/m_xarray.c
===================================================================
--- branches/HGDEV2/coregrind/m_xarray.c 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/coregrind/m_xarray.c 2011-03-20 12:50:48 UTC (rev 11658)
@@ -233,8 +233,6 @@
void* midv;
struct _XArray* xa = (struct _XArray*)xao;
vg_assert(xa);
- vg_assert(first);
- vg_assert(last);
lo = 0;
hi = xa->usedsizeE-1;
while (True) {
@@ -248,13 +246,20 @@
/* Found it, at mid. See how far we can expand this. */
vg_assert(cmpFn( key, VG_(indexXA)(xa, lo) ) >= 0);
vg_assert(cmpFn( key, VG_(indexXA)(xa, hi) ) <= 0);
- *first = *last = mid;
- while (*first > 0
- && 0 == cmpFn( key, VG_(indexXA)(xa, (*first)-1)))
- (*first)--;
- while (*last < xa->usedsizeE-1
- && 0 == cmpFn( key, VG_(indexXA)(xa, (*last)+1)))
- (*last)++;
+ if (first) {
+ *first = mid;
+ while (*first > 0
+ && 0 == cmpFn( key, VG_(indexXA)(xa, (*first)-1))) {
+ (*first)--;
+ }
+ }
+ if (last) {
+ *last = mid;
+ while (*last < xa->usedsizeE-1
+ && 0 == cmpFn( key, VG_(indexXA)(xa, (*last)+1))) {
+ (*last)++;
+ }
+ }
return True;
}
}
Modified: branches/HGDEV2/helgrind/hg_basics.c
===================================================================
--- branches/HGDEV2/helgrind/hg_basics.c 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/helgrind/hg_basics.c 2011-03-20 12:50:48 UTC (rev 11658)
@@ -82,6 +82,7 @@
Bool HG_(clo_free_is_write) = False;
+UWord HG_(clo_vts_pruning) = 1;
/*--------------------------------------------------------------------*/
/*--- end hg_basics.c ---*/
Modified: branches/HGDEV2/helgrind/hg_basics.h
===================================================================
--- branches/HGDEV2/helgrind/hg_basics.h 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/helgrind/hg_basics.h 2011-03-20 12:50:48 UTC (rev 11658)
@@ -110,6 +110,19 @@
before the free. */
extern Bool HG_(clo_free_is_write);
+/* Controls the application of VTS pruning. There are three levels:
+
+ 0: "never": VTS pruning is never done
+
+ 1: "auto": (the default): VTS pruning is sometimes done after VTS
+ GCs, just frequently enough to keep space use under control, as
+ determined by heuristics in libhb_core.c.
+
+ 2: "always": VTS pruning is done after every VTS GC. This is
+ mostly a big time waster, but minimises space use. */
+extern UWord HG_(clo_vts_pruning);
+
+
#endif /* ! __HG_BASICS_H */
/*--------------------------------------------------------------------*/
Modified: branches/HGDEV2/helgrind/hg_lock_n_thread.h
===================================================================
--- branches/HGDEV2/helgrind/hg_lock_n_thread.h 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/helgrind/hg_lock_n_thread.h 2011-03-20 12:50:48 UTC (rev 11658)
@@ -97,6 +97,8 @@
}
Thread;
+/* Get hg's admin_threads value, so libhb can visit all of them. */
+Thread* get_admin_threads ( void );
/* Stores information about a lock's current state. These are
allocated and later freed (when the containing memory becomes
Modified: branches/HGDEV2/helgrind/hg_main.c
===================================================================
--- branches/HGDEV2/helgrind/hg_main.c 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/helgrind/hg_main.c 2011-03-20 12:50:48 UTC (rev 11658)
@@ -120,6 +120,7 @@
/* Admin linked list of Threads */
static Thread* admin_threads = NULL;
+Thread* get_admin_threads ( void ) { return admin_threads; }
/* Admin double linked list of Locks */
/* We need a double linked list to properly and efficiently
@@ -1653,10 +1654,21 @@
/* Send last arg of _so_send as False, since the sending thread
doesn't actually exist any more, so we don't want _so_send to
try taking stack snapshots of it. */
- libhb_so_send(hbthr_q, so, True/*strong_send*/);
+ libhb_so_send(hbthr_q, so, True/*strong_send*//*?!? wrt comment above*/);
libhb_so_recv(hbthr_s, so, True/*strong_recv*/);
libhb_so_dealloc(so);
+ /* Tell libhb that the quitter has been reaped. Note that we might
+ have to be cleverer about this, to exclude 2nd and subsequent
+ notifications for the same hbthr_q, in the case where the app is
+ buggy (calls pthread_join twice or more on the same thread) AND
+ where libpthread is also buggy and doesn't return ESRCH on
+ subsequent calls. (If libpthread isn't thusly buggy, then the
+ wrapper for pthread_join in hg_intercepts.c will stop us getting
+ notified here multiple times for the same joinee.) See also
+ comments in helgrind/tests/jointwice.c. */
+ libhb_joinedwith_done(hbthr_q);
+
/* evh__pre_thread_ll_exit issues an error message if the exiting
thread holds any locks. No need to check here. */
@@ -4620,6 +4632,14 @@
else if VG_BOOL_CLO(arg, "--free-is-write",
HG_(clo_free_is_write)) {}
+
+ else if VG_XACT_CLO(arg, "--vts-pruning=never",
+ HG_(clo_vts_pruning), 0);
+ else if VG_XACT_CLO(arg, "--vts-pruning=auto",
+ HG_(clo_vts_pruning), 1);
+ else if VG_XACT_CLO(arg, "--vts-pruning=always",
+ HG_(clo_vts_pruning), 2);
+
else
return VG_(replacement_malloc_process_cmd_line_option)(arg);
@@ -4653,6 +4673,12 @@
"ranges >= %d bytes\n", SCE_BIGRANGE_T);
VG_(printf)(" 000010 at lock/unlock events\n");
VG_(printf)(" 000001 at thread create/join events\n");
+ VG_(printf)(
+" --vts-pruning=never|auto|always [auto]\n"
+" never: is never done (may cause big space leaks in Helgrind)\n"
+" auto: done just often enough to keep space usage under control\n"
+" always: done after every VTS GC (mostly just a big time waster)\n"
+ );
}
static void hg_fini ( Int exitcode )
Modified: branches/HGDEV2/helgrind/libhb.h
===================================================================
--- branches/HGDEV2/helgrind/libhb.h 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/helgrind/libhb.h 2011-03-20 12:50:48 UTC (rev 11658)
@@ -55,7 +55,8 @@
Thr* libhb_create ( Thr* parent );
/* Thread async exit */
-void libhb_async_exit ( Thr* exitter );
+void libhb_async_exit ( Thr* exitter );
+void libhb_joinedwith_done ( Thr* exitter );
/* Synchronisation objects (abstract to caller) */
Modified: branches/HGDEV2/helgrind/libhb_core.c
===================================================================
--- branches/HGDEV2/helgrind/libhb_core.c 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/helgrind/libhb_core.c 2011-03-20 12:50:48 UTC (rev 11658)
@@ -94,30 +94,27 @@
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
-// Forward declarations //
+// data decls: VtsID //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
-/* fwds for
- Globals needed by other parts of the library. These are set
- once at startup and then never changed. */
-static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
-static ExeContext* (*main_get_EC)( Thr* ) = NULL;
+/* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
+ bits, since they have to be packed into the lowest 30 bits of an
+ SVal. */
+typedef UInt VtsID;
+#define VtsID_INVALID 0xFFFFFFFF
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
-// SECTION BEGIN compressed shadow memory //
+// data decls: SVal //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
-#ifndef __HB_ZSM_H
-#define __HB_ZSM_H
-
typedef ULong SVal;
/* This value has special significance to the implementation, and callers
@@ -129,6 +126,260 @@
value. TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// data decls: ScalarTS //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/* Scalar Timestamp. We have to store a lot of these, so there is
+ some effort to make them as small as possible. Logically they are
+ a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
+ We pack it into 64 bits by representing the Thr* using a ThrID, a
+ small integer (18 bits), and a 46 bit integer for the timestamp
+ number. The 46/18 split is arbitary, but has the effect that
+ Helgrind can only handle programs that create 2^18 or fewer threads
+ over their entire lifetime, and have no more than 2^46 timestamp
+ ticks (synchronisation operations on the same thread).
+
+ This doesn't seem like much of a limitation. 2^46 ticks is
+ 7.06e+13, and if each tick (optimistically) takes the machine 1000
+ cycles to process, then the minimum time to process that many ticks
+ at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
+ but VTS ticks, which isn't realistic.
+
+ NB1: SCALARTS_N_THRBITS must be 21 or lower. The obvious limit is
+ 32 since a ThrID is a UInt. 21 comes from the fact that
+ 'Thr_n_RCEC', which records information about old accesses, packs
+ not only a ThrID but also 2+1+8 other bits in a UInt, hence
+ limiting size to 32-(2+1+8) == 21.
+
+ NB2: thrid values are issued upwards from 1024, and values less
+ than that aren't valid. This isn't per se necessary (any order
+ will do, so long as they are unique), but it does help ensure they
+ are less likely to get confused with the various other kinds of
+ small-integer thread ids drifting around (eg, TId). See also NB5.
+
+ NB3: this probably also relies on the fact that Thr's are never
+ deallocated -- they exist forever. Hence the 1-1 mapping from
+ Thr's to thrid values (set up in Thr__new) persists forever.
+
+ NB4: temp_max_sized_VTS is allocated at startup and never freed.
+ It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS)
+ ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
+ making the memory use for this go sky-high. With
+ SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
+ like an OK tradeoff. If more than 256k threads need to be
+ supported, we could change SCALARTS_N_THRBITS to 20, which would
+ facilitate supporting 1 million threads at the cost of 8MB storage
+ for temp_max_sized_VTS.
+
+ NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
+ ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
+ must never be a valid ThrID. Given NB2 that's OK.
+*/
+#define SCALARTS_N_THRBITS 18 /* valid range: 11 to 21 inclusive */
+#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
+typedef
+ struct {
+ ThrID thrid : SCALARTS_N_THRBITS;
+ ULong tym : SCALARTS_N_TYMBITS;
+ }
+ ScalarTS;
+
+#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// data decls: Filter //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+// baseline: 5, 9
+#define FI_LINE_SZB_LOG2 5
+#define FI_NUM_LINES_LOG2 10
+
+#define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
+#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
+
+#define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
+#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
+
+#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
+ & (Addr)(FI_NUM_LINES-1) )
+
+
+/* In the lines, each 8 bytes are treated individually, and are mapped
+ to a UShort. Regardless of endianness of the underlying machine,
+ bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
+ the highest address.
+
+ Of each bit pair, the higher numbered bit is set if a R has been
+ seen, so the actual layout is:
+
+ 15 14 ... 01 00
+
+ R W for addr+7 ... R W for addr+0
+
+ So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
+*/
+
+/* tags are separated from lines. tags are Addrs and are
+ the base address of the line. */
+typedef
+ struct {
+ UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
+ }
+ FiLine;
+
+typedef
+ struct {
+ Addr tags[FI_NUM_LINES];
+ FiLine lines[FI_NUM_LINES];
+ }
+ Filter;
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// data decls: Thr, ULong_n_EC //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+// Records stacks for H1 history mechanism (DRD-style)
+typedef
+ struct { ULong ull; ExeContext* ec; }
+ ULong_n_EC;
+
+
+/* How many of the above records to collect for each thread? Older
+ ones are dumped when we run out of space. 62.5k requires 1MB per
+ thread, since each ULong_n_EC record is 16 bytes long. When more
+ than N_KWs_N_STACKs_PER_THREAD are present, the older half are
+ deleted to make space. Hence in the worst case we will be able to
+ produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
+ Kw transitions (segments in this thread). For the current setting
+ that gives a guaranteed stack for at least the last 31.25k
+ segments. */
+#define N_KWs_N_STACKs_PER_THREAD 62500
+
+
+struct _Thr {
+ /* Current VTSs for this thread. They change as we go along. viR
+ is the VTS to be used for reads, viW for writes. Usually they
+ are the same, but can differ when we deal with reader-writer
+ locks. It is always the case that
+ VtsID__cmpLEQ(viW,viR) == True
+ that is, viW must be the same, or lagging behind, viR. */
+ VtsID viR;
+ VtsID viW;
+
+ /* Is initially False, and is set to True after the thread really
+ has done a low-level exit. When True, we expect to never see
+ any more memory references done by this thread. */
+ Bool llexit_done;
+
+ /* Is initially False, and is set to True after the thread has been
+ joined with (reaped by some other thread). After this point, we
+ do not expect to see any uses of .viR or .viW, so it is safe to
+ set them to VtsID_INVALID. */
+ Bool joinedwith_done;
+
+ /* A small integer giving a unique identity to this Thr. See
+ comments on the definition of ScalarTS for details. */
+ ThrID thrid : SCALARTS_N_THRBITS;
+
+ /* A filter that removes references for which we believe that
+ msmcread/msmcwrite will not change the state, nor report a
+ race. */
+ Filter* filter;
+
+ /* A pointer back to the top level Thread structure. There is a
+ 1-1 mapping between Thread and Thr structures -- each Thr points
+ at its corresponding Thread, and vice versa. Really, Thr and
+ Thread should be merged into a single structure. */
+ Thread* hgthread;
+
+ /* The ULongs (scalar Kws) in this accumulate in strictly
+ increasing order, without duplicates. This is important because
+ we need to be able to find a given scalar Kw in this array
+ later, by binary search. */
+ XArray* /* ULong_n_EC */ local_Kws_n_stacks;
+};
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// data decls: SO //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+// (UInt) `echo "Synchronisation object" | md5sum`
+#define SO_MAGIC 0x56b3c5b0U
+
+struct _SO {
+ struct _SO* admin_prev;
+ struct _SO* admin_next;
+ VtsID viR; /* r-clock of sender */
+ VtsID viW; /* w-clock of sender */
+ UInt magic;
+};
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// Forward declarations //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+/* fwds for
+ Globals needed by other parts of the library. These are set
+ once at startup and then never changed. */
+static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
+static ExeContext* (*main_get_EC)( Thr* ) = NULL;
+
+/* misc fn and data fwdses */
+static void VtsID__rcinc ( VtsID ii );
+static void VtsID__rcdec ( VtsID ii );
+
+static inline Bool SVal__isC ( SVal s );
+static inline VtsID SVal__unC_Rmin ( SVal s );
+static inline VtsID SVal__unC_Wmin ( SVal s );
+static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
+
+/* A double linked list of all the SO's. */
+SO* admin_SO;
+
+
+
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+// //
+// SECTION BEGIN compressed shadow memory //
+// //
+/////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////
+
+#ifndef __HB_ZSM_H
+#define __HB_ZSM_H
+
/* Initialise the library. Once initialised, it will (or may) call
rcinc and rcdec in response to all the calls below, in order to
allow the user to do reference counting on the SVals stored herein.
@@ -1539,58 +1790,6 @@
static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
-
-/* Scalar Timestamp. We have to store a lot of these, so there is
- some effort to make them as small as possible. Logically they are
- a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
- We pack it into 64 bits by representing the Thr* using a ThrID, a
- small integer (18 bits), and a 46 bit integer for the timestamp
- number. The 46/18 split is arbitary, but has the effect that
- Helgrind can only handle programs that create 2^18 or fewer threads
- over their entire lifetime, and have no more than 2^46 timestamp
- ticks (synchronisation operations on the same thread).
-
- This doesn't seem like much of a limitation. 2^46 ticks is
- 7.06e+13, and if each tick (optimistically) takes the machine 1000
- cycles to process, then the minimum time to process that many ticks
- at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
- but VTS ticks, which isn't realistic.
-
- NB1: SCALARTS_N_THRBITS must be 32 or lower, so they fit in a ThrID
- (== a UInt).
-
- NB2: thrid values are issued upwards from 1024, and values less
- than that aren't valid. This isn't per se necessary (any order
- will do, so long as they are unique), but it does help ensure they
- are less likely to get confused with the various other kinds of
- small-integer thread ids drifting around (eg, TId).
-
- NB3: this probably also relies on the fact that Thr's are never
- deallocated -- they exist forever. Hence the 1-1 mapping from
- Thr's to thrid values (set up in Thr__new) persists forever.
-
- NB4: temp_max_sized_VTS is allocated at startup and never freed.
- It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS)
- ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
- making the memory use for this go sky-high. With
- SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
- like an OK tradeoff. If more than 256k threads need to be
- supported, we could change SCALARTS_N_THRBITS to 20, which would
- facilitate supporting 1 million threads at the cost of 8MB storage
- for temp_max_sized_VTS.
-*/
-#define SCALARTS_N_THRBITS 18 /* valid range: 11 to 32 inclusive */
-#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
-typedef
- struct {
- ThrID thrid : SCALARTS_N_THRBITS;
- ULong tym : SCALARTS_N_TYMBITS;
- }
- ScalarTS;
-
-#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
-
-
__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
@@ -1618,12 +1817,39 @@
}
-/* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
- bits, since they have to be packed into the lowest 30 bits of an
- SVal. */
-typedef UInt VtsID;
-#define VtsID_INVALID 0xFFFFFFFF
+/* The dead thread (ThrID, actually) table. A thread may only be
+ listed here if we have been notified thereof by libhb_async_exit.
+ New entries are added at the end. The order isn't important, but
+ the ThrID values must be unique. This table lists the identity of
+ all threads that have ever died -- none are ever removed. We keep
+ this table so as to be able to prune entries from VTSs. We don't
+ actually need to keep the set of threads that have ever died --
+ only the threads that have died since the previous round of
+ pruning. But it's useful for sanity check purposes to keep the
+ entire set, so we do. */
+static XArray* /* of ThrID */ verydead_thread_table = NULL;
+/* Arbitrary total ordering on ThrIDs. */
+static Int cmp__ThrID ( void* v1, void* v2 ) {
+ ThrID id1 = *(ThrID*)v1;
+ ThrID id2 = *(ThrID*)v2;
+ if (id1 < id2) return -1;
+ if (id1 > id2) return 1;
+ return 0;
+}
+
+static void verydead_thread_table_init ( void )
+{
+ tl_assert(!verydead_thread_table);
+ verydead_thread_table
+ = VG_(newXA)( HG_(zalloc),
+ "libhb.verydead_thread_table_init.1",
+ HG_(free), sizeof(ThrID) );
+ tl_assert(verydead_thread_table);
+ VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
+}
+
+
/* A VTS contains .ts, its vector clock, and also .id, a field to hold
a backlink for the caller's convenience. Since we have no idea
what to set that to in the library, it always gets set to
@@ -1640,10 +1866,16 @@
/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( HChar* who, UInt sizeTS );
-/* Make a clone of 'vts', resizing the array to exactly match the
+/* Make a clone of 'vts', sizing the new array to exactly match the
number of ScalarTSs present. */
static VTS* VTS__clone ( HChar* who, VTS* vts );
+/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed. The new
+ array is sized exactly to hold the number of required elements.
+ 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
+ must be in strictly increasing order. */
+static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel );
+
/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );
@@ -1687,6 +1919,12 @@
/* Debugging only. Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
+/* Notify the VTS machinery that a thread has been declared
+ comprehensively dead: that is, it has done an async exit AND it has
+ been joined with. This should ensure that its local clocks (.viR
+ and .viW) will never again change, and so all mentions of this
+ thread from all VTSs in the system may be removed. */
+static void VTS__declare_thread_very_dead ( Thr* idx );
/*--------------- to do with Vector Timestamps ---------------*/
@@ -1749,6 +1987,42 @@
}
+/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
+ must be in strictly increasing order. We could obviously do this
+ much more efficiently (in linear time) if necessary.
+*/
+static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel )
+{
+ UInt i, j;
+ tl_assert(vts);
+ tl_assert(thridsToDel);
+ tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
+ UInt nTS = vts->usedTS;
+ /* Figure out how many ScalarTSs will remain in the output. */
+ UInt nReq = nTS;
+ for (i = 0; i < nTS; i++) {
+ ThrID thrid = vts->ts[i].thrid;
+ if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
+ nReq--;
+ }
+ tl_assert(nReq <= nTS);
+ /* Copy the ones that will remain. */
+ VTS* res = VTS__new(who, nReq);
+ j = 0;
+ for (i = 0; i < nTS; i++) {
+ ThrID thrid = vts->ts[i].thrid;
+ if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
+ continue;
+ res->ts[j++] = vts->ts[i];
+ }
+ tl_assert(j == nReq);
+ tl_assert(j == res->sizeTS);
+ res->usedTS = j;
+ tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
+ return res;
+}
+
+
/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
@@ -2155,6 +2429,32 @@
}
+/* See comment on prototype above.
+*/
+static void VTS__declare_thread_very_dead ( Thr* thr )
+{
+ if (0) VG_(printf)("VTQ: tae %p\n", thr);
+
+ tl_assert(thr->llexit_done);
+ tl_assert(thr->joinedwith_done);
+
+ ThrID nyu;
+ nyu = Thr__to_ThrID(thr);
+ VG_(addToXA)( verydead_thread_table, &nyu );
+
+ /* We can only get here if we're assured that we'll never again
+ need to look at this thread's ::viR or ::viW. Set them to
+ VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
+ mostly so that we don't wind up pruning them (as that would be
+ nonsensical: the only interesting ScalarTS entry for a dead
+ thread is its own index, and the pruning will remove that.). */
+ VtsID__rcdec(thr->viR);
+ VtsID__rcdec(thr->viW);
+ thr->viR = VtsID_INVALID;
+ thr->viW = VtsID_INVALID;
+}
+
+
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
@@ -2180,7 +2480,7 @@
// //
/////////////////////////////////////////////////////////
-static WordFM* /* VTS* void void */ vts_set = NULL;
+static WordFM* /* WordFM VTS* void */ vts_set = NULL;
static void vts_set_init ( void )
{
@@ -2232,18 +2532,19 @@
If .vts == NULL, then this entry is not in use, so:
- .rc == 0
- this entry is on the freelist (unfortunately, does not imply
- any constraints on value for .nextfree)
+ any constraints on value for .freelink)
If .vts != NULL, then this entry is in use:
- .vts is findable in vts_set
- .vts->id == this entry number
- no specific value for .rc (even 0 is OK)
- - this entry is not on freelist, so .nextfree == VtsID_INVALID
+ - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
struct {
VTS* vts; /* vts, in vts_set */
UWord rc; /* reference count - enough for entire aspace */
VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
+ VtsID remap; /* used only during pruning */
}
VtsTE;
@@ -2309,6 +2610,7 @@
te.vts = NULL;
te.rc = 0;
te.freelink = VtsID_INVALID;
+ te.remap = VtsID_INVALID;
ii = (VtsID)VG_(addToXA)( vts_tab, &te );
return ii;
}
@@ -2394,12 +2696,53 @@
VG_(printf)(" total rc %4llu\n", totrc);
}
+
+/* --- Helpers for VtsID pruning --- */
+
+static
+void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
+ /*MOD*/XArray* /* of VtsTE */ new_tab,
+ VtsID* ii )
+{
+ VtsTE *old_te, *new_te;
+ VtsID old_id, new_id;
+ /* We're relying here on VG_(indexXA)'s range checking to assert on
+ any stupid values, in particular *ii == VtsID_INVALID. */
+ old_id = *ii;
+ old_te = VG_(indexXA)( old_tab, old_id );
+ old_te->rc--;
+ new_id = old_te->remap;
+ new_te = VG_(indexXA)( new_tab, new_id );
+ new_te->rc++;
+ *ii = new_id;
+}
+
+static
+void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
+ /*MOD*/XArray* /* of VtsTE */ new_tab,
+ SVal* s )
+{
+ SVal old_sv, new_sv;
+ old_sv = *s;
+ if (SVal__isC(old_sv)) {
+ VtsID rMin, wMin;
+ rMin = SVal__unC_Rmin(old_sv);
+ wMin = SVal__unC_Wmin(old_sv);
+ remap_VtsID( old_tab, new_tab, &rMin );
+ remap_VtsID( old_tab, new_tab, &wMin );
+ new_sv = SVal__mkC( rMin, wMin );
+ *s = new_sv;
+ }
+}
+
+
/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
UWord i, nTab, nLive, nFreed;
+ /* ---------- BEGIN VTS GC ---------- */
/* check this is actually necessary. */
tl_assert(vts_tab_freelist == VtsID_INVALID);
@@ -2418,13 +2761,13 @@
show_vts_stats("before GC");
}
- /* Now we can inspect the entire vts_tab. Any entries
- with zero .rc fields are now no longer in use and can be
+ /* Now we can inspect the entire vts_tab. Any entries with zero
+ .rc fields are now no longer in use and can be put back on the
free list, removed from vts_set, and deleted. */
nFreed = 0;
for (i = 0; i < nTab; i++) {
Bool present;
- UWord oldK = 0, oldV = 0;
+ UWord oldK = 0, oldV = 12345;
VtsTE* te = VG_(indexXA)( vts_tab, i );
if (te->vts == NULL) {
tl_assert(te->rc == 0);
@@ -2466,12 +2809,332 @@
}
if (VG_(clo_stats)) {
- static UInt ctr = 0;
+ static UInt ctr = 1;
tl_assert(nTab > 0);
VG_(message)(Vg_DebugMsg,
"libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
}
+ /* ---------- END VTS GC ---------- */
+
+ /* Decide whether to do VTS pruning. We have one of three
+ settings. */
+ static UInt pruning_auto_ctr = 0; /* do not make non-static */
+
+ Bool do_pruning = False;
+ switch (HG_(clo_vts_pruning)) {
+ case 0: /* never */
+ break;
+ case 1: /* auto */
+ do_pruning = (++pruning_auto_ctr % 5) == 0;
+ break;
+ case 2: /* always */
+ do_pruning = True;
+ break;
+ default:
+ tl_assert(0);
+ }
+
+ /* The rest of this routine only handles pruning, so we can
+ quit at this point if it is not to be done. */
+ if (!do_pruning)
+ return;
+
+ /* ---------- BEGIN VTS PRUNING ---------- */
+ /* We begin by sorting the backing table on its .thr values, so as
+ to (1) check they are unique [else something has gone wrong,
+ since it means we must have seen some Thr* exiting more than
+ once, which can't happen], and (2) so that we can quickly look
+ up the dead-thread entries as we work through the VTSs. */
+ VG_(sortXA)( verydead_thread_table );
+ /* Sanity check: check for unique .sts.thr values. */
+ UWord nBT = VG_(sizeXA)( verydead_thread_table );
+ if (nBT > 0) {
+ ThrID thrid1, thrid2;
+ thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
+ for (i = 1; i < nBT; i++) {
+ thrid1 = thrid2;
+ thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
+ tl_assert(thrid1 < thrid2);
+ }
+ }
+ /* Ok, so the dead thread table has unique and in-order keys. */
+
+ /* We will run through the old table, and create a new table and
+ set, at the same time setting the .remap entries in the old
+ table to point to the new entries. Then, visit every VtsID in
+ the system, and replace all of them with new ones, using the
+ .remap entries in the old table. Finally, we can delete the old
+ table and set. */
+
+ XArray* /* of VtsTE */ new_tab
+ = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
+ HG_(free), sizeof(VtsTE) );
+
+ /* WordFM VTS* void */
+ WordFM* new_set
+ = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
+ HG_(free),
+ (Word(*)(UWord,UWord))VTS__cmp_structural );
+
+ /* Visit each old VTS. For each one:
+
+ * make a pruned version
+
+ * search new_set for the pruned version, yielding either
+ Nothing (not present) or the new VtsID for it.
+
+ * if not present, allocate a new VtsID for it, insert (pruned
+ VTS, new VtsID) in the tree, and set
+ remap_table[old VtsID] = new VtsID.
+
+ * if present, set remap_table[old VtsID] = new VtsID, where
+ new VtsID was determined by the tree lookup. Then free up
+ the clone.
+ */
+
+ UWord nBeforePruning = 0, nAfterPruning = 0;
+ UWord nSTSsBefore = 0, nSTSsAfter = 0;
+ VtsID new_VtsID_ctr = 0;
+
+ for (i = 0; i < nTab; i++) {
+
+ /* For each old VTS .. */
+ VtsTE* old_te = VG_(indexXA)( vts_tab, i );
+ VTS* old_vts = old_te->vts;
+ tl_assert(old_te->remap == VtsID_INVALID);
+
+ /* Skip it if not in use */
+ if (old_te->rc == 0) {
+ tl_assert(old_vts == NULL);
+ continue;
+ }
+ tl_assert(old_vts != NULL);
+ tl_assert(old_vts->id == i);
+ tl_assert(old_vts->ts != NULL);
+
+ /* It is in use. Make a pruned version. */
+ nBeforePruning++;
+ nSTSsBefore += old_vts->usedTS;
+ VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
+ old_vts, verydead_thread_table);
+ tl_assert(new_vts->sizeTS == new_vts->usedTS);
+ tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
+ == 0x0ddC0ffeeBadF00dULL);
+
+ /* Get rid of the old VTS and the tree entry. It's a bit more
+ complex to incrementally delete the VTSs now than to nuke
+ them all after we're done, but the upside is that we don't
+ wind up temporarily storing potentially two complete copies
+ of each VTS and hence spiking memory use. */
+ UWord oldK = 0, oldV = 12345;
+ Bool present = VG_(delFromFM)( vts_set,
+ &oldK, &oldV, (UWord)old_vts );
+ tl_assert(present); /* else it isn't in vts_set ?! */
+ tl_assert(oldV == 0); /* no info stored in vts_set val fields */
+ tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
+ /* now free the VTS itself */
+ VTS__delete(old_vts);
+ old_te->vts = NULL;
+ old_vts = NULL;
+
+ /* NO MENTIONS of old_vts allowed beyond this point. */
+
+ /* Ok, we have the pruned copy in new_vts. See if a
+ structurally identical version is already present in new_set.
+ If so, delete the one we just made and move on; if not, add
+ it. */
+ VTS* identical_version = NULL;
+ UWord valW = 12345;
+ if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
+ (UWord)new_vts)) {
+ // already have it
+ tl_assert(valW == 0);
+ tl_assert(identical_version != NULL);
+ tl_assert(identical_version != new_vts);
+ VTS__delete(new_vts);
+ new_vts = identical_version;
+ tl_assert(new_vts->id != VtsID_INVALID);
+ } else {
+ tl_assert(valW == 12345);
+ tl_assert(identical_version == NULL);
+ new_vts->id = new_VtsID_ctr++;
+ Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
+ tl_assert(!b);
+ VtsTE new_te;
+ new_te.vts = new_vts;
+ new_te.rc = 0;
+ new_te.freelink = VtsID_INVALID;
+ new_te.remap = VtsID_INVALID;
+ Word j = VG_(addToXA)( new_tab, &new_te );
+ tl_assert(j <= i);
+ tl_assert(j == new_VtsID_ctr - 1);
+ // stats
+ nAfterPruning++;
+ nSTSsAfter += new_vts->usedTS;
+ }
+ old_te->remap = new_vts->id;
+
+ } /* for (i = 0; i < nTab; i++) */
+
+ /* At this point, we have:
+ * the old VTS table, with its .remap entries set,
+ and with all .vts == NULL.
+ * the old VTS tree should be empty, since it and the old VTSs
+ it contained have been incrementally deleted as we worked
+ through the old table.
+ * the new VTS table, with all .rc == 0, all .freelink and .remap
+ == VtsID_INVALID.
+ * the new VTS tree.
+ */
+ tl_assert( VG_(sizeFM)(vts_set) == 0 );
+
+ /* Now actually apply the mapping. */
+ /* Visit all the VtsIDs in the entire system. Where do we expect
+ to find them?
+ (a) in shadow memory -- the LineZs and LineFs
+ (b) in our collection of struct _Thrs.
+ (c) in our collection of struct _SOs.
+ Nowhere else, AFAICS. Not in the zsm cache, because that just
+ got invalidated.
+
+ Using the .remap fields in vts_tab, map each old VtsID to a new
+ VtsID. For each old VtsID, dec its rc; and for each new one,
+ inc it. This sets up the new refcounts, and it also gives a
+ cheap sanity check of the old ones: all old refcounts should be
+ zero after this operation.
+ */
+
+ /* Do the mappings for (a) above: iterate over the Primary shadow
+ mem map (WordFM Addr SecMap*). */
+ UWord secmapW = 0;
+ VG_(initIterFM)( map_shmem );
+ while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
+ UWord j;
+ SecMap* sm = (SecMap*)secmapW;
+ tl_assert(sm->magic == SecMap_MAGIC);
+ /* Deal with the LineZs */
+ for (i = 0; i < N_SECMAP_ZLINES; i++) {
+ LineZ* lineZ = &sm->linesZ[i];
+ if (lineZ->dict[0] == SVal_INVALID)
+ continue; /* not in use -- data is in F rep instead */
+ for (j = 0; j < 4; j++)
+ remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
+ }
+ /* Deal with the LineFs */
+ for (i = 0; i < sm->linesF_size; i++) {
+ LineF* lineF = &sm->linesF[i];
+ if (!lineF->inUse)
+ continue;
+ for (j = 0; j < N_LINE_ARANGE; j++)
+ remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
+ }
+ }
+ VG_(doneIterFM)( map_shmem );
+
+ /* Do the mappings for (b) above: visit our collection of struct
+ _Thrs. */
+ Thread* hgthread = get_admin_threads();
+ tl_assert(hgthread);
+ while (hgthread) {
+ Thr* hbthr = hgthread->hbthr;
+ tl_assert(hbthr);
+ /* Threads that are listed in the prunable set have their viR
+ and viW set to VtsID_INVALID, so we can't mess with them. */
+ if (hbthr->llexit_done && hbthr->joinedwith_done) {
+ tl_assert(hbthr->viR == VtsID_INVALID);
+ tl_assert(hbthr->viW == VtsID_INVALID);
+ hgthread = hgthread->admin;
+ continue;
+ }
+ remap_VtsID( vts_tab, new_tab, &hbthr->viR );
+ remap_VtsID( vts_tab, new_tab, &hbthr->viW );
+ hgthread = hgthread->admin;
+ }
+
+ /* Do the mappings for (c) above: visit the struct _SOs. */
+ SO* so = admin_SO;
+ while (so) {
+ if (so->viR != VtsID_INVALID)
+ remap_VtsID( vts_tab, new_tab, &so->viR );
+ if (so->viW != VtsID_INVALID)
+ remap_VtsID( vts_tab, new_tab, &so->viW );
+ so = so->admin_next;
+ }
+
+ /* So, we're nearly done (with this incredibly complex operation).
+ Check the refcounts for the old VtsIDs all fell to zero, as
+ expected. Any failure is serious. */
+ for (i = 0; i < nTab; i++) {
+ VtsTE* te = VG_(indexXA)( vts_tab, i );
+ tl_assert(te->vts == NULL);
+ /* This is the assert proper. Note we're also asserting
+ zeroness for old entries which are unmapped (hence have
+ .remap == VtsID_INVALID). That's OK. */
+ tl_assert(te->rc == 0);
+ }
+
+ /* Install the new table and set. */
+ VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
+ vts_set = new_set;
+ VG_(deleteXA)( vts_tab );
+ vts_tab = new_tab;
+
+ /* The freelist of vts_tab entries is empty now, because we've
+ compacted all of the live entries at the low end of the
+ table. */
+ vts_tab_freelist = VtsID_INVALID;
+
+ /* Sanity check vts_set and vts_tab. */
+
+ /* Because all the live entries got slid down to the bottom of vts_tab: */
+ tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
+
+ /* Assert that the vts_tab and vts_set entries point at each other
+ in the required way */
+ UWord wordK = 0, wordV = 0;
+ VG_(initIterFM)( vts_set );
+ while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
+ tl_assert(wordK != 0);
+ tl_assert(wordV == 0);
+ VTS* vts = (VTS*)wordK;
+ tl_assert(vts->id != VtsID_INVALID);
+ VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
+ tl_assert(te->vts == vts);
+ }
+ VG_(doneIterFM)( vts_set );
+
+ /* Also iterate over the table, and check each entry is
+ plausible. */
+ nTab = VG_(sizeXA)( vts_tab );
+ for (i = 0; i < nTab; i++) {
+ VtsTE* te = VG_(indexXA)( vts_tab, i );
+ tl_assert(te->vts);
+ tl_assert(te->vts->id == i);
+ tl_assert(te->rc > 0); /* 'cos we just GC'd */
+ tl_assert(te->freelink == VtsID_INVALID); /* in use */
+ tl_assert(te->remap == VtsID_INVALID); /* not relevant */
+ }
+
+ /* And we're done. Bwahahaha. Ha. Ha. Ha. */
+ if (VG_(clo_stats)) {
+ static UInt ctr = 1;
+ tl_assert(nTab > 0);
+ VG_(message)(
+ Vg_DebugMsg,
+ "libhb: VTS PR: #%u before %lu (avg sz %lu) "
+ "after %lu (avg sz %lu)\n",
+ ctr++,
+ nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
+ nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
+ );
+ }
+ if (0)
+ VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
+ "after pruning %lu (avg sz %lu)\n",
+ nBeforePruning, nSTSsBefore / nBeforePruning,
+ nAfterPruning, nSTSsAfter / nAfterPruning);
+ /* ---------- END VTS PRUNING ---------- */
}
@@ -2661,50 +3324,6 @@
// //
/////////////////////////////////////////////////////////
-// baseline: 5, 9
-#define FI_LINE_SZB_LOG2 5
-#define FI_NUM_LINES_LOG2 10
-
-#define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
-#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
-
-#define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
-#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
-
-#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
- & (Addr)(FI_NUM_LINES-1) )
-
-
-/* In the lines, each 8 bytes are treated individually, and are mapped
- to a UShort. Regardless of endianness of the underlying machine,
- bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
- the highest address.
-
- Of each bit pair, the higher numbered bit is set if a R has been
- seen, so the actual layout is:
-
- 15 14 ... 01 00
-
- R W for addr+7 ... R W for addr+0
-
- So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
-*/
-
-/* tags are separated from lines. tags are Addrs and are
- the base address of the line. */
-typedef
- struct {
- UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
- }
- FiLine;
-
-typedef
- struct {
- Addr tags[FI_NUM_LINES];
- FiLine lines[FI_NUM_LINES];
- }
- Filter;
-
/* Forget everything we know -- clear the filter and let everything
through. This needs to be as fast as possible, since it is called
every time the running thread changes, and every time a thread's
@@ -3022,58 +3641,6 @@
// //
/////////////////////////////////////////////////////////
-// QQQ move this somewhere else
-typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;
-
-/* How many of the above records to collect for each thread? Older
- ones are dumped when we run out of space. 62.5k requires 1MB per
- thread, since each ULong_n_EC record is 16 bytes long. When more
- than N_KWs_N_STACKs_PER_THREAD are present, the older half are
- deleted to make space. Hence in the worst case we will be able to
- produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
- Kw transitions (segments in this thread). For the current setting
- that gives a guaranteed stack for at least the last 31.25k
- segments. */
-#define N_KWs_N_STACKs_PER_THREAD 62500
-
-
-struct _Thr {
- /* Current VTSs for this thread. They change as we go along. viR
- is the VTS to be used for reads, viW for writes. Usually they
- are the same, but can differ when we deal with reader-writer
- locks. It is always the case that
- VtsID__cmpLEQ(viW,viR) == True
- that is, viW must be the same, or lagging behind, viR. */
- VtsID viR;
- VtsID viW;
-
- /* Is initially False, and is set to true after the thread really
- has done a low-level exit. */
- Bool still_alive;
-
- /* A small integer giving a unique identity to this Thr. See
- comments on the definition of ScalarTS for details. */
- ThrID thrid : SCALARTS_N_THRBITS;
-
- /* A filter that removes references for which we believe that
- msmcread/msmcwrite will not change the state, nor report a
- race. */
- Filter* filter;
-
- /* A pointer back to the top level Thread structure. There is a
- 1-1 mapping between Thread and Thr structures -- each Thr points
- at its corresponding Thread, and vice versa. Really, Thr and
- Thread should be merged into a single structure. */
- Thread* hgthread;
-
- /* The ULongs (scalar Kws) in this accumulate in strictly
- increasing order, without duplicates. This is important because
- we need to be able to find a given scalar Kw in this array
- later, by binary search. */
- XArray* /* ULong_n_EC */ local_Kws_n_stacks;
-};
-
-
/* Maps ThrID values to their Thr*s (which contain ThrID values that
should point back to the relevant slot in the array. Lowest
numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
@@ -3097,7 +3664,8 @@
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
thr->viR = VtsID_INVALID;
thr->viW = VtsID_INVALID;
- thr->still_alive = True;
+ thr->llexit_done = False;
+ thr->joinedwith_done = False;
thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
/* We only really need this at history level 1, but unfortunately
this routine is called before the command line processing is
@@ -4441,7 +5009,7 @@
we should search a bit further along the array than
lastIx+1 if hist1_seg_end is NULL. */
} else {
- if (confThr->still_alive)
+ if (!confThr->llexit_done)
hist1_seg_end = main_get_EC( confThr );
}
// seg_start could be NULL iff this is the first stack in the thread
@@ -5489,7 +6057,7 @@
{
if (0) VG_(printf)("resume %p\n", thr);
tl_assert(thr);
- tl_assert(thr->still_alive);
+ tl_assert(!thr->llexit_done);
Filter__clear(thr->filter, "libhb_Thr_resumes");
/* A kludge, but .. if this thread doesn't have any marker stacks
at all, get one right now. This is easier than figuring out
@@ -5509,23 +6077,31 @@
// //
/////////////////////////////////////////////////////////
-// (UInt) `echo "Synchronisation object" | md5sum`
-#define SO_MAGIC 0x56b3c5b0U
+/* A double linked list of all the SO's. */
+SO* admin_SO = NULL;
-struct _SO {
- VtsID viR; /* r-clock of sender */
- VtsID viW; /* w-clock of sender */
- UInt magic;
-};
-
-static SO* SO__Alloc ( void ) {
+static SO* SO__Alloc ( void )
+{
SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
so->viR = VtsID_INVALID;
so->viW = VtsID_INVALID;
so->magic = SO_MAGIC;
+ /* Add to double linked list */
+ if (admin_SO) {
+ tl_assert(admin_SO->admin_prev == NULL);
+ admin_SO->admin_prev = so;
+ so->admin_next = admin_SO;
+ } else {
+ so->admin_next = NULL;
+ }
+ so->admin_prev = NULL;
+ admin_SO = so;
+ /* */
return so;
}
-static void SO__Dealloc ( SO* so ) {
+
+static void SO__Dealloc ( SO* so )
+{
tl_assert(so);
tl_assert(so->magic == SO_MAGIC);
if (so->viR == VtsID_INVALID) {
@@ -5536,6 +6112,14 @@
VtsID__rcdec(so->viW);
}
so->magic = 0;
+ /* Del from double linked list */
+ if (so->admin_prev)
+ so->admin_prev->admin_next = so->admin_next;
+ if (so->admin_next)
+ so->admin_next->admin_prev = so->admin_prev;
+ if (so == admin_SO)
+ admin_SO = so->admin_next;
+ /* */
HG_(free)( so );
}
@@ -5589,6 +6173,7 @@
VTS singleton, tick and join operations. */
temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
temp_max_sized_VTS->id = VtsID_INVALID;
+ verydead_thread_table_init();
vts_set_init();
vts_tab_init();
event_map_init();
@@ -5780,11 +6365,14 @@
}
}
+/* Receive notification that a thread has low level exited. The
+ significance here is that we do not expect to see any more memory
+ references from it. */
void libhb_async_exit ( Thr* thr )
{
tl_assert(thr);
- tl_assert(thr->still_alive);
- thr->still_alive = False;
+ tl_assert(!thr->llexit_done);
+ thr->llexit_done = True;
/* free up Filter and local_Kws_n_stacks (well, actually not the
latter ..) */
@@ -5792,6 +6380,12 @@
HG_(free)(thr->filter);
thr->filter = NULL;
+ /* Tell the VTS mechanism this thread has exited, so it can
+ participate in VTS pruning. Note this can only happen if the
+ thread has both ll_exited and has been joined with. */
+ if (thr->joinedwith_done)
+ VTS__declare_thread_very_dead(thr);
+
/* Another space-accuracy tradeoff. Do we want to be able to show
H1 history for conflicts in threads which have since exited? If
yes, then we better not free up thr->local_Kws_n_stacks. The
@@ -5803,6 +6397,20 @@
// thr->local_Kws_n_stacks = NULL;
}
+/* Receive notification that a thread has been joined with. The
+ significance here is that we do not expect to see any further
+ references to its vector clocks (Thr::viR and Thr::viW). */
+void libhb_joinedwith_done ( Thr* thr )
+{
+ tl_assert(thr);
+ /* Caller must ensure that this is only ever called once per Thr. */
+ tl_assert(!thr->joinedwith_done);
+ thr->joinedwith_done = True;
+ if (thr->llexit_done)
+ VTS__declare_thread_very_dead(thr);
+}
+
+
/* Both Segs and SOs point to VTSs. However, there is no sharing, so
a Seg that points at a VTS is its one-and-only owner, and ditto for
a SO that points at a VTS. */
@@ -5861,7 +6469,7 @@
VtsID__rcdec(thr->viW);
thr->viR = VtsID__tick( thr->viR, thr );
thr->viW = VtsID__tick( thr->viW, thr );
- if (thr->still_alive) {
+ if (!thr->llexit_done) {
Filter__clear(thr->filter, "libhb_so_send");
note_local_Kw_n_stack_for(thr);
}
Modified: branches/HGDEV2/include/pub_tool_xarray.h
===================================================================
--- branches/HGDEV2/include/pub_tool_xarray.h 2011-03-20 12:11:21 UTC (rev 11657)
+++ branches/HGDEV2/include/pub_tool_xarray.h 2011-03-20 12:50:48 UTC (rev 11658)
@@ -79,8 +79,8 @@
/* Lookup (by binary search) 'key' in the array. Set *first to be the
index of the first, and *last to be the index of the last matching
value found. If any values are found, return True, else return
- False, and don't change *first or *last. Bomb if the array is not
- sorted. */
+ False, and don't change *first or *last. first and/or last may be
+ NULL. Bomb if the array is not sorted. */
extern Bool VG_(lookupXA) ( XArray*, void* key,
/*OUT*/Word* first, /*OUT*/Word* last );
|