|
From: <sv...@va...> - 2008-03-04 21:45:09
|
Author: sewardj
Date: 2008-03-04 21:45:07 +0000 (Tue, 04 Mar 2008)
New Revision: 7566
Log:
Make happens-before caching more 32-bit friendly. Also, make the
table much smaller as that appears to have minimal effect on the hit
rate but trashes D1 less.
Modified:
branches/HGDEV/auxprogs/primes.c
branches/HGDEV/helgrind/hg_main.c
Modified: branches/HGDEV/auxprogs/primes.c
===================================================================
--- branches/HGDEV/auxprogs/primes.c 2008-03-04 20:32:23 UTC (rev 7565)
+++ branches/HGDEV/auxprogs/primes.c 2008-03-04 21:45:07 UTC (rev 7566)
@@ -1,6 +1,8 @@
#include <stdio.h>
#include <math.h>
+#include <stdlib.h>
+#include <assert.h>
int isprime ( int n )
{
@@ -13,8 +15,14 @@
int main ( int argc, char** argv )
{
- int i;
- for (i = 79000; i < 81000; i++)
+ int i, start;
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <number>\n", argv[0]);
+ return 1;
+ }
+ start = atoi(argv[1]);
+ assert(start >= 2);
+ for (i = start; i < start+2000; i++)
if (isprime(i)) { printf ( "%d ", i ); fflush(stdout); }
return 0;
}
Modified: branches/HGDEV/helgrind/hg_main.c
===================================================================
--- branches/HGDEV/helgrind/hg_main.c 2008-03-04 20:32:23 UTC (rev 7565)
+++ branches/HGDEV/helgrind/hg_main.c 2008-03-04 21:45:07 UTC (rev 7566)
@@ -2125,54 +2125,60 @@
return reachable;
}
+
/*--------------- the happens_before hash table ---------------*/
-
-
-// HBEFORE__N_HTABLE should be a prime number
+// HBEFORE__N_HTABLE should be a prime number. But not a large prime
+// number, as that just causes D1 misses and slows things down.
// #define HBEFORE__N_HTABLE 104729
// #define HBEFORE__N_HTABLE 399989
// #define HBEFORE__N_HTABLE 49999
- #define HBEFORE__N_HTABLE 19997
+// #define HBEFORE__N_HTABLE 19997
+// #define HBEFORE__N_HTABLE 9973
+#define HBEFORE__N_HTABLE 4999
-
// Simple closed hash table with prime size.
// Each entry is a 64-bit value:
// bits 0-31: SegmentID-2
// bits 32-62: SegmentID-1
// bit 63: happens-before(SegmentID-1, SegmentID-2)
-static ULong hbefore__hash_table[HBEFORE__N_HTABLE];
+
+static
+ struct { Bool res; SegmentID segid1; SegmentID segid2; }
+ hbefore__hash_table[HBEFORE__N_HTABLE];
+
static void hbefore__invalidate_htable ( void )
{
stats__hbefore_invals++;
VG_(memset)(hbefore__hash_table, 0, sizeof(hbefore__hash_table));
}
+static inline UInt ROL32 ( UInt w, Int n )
+{
+ w = (w << n) | (w >> (32-n));
+ return w;
+}
-
static Bool happens_before ( SegmentID segid1, SegmentID segid2 )
{
- ULong seg_1_and_2 = ((ULong)segid1) << 32 | (ULong)(segid2);
Bool hbG, hbV;
Segment *seg1, *seg2;
+ UInt hash;
+ tl_assert(sizeof(SegmentID) == sizeof(UInt));
tl_assert(SEG_id_is_sane(segid1));
tl_assert(SEG_id_is_sane(segid2));
tl_assert(segid1 != segid2);
stats__hbefore_queries++;
+ hash = (ROL32(segid1,19) ^ ROL32(segid2,13)) % HBEFORE__N_HTABLE;
{ // try hash table
// Hmm, % on ULong is OK on 64-bit machine (32ish cycles on Core2)
// but really bad on 32-bit (there is a call to umoddi3 and that
// can be many tens of instructions)
- ULong cached = hbefore__hash_table[seg_1_and_2 % HBEFORE__N_HTABLE];
- if (cached == seg_1_and_2) {
- stats__hbefore_hits++;
- return False;
- }
-
- if (cached == (seg_1_and_2 | (1ULL << 63))) {
- stats__hbefore_hits++;
- return True;
- }
+ if (hbefore__hash_table[hash].segid1 == segid1
+ && hbefore__hash_table[hash].segid2 == segid2) {
+ stats__hbefore_hits++;
+ return hbefore__hash_table[hash].res;
+ }
}
/* Not found. Search the graph and add an entry to the cache. */
@@ -2206,14 +2212,16 @@
tl_assert(hbV == hbG);
{ // remember the results in the hash table
- ULong cache = hbG ? (seg_1_and_2 | (1ULL << 63)) : seg_1_and_2;
- hbefore__hash_table[seg_1_and_2 % HBEFORE__N_HTABLE] = cache;
+ hbefore__hash_table[hash].segid1 = segid1;
+ hbefore__hash_table[hash].segid2 = segid2;
+ hbefore__hash_table[hash].res = hbG;
}
if (0)
VG_(printf)("hb %d %d\n", (Int)segid1, (Int)segid2);
return hbG;
}
+
/*--------------- generating .vcg output ---------------*/
static void segments__generate_vcg ( void )
|