|
From: <sv...@va...> - 2015-04-25 14:53:43
|
Author: philippe
Date: Sat Apr 25 15:53:35 2015
New Revision: 15142
Log:
Replace adler32 by sdbm_hash in m_deduppoolalloc.c
adler32 is not very good as a hash function.
sdbm_hash gives more different keys that adler32,
and in a large majority of the cases, shorter chains.
Modified:
trunk/coregrind/m_deduppoolalloc.c
Modified: trunk/coregrind/m_deduppoolalloc.c
==============================================================================
--- trunk/coregrind/m_deduppoolalloc.c (original)
+++ trunk/coregrind/m_deduppoolalloc.c Sat Apr 25 15:53:35 2015
@@ -231,6 +231,19 @@
ddpa->ht_node_pa = NULL;
}
+
+// hash function used by gawk and SDBM.
+static UInt sdbm_hash (const UChar* buf, UInt len )
+{
+ UInt h;
+ UInt i;
+
+ h = 0;
+ for (i = 0; i < len; i++)
+ h = *buf++ + (h<<6) + (h<<16) - h;
+ return h;
+}
+
const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB,
const void *elt)
{
@@ -243,14 +256,7 @@
ddpa->nr_alloc_calls++;
- // Currently using adler32 as hash function.
- // Many references tells adler32 is bad as a hash function.
- // And effectively, some tests on dwarf debug string shows
- // a lot of collisions (at least for short elements).
- // (A lot can be 10% of the elements colliding, even on
- // small nr of elements such as 10_000).
- ht_elt.key = VG_(adler32) (0, NULL, 0);
- ht_elt.key = VG_(adler32) (ht_elt.key, elt, eltSzB);
+ ht_elt.key = sdbm_hash (elt, eltSzB);
ht_elt.eltSzB = eltSzB;
ht_elt.elt = elt;
|