|
From: <sv...@va...> - 2008-10-12 22:32:02
|
Author: sewardj
Date: 2008-10-12 23:31:51 +0100 (Sun, 12 Oct 2008)
New Revision: 8668
Log:
Add a caching mechanism for searches of the DebugInfo.cfsi array.
This speeds up stack unwinding on amd64-linux by about 50%.
Modified:
branches/YARD/coregrind/m_debuginfo/debuginfo.c
branches/YARD/coregrind/m_debuginfo/misc.c
branches/YARD/coregrind/m_debuginfo/priv_storage.h
branches/YARD/coregrind/m_debuginfo/storage.c
Modified: branches/YARD/coregrind/m_debuginfo/debuginfo.c
===================================================================
--- branches/YARD/coregrind/m_debuginfo/debuginfo.c 2008-10-12 19:53:28 UTC (rev 8667)
+++ branches/YARD/coregrind/m_debuginfo/debuginfo.c 2008-10-12 22:31:51 UTC (rev 8668)
@@ -171,8 +171,8 @@
di->memname = memname ? ML_(dinfo_strdup)("di.debuginfo.aDI.3", memname)
: NULL;
- /* Everything else -- pointers, sizes, arrays -- is zeroed by calloc.
- Now set up the debugging-output flags. */
+ /* Everything else -- pointers, sizes, arrays -- is zeroed by
+ ML_(dinfo_zalloc). Now set up the debugging-output flags. */
traceme
= VG_(string_match)( VG_(clo_trace_symtab_patt), filename )
|| (memname && VG_(string_match)( VG_(clo_trace_symtab_patt),
Modified: branches/YARD/coregrind/m_debuginfo/misc.c
===================================================================
--- branches/YARD/coregrind/m_debuginfo/misc.c 2008-10-12 19:53:28 UTC (rev 8667)
+++ branches/YARD/coregrind/m_debuginfo/misc.c 2008-10-12 22:31:51 UTC (rev 8668)
@@ -42,6 +42,8 @@
#include "priv_misc.h" /* self */
+/* Various functions rely on this returning zeroed memory.
+ alloc_DebugInfo is one of them. */
void* ML_(dinfo_zalloc) ( HChar* cc, SizeT szB ) {
void* v;
vg_assert(szB > 0);
Modified: branches/YARD/coregrind/m_debuginfo/priv_storage.h
===================================================================
--- branches/YARD/coregrind/m_debuginfo/priv_storage.h 2008-10-12 19:53:28 UTC (rev 8667)
+++ branches/YARD/coregrind/m_debuginfo/priv_storage.h 2008-10-12 22:31:51 UTC (rev 8668)
@@ -241,6 +241,8 @@
#define SEGINFO_STRCHUNKSIZE (64*1024)
+#define N_CFSI_SEARCH_CACHE 8
+
struct _DebugInfo {
/* Admin stuff */
@@ -362,11 +364,20 @@
records require any expression nodes, they are stored in
cfsi_exprs. */
DiCfSI* cfsi;
- UInt cfsi_used;
- UInt cfsi_size;
+ UWord cfsi_used;
+ UWord cfsi_size;
Addr cfsi_minavma;
Addr cfsi_maxavma;
XArray* cfsi_exprs; /* XArray of CfiExpr */
+ /* Stack unwinding on amd64 causes a lot of searching in .cfsi to
+ find the DiCfSI record that covers a particular address. To
+ speed up the searches we add a small (8-entry) cache containing
+ cached results from ML_(search_one_cfitab). This speeds up the
+ searching by about a factor of 3 and overall increases the stack
+ unwind speed by about 50% on amd64-linux on large C++ apps. */
+ UWord cfsi_search_cache_used; /* 0 .. N_CFSI_SEARCH_CACHE */
+ struct { Addr aMin; Addr aMax; Word ix; }
+ cfsi_search_cache[N_CFSI_SEARCH_CACHE];
/* Expandable arrays of characters -- the string table. Pointers
into this are stable (the arrays are not reallocated). */
@@ -464,7 +475,7 @@
/* Find a CFI-table index containing the specified pointer, or -1 if
not found. Binary search. */
-extern Int ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
+extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
/* ------ Misc ------ */
Modified: branches/YARD/coregrind/m_debuginfo/storage.c
===================================================================
--- branches/YARD/coregrind/m_debuginfo/storage.c 2008-10-12 19:53:28 UTC (rev 8667)
+++ branches/YARD/coregrind/m_debuginfo/storage.c 2008-10-12 22:31:51 UTC (rev 8668)
@@ -365,6 +365,9 @@
ML_(ppDiCfSI)(di->cfsi_exprs, cfsi);
}
+ /* invalidate the lookup cache */
+ di->cfsi_search_cache_used = 0;
+
/* sanity */
vg_assert(cfsi->len > 0);
/* If this fails, the implication is you have a single procedure
@@ -1285,8 +1288,11 @@
di->cfsi_maxavma = here_max;
}
+ /* invalidate the lookup cache */
+ di->cfsi_search_cache_used = 0;
+
if (di->trace_cfi)
- VG_(printf)("canonicaliseCfiSI: %d entries, %#lx .. %#lx\n",
+ VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
di->cfsi_used,
di->cfsi_minavma, di->cfsi_maxavma);
@@ -1420,11 +1426,12 @@
/* Find a CFI-table index containing the specified pointer, or -1
if not found. Binary search. */
-
-Int ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
+__attribute__((noinline))
+static
+Word search_one_cfitab_WRK ( struct _DebugInfo* di, Addr ptr )
{
Addr a_mid_lo, a_mid_hi;
- Int mid, size,
+ Word mid, size,
lo = 0,
hi = di->cfsi_used-1;
while (True) {
@@ -1442,7 +1449,57 @@
}
}
+/* Find the CFI-table index containing the specified pointer,
+ or -1 if not found. It uses search_one_cfitab_WRK to do the
+ real work, but caches the results so as to avoid calling
+ search_one_cfitab_WRK nost of the time. */
+Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
+{
+ static UWord nq = 0;
+ static UWord nm = 0;
+ Word i, w;
+
+ if (0 && 0 == (nq & 0x3FFFF))
+ VG_(printf)("ZZZZZ %lu qs %lu misses\n", nq,nm);
+
+ nq++;
+ /* Check the cache first */
+ for (i = 0; i < di->cfsi_search_cache_used; i++) {
+ if (di->cfsi_search_cache[i].aMin <= ptr
+ && ptr <= di->cfsi_search_cache[i].aMax) {
+ /* found it. Once in every 16 searches, move the found element
+ one step closer to the front. */
+ if (i > 0 && 0 == (nq & 0xF)) {
+ __typeof__(di->cfsi_search_cache[0]) tmp;
+ tmp = di->cfsi_search_cache[i-1];
+ di->cfsi_search_cache[i-1] = di->cfsi_search_cache[i];
+ di->cfsi_search_cache[i] = tmp;
+ i--;
+ }
+ return di->cfsi_search_cache[i].ix;
+ }
+ }
+ /* not found in the cache; do slow search */
+ nm++;
+ w = search_one_cfitab_WRK(di, ptr);
+ if (w >= 0) {
+ /* We got a result, so update the cache. Slide all entries
+ along one and insert new one at [0]. */
+ for (i = N_CFSI_SEARCH_CACHE-1; i >= 1; i--) {
+ di->cfsi_search_cache[i] = di->cfsi_search_cache[i-1];
+ }
+ if (di->cfsi_search_cache_used < N_CFSI_SEARCH_CACHE)
+ di->cfsi_search_cache_used++;
+ tl_assert(di->cfsi[w].len > 0);
+ di->cfsi_search_cache[0].aMin = di->cfsi[w].base;
+ di->cfsi_search_cache[0].aMax = di->cfsi[w].base + di->cfsi[w].len - 1;
+ di->cfsi_search_cache[0].ix = w;
+ }
+ return w;
+}
+
+
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/
|