|
From: <sv...@va...> - 2014-07-04 22:36:50
|
Author: philippe
Date: Fri Jul 4 22:36:38 2014
New Revision: 14129
Log:
This patch decreases significantly the memory needed to store the cfsi info.
On a big executable, the trunk needs:
dinfo: 155844608/106737664 max/curr mmap'd 155572624/102276760 max/curr
With the patch, we have:
dinfo: 134873088/70389760 max/curr mmap'd 134607808/66717512 max/curr
So, peak dinfo memory decreases by 21Mb, and final by 36Mb.
The memory decrease is obtained by:
* using a dedup pool to store the machine dependent part (cfsi_m)
of the cfsi information as this information is highly duplicated.
For x86 and amd64, the duplication factor of the cfsi machine dependent
part is very high (up to a factor 60).
For arm64, it is more like a factor 3.
A variable-size index (1, 2 or 4 bytes) is automatically used to identify
the cfsi_m, depending on whether the number of different cfsi_m fits within 255, within 64K, or exceeds 64K.
* not storing explicitly the length of a range for which a cfsi_m
is to be used: in a large majority of the cases, ranges are
consecutive, and so the end of a range is just one byte before
the start of the next range.
So, we do not store the length of the ranges.
If there is a hole between 2 ranges, the hole is stored explicitly
as a range in which we have no cfsi_m information.
On x86 and amd64, we have quite some holes (something like one hole
every 7 cfsi). On arm64, we have very few holes (less than one hole
every 50 cfsi).
Even with the nr of holes on x86/amd64, it is more memory efficient
to store the holes rather than to store the length of each cfsi.
* Merging consecutive ranges that have the same cfsi_m info:
Many cfsi are "mergeable": there is no hole between 2 cfsi, and their
machine dependent part is identical
(I guess the unwind info needed by valgrind is a subset of the full
unwind info, and so, the cfsi entries are not merged by the compiler,
but can be merged for simple unwind). Depending on the platform
(x86, amd64, arm64) and on the library/object file, we can have a
significant nr of mergeable entries.
The patch is not very small, but a lot is mechanical changes.
The patch has been compiled and tested on x86/amd64/ppc32/ppc64
(but ppc does not use cfsi so that just verifies it compiles).
It has been compiled on arm64, and "tested" by launching valgrind on
one executable.
It has not been compiled on s390 and mips.
With some luck, maybe it will compile on these platforms.
And if that uses the whole provision of luck for 2014, it might even work
on these platforms :).
If it does not compile, the fix should be straightforward.
Runtime problems might be more tricky (but arm64 "worked out of the box"
once x86/amd64 were ok).
This has also been tested in an outer/inner setup, to verify there are no memory leaks/bugs.
Modified:
trunk/coregrind/m_debuginfo/debuginfo.c
trunk/coregrind/m_debuginfo/priv_storage.h
trunk/coregrind/m_debuginfo/readdwarf.c
trunk/coregrind/m_debuginfo/readelf.c
trunk/coregrind/m_debuginfo/storage.c
Modified: trunk/coregrind/m_debuginfo/debuginfo.c
==============================================================================
--- trunk/coregrind/m_debuginfo/debuginfo.c (original)
+++ trunk/coregrind/m_debuginfo/debuginfo.c Fri Jul 4 22:36:38 2014
@@ -102,7 +102,7 @@
/*------------------------------------------------------------*/
static UInt CF_info_generation = 0;
-static void cfsi_cache__invalidate ( void );
+static void cfsi_m_cache__invalidate ( void );
/*------------------------------------------------------------*/
@@ -213,7 +213,10 @@
if (di->soname) ML_(dinfo_free)(di->soname);
if (di->loctab) ML_(dinfo_free)(di->loctab);
if (di->inltab) ML_(dinfo_free)(di->inltab);
- if (di->cfsi) ML_(dinfo_free)(di->cfsi);
+ if (di->cfsi_base) ML_(dinfo_free)(di->cfsi_base);
+ if (di->cfsi_m_ix) ML_(dinfo_free)(di->cfsi_m_ix);
+ if (di->cfsi_rd) ML_(dinfo_free)(di->cfsi_rd);
+ if (di->cfsi_m_pool) VG_(deleteDedupPA)(di->cfsi_m_pool);
if (di->cfsi_exprs) VG_(deleteXA)(di->cfsi_exprs);
if (di->fpo) ML_(dinfo_free)(di->fpo);
@@ -520,7 +523,7 @@
di2 = NULL;
/* invariant (2) */
- if (di->cfsi) {
+ if (di->cfsi_rd) {
vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! */
/* Assume the csfi fits completely into one individual mapping
for now. This might need to be improved/reworked later. */
@@ -532,25 +535,25 @@
/* degenerate case: all r-x sections are empty */
if (!has_nonempty_rx) {
- vg_assert(di->cfsi == NULL);
+ vg_assert(di->cfsi_rd == NULL);
return;
}
/* invariant (2) - cont. */
- if (di->cfsi)
+ if (di->cfsi_rd)
vg_assert(cfsi_fits);
/* invariants (3) and (4) */
- if (di->cfsi) {
+ if (di->cfsi_rd) {
vg_assert(di->cfsi_used > 0);
vg_assert(di->cfsi_size > 0);
for (i = 0; i < di->cfsi_used; i++) {
- DiCfSI* cfsi = &di->cfsi[i];
+ DiCfSI* cfsi = &di->cfsi_rd[i];
vg_assert(cfsi->len > 0);
vg_assert(cfsi->base >= di->cfsi_minavma);
vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma);
if (i > 0) {
- DiCfSI* cfsip = &di->cfsi[i-1];
+ DiCfSI* cfsip = &di->cfsi_rd[i-1];
vg_assert(cfsip->base + cfsip->len <= cfsi->base);
}
}
@@ -576,7 +579,7 @@
vg_assert(debugInfo_list == NULL);
/* flush the CFI fast query cache. */
- cfsi_cache__invalidate();
+ cfsi_m_cache__invalidate();
}
@@ -637,9 +640,14 @@
TRACE_SYMTAB("\n------ Canonicalising the "
"acquired info ------\n");
/* invalidate the CFI unwind cache. */
- cfsi_cache__invalidate();
+ cfsi_m_cache__invalidate();
/* prepare read data for use */
ML_(canonicaliseTables)( di );
+ /* Check invariants listed in
+ Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in
+ priv_storage.h. */
+ check_CFSI_related_invariants(di);
+ ML_(finish_CFSI_arrays)(di);
/* notify m_redir about it */
TRACE_SYMTAB("\n------ Notifying m_redir ------\n");
VG_(redir_notify_new_DebugInfo)( di );
@@ -647,10 +655,6 @@
di->have_dinfo = True;
tl_assert(di->handle > 0);
di_handle = di->handle;
- /* Check invariants listed in
- Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in
- priv_storage.h. */
- check_CFSI_related_invariants(di);
} else {
TRACE_SYMTAB("\n------ ELF reading failed ------\n");
@@ -940,7 +944,7 @@
if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len);
anyFound = discard_syms_in_range(a, len);
if (anyFound)
- cfsi_cache__invalidate();
+ cfsi_m_cache__invalidate();
}
@@ -957,7 +961,7 @@
if (0 && !exe_ok) {
Bool anyFound = discard_syms_in_range(a, len);
if (anyFound)
- cfsi_cache__invalidate();
+ cfsi_m_cache__invalidate();
}
}
@@ -1237,7 +1241,7 @@
/* play safe; always invalidate the CFI cache. I don't know if
this is necessary, but anyway .. */
- cfsi_cache__invalidate();
+ cfsi_m_cache__invalidate();
/* dump old info for this range, if any */
discard_syms_in_range( avma_obj, total_size );
@@ -2364,17 +2368,17 @@
}
-/* Search all the DebugInfos in the entire system, to find the DiCfSI
+/* Search all the DebugInfos in the entire system, to find the DiCfSI_m
that pertains to 'ip'.
If found, set *diP to the DebugInfo in which it resides, and
- *ixP to the index in that DebugInfo's cfsi array.
+ *cfsi_mP to the cfsi_m pointer in that DebugInfo's cfsi_m_pool.
- If not found, set *diP to (DebugInfo*)1 and *ixP to zero.
+ If not found, set *diP to (DebugInfo*)1 and *cfsi_mP to zero.
*/
__attribute__((noinline))
static void find_DiCfSI ( /*OUT*/DebugInfo** diP,
- /*OUT*/Word* ixP,
+ /*OUT*/DiCfSI_m** cfsi_mP,
Addr ip )
{
DebugInfo* di;
@@ -2411,17 +2415,22 @@
/* we didn't find it. */
*diP = (DebugInfo*)1;
- *ixP = 0;
+ *cfsi_mP = 0;
} else {
- /* found it. */
+ /* found a di corresponding to ip. */
/* ensure that di is 4-aligned (at least), so it can't possibly
be equal to (DebugInfo*)1. */
vg_assert(di && VG_IS_4_ALIGNED(di));
- vg_assert(i >= 0 && i < di->cfsi_used);
- *diP = di;
- *ixP = i;
+ *cfsi_mP = ML_(get_cfsi_m) (di, i);
+ if (*cfsi_mP == NULL) {
+ // This is a cfsi hole. Report no cfi information found.
+ *diP = (DebugInfo*)1;
+ // But we will still perform the hack below.
+ } else {
+ *diP = di;
+ }
/* Start of performance-enhancing hack: once every 64 (chosen
hackily after profiling) successful searches, move the found
@@ -2449,35 +2458,34 @@
/* Now follows a mechanism for caching queries to find_DiCfSI, since
they are extremely frequent on amd64-linux, during stack unwinding.
- Each cache entry binds an ip value to a (di, ix) pair. Possible
+ Each cache entry binds an ip value to a (di, cfsi_m*) pair. Possible
values:
- di is non-null, ix >= 0 ==> cache slot in use, "di->cfsi[ix]"
- di is (DebugInfo*)1 ==> cache slot in use, no associated di
- di is NULL ==> cache slot not in use
+ di is non-null, cfsi_m* >= 0 ==> cache slot in use, "cfsi_m*"
+ di is (DebugInfo*)1 ==> cache slot in use, no associated di
+ di is NULL ==> cache slot not in use
Hence simply zeroing out the entire cache invalidates all
entries.
- Why not map ip values directly to DiCfSI*'s? Because this would
- cause problems if/when the cfsi array is moved due to resizing.
- Instead we cache .cfsi array index value, which should be invariant
- across resizing. (That said, I don't think the current
- implementation will resize whilst during queries, since the DiCfSI
- records are added all at once, when the debuginfo for an object is
- read, and is not changed ever thereafter. */
-
-// Prime number, giving about 3K cache on 32 bits, 6K cache on 64 bits.
-#define N_CFSI_CACHE 509
+ We can map an ip value directly to a (di, cfsi_m*) pair as
+ once a DebugInfo is read, adding new DiCfSI_m* is not possible
+ anymore, as the cfsi_m_pool is frozen once the reading is terminated.
+ Also, the cache is invalidated when new debuginfo is read due to
+ an mmap or some debuginfo is discarded due to an munmap. */
+
+// Prime number, giving about 6Kbytes cache on 32 bits,
+// 12Kbytes cache on 64 bits.
+#define N_CFSI_M_CACHE 509
typedef
- struct { Addr ip; DebugInfo* di; Word ix; }
- CFSICacheEnt;
+ struct { Addr ip; DebugInfo* di; DiCfSI_m* cfsi_m; }
+ CFSI_m_CacheEnt;
-static CFSICacheEnt cfsi_cache[N_CFSI_CACHE];
+static CFSI_m_CacheEnt cfsi_m_cache[N_CFSI_M_CACHE];
-static void cfsi_cache__invalidate ( void ) {
- VG_(memset)(&cfsi_cache, 0, sizeof(cfsi_cache));
+static void cfsi_m_cache__invalidate ( void ) {
+ VG_(memset)(&cfsi_m_cache, 0, sizeof(cfsi_m_cache));
CF_info_generation++;
}
@@ -2486,10 +2494,10 @@
return CF_info_generation;
}
-static inline CFSICacheEnt* cfsi_cache__find ( Addr ip )
+static inline CFSI_m_CacheEnt* cfsi_m_cache__find ( Addr ip )
{
- UWord hash = ip % N_CFSI_CACHE;
- CFSICacheEnt* ce = &cfsi_cache[hash];
+ UWord hash = ip % N_CFSI_M_CACHE;
+ CFSI_m_CacheEnt* ce = &cfsi_m_cache[hash];
static UWord n_q = 0, n_m = 0;
n_q++;
@@ -2502,7 +2510,7 @@
/* not found in cache. Search and update. */
n_m++;
ce->ip = ip;
- find_DiCfSI( &ce->di, &ce->ix, ip );
+ find_DiCfSI( &ce->di, &ce->cfsi_m, ip );
}
if (UNLIKELY(ce->di == (DebugInfo*)1)) {
@@ -2518,7 +2526,7 @@
inline
static Addr compute_cfa ( D3UnwindRegs* uregs,
Addr min_accessible, Addr max_accessible,
- DebugInfo* di, DiCfSI* cfsi )
+ DebugInfo* di, DiCfSI_m* cfsi_m )
{
CfiExprEvalContext eec;
Addr cfa;
@@ -2526,34 +2534,34 @@
/* Compute the CFA. */
cfa = 0;
- switch (cfsi->cfa_how) {
+ switch (cfsi_m->cfa_how) {
# if defined(VGA_x86) || defined(VGA_amd64)
case CFIC_IA_SPREL:
- cfa = cfsi->cfa_off + uregs->xsp;
+ cfa = cfsi_m->cfa_off + uregs->xsp;
break;
case CFIC_IA_BPREL:
- cfa = cfsi->cfa_off + uregs->xbp;
+ cfa = cfsi_m->cfa_off + uregs->xbp;
break;
# elif defined(VGA_arm)
case CFIC_ARM_R13REL:
- cfa = cfsi->cfa_off + uregs->r13;
+ cfa = cfsi_m->cfa_off + uregs->r13;
break;
case CFIC_ARM_R12REL:
- cfa = cfsi->cfa_off + uregs->r12;
+ cfa = cfsi_m->cfa_off + uregs->r12;
break;
case CFIC_ARM_R11REL:
- cfa = cfsi->cfa_off + uregs->r11;
+ cfa = cfsi_m->cfa_off + uregs->r11;
break;
case CFIC_ARM_R7REL:
- cfa = cfsi->cfa_off + uregs->r7;
+ cfa = cfsi_m->cfa_off + uregs->r7;
break;
# elif defined(VGA_s390x)
case CFIC_IA_SPREL:
- cfa = cfsi->cfa_off + uregs->sp;
+ cfa = cfsi_m->cfa_off + uregs->sp;
break;
case CFIR_MEMCFAREL:
{
- Addr a = uregs->sp + cfsi->cfa_off;
+ Addr a = uregs->sp + cfsi_m->cfa_off;
if (a < min_accessible || a > max_accessible-sizeof(Addr))
break;
cfa = ML_(read_Addr)((void *)a);
@@ -2563,25 +2571,25 @@
cfa = uregs->fp;
break;
case CFIC_IA_BPREL:
- cfa = cfsi->cfa_off + uregs->fp;
+ cfa = cfsi_m->cfa_off + uregs->fp;
break;
# elif defined(VGA_mips32) || defined(VGA_mips64)
case CFIC_IA_SPREL:
- cfa = cfsi->cfa_off + uregs->sp;
+ cfa = cfsi_m->cfa_off + uregs->sp;
break;
case CFIR_SAME:
cfa = uregs->fp;
break;
case CFIC_IA_BPREL:
- cfa = cfsi->cfa_off + uregs->fp;
+ cfa = cfsi_m->cfa_off + uregs->fp;
break;
# elif defined(VGA_ppc32) || defined(VGA_ppc64)
# elif defined(VGP_arm64_linux)
case CFIC_ARM64_SPREL:
- cfa = cfsi->cfa_off + uregs->sp;
+ cfa = cfsi_m->cfa_off + uregs->sp;
break;
case CFIC_ARM64_X29REL:
- cfa = cfsi->cfa_off + uregs->x29;
+ cfa = cfsi_m->cfa_off + uregs->x29;
break;
# else
# error "Unsupported arch"
@@ -2589,14 +2597,14 @@
case CFIC_EXPR: /* available on all archs */
if (0) {
VG_(printf)("CFIC_EXPR: ");
- ML_(ppCfiExpr)(di->cfsi_exprs, cfsi->cfa_off);
+ ML_(ppCfiExpr)(di->cfsi_exprs, cfsi_m->cfa_off);
VG_(printf)("\n");
}
eec.uregs = uregs;
eec.min_accessible = min_accessible;
eec.max_accessible = max_accessible;
ok = True;
- cfa = evalCfiExpr(di->cfsi_exprs, cfsi->cfa_off, &eec, &ok );
+ cfa = evalCfiExpr(di->cfsi_exprs, cfsi_m->cfa_off, &eec, &ok );
if (!ok) return 0;
break;
default:
@@ -2612,17 +2620,17 @@
Addr ML_(get_CFA) ( Addr ip, Addr sp, Addr fp,
Addr min_accessible, Addr max_accessible )
{
- CFSICacheEnt* ce;
+ CFSI_m_CacheEnt* ce;
DebugInfo* di;
- DiCfSI* cfsi __attribute__((unused));
+ DiCfSI_m* cfsi_m __attribute__((unused));
- ce = cfsi_cache__find(ip);
+ ce = cfsi_m_cache__find(ip);
if (UNLIKELY(ce == NULL))
return 0; /* no info. Nothing we can do. */
di = ce->di;
- cfsi = &di->cfsi[ ce->ix ];
+ cfsi_m = ce->cfsi_m;
/* Temporary impedance-matching kludge so that this keeps working
on x86-linux and amd64-linux. */
@@ -2632,7 +2640,7 @@
uregs.xsp = sp;
uregs.xbp = fp;
return compute_cfa(&uregs,
- min_accessible, max_accessible, di, cfsi);
+ min_accessible, max_accessible, di, cfsi_m);
}
#elif defined(VGA_s390x)
{ D3UnwindRegs uregs;
@@ -2640,7 +2648,7 @@
uregs.sp = sp;
uregs.fp = fp;
return compute_cfa(&uregs,
- min_accessible, max_accessible, di, cfsi);
+ min_accessible, max_accessible, di, cfsi_m);
}
#elif defined(VGA_mips32) || defined(VGA_mips64)
{ D3UnwindRegs uregs;
@@ -2648,7 +2656,7 @@
uregs.sp = sp;
uregs.fp = fp;
return compute_cfa(&uregs,
- min_accessible, max_accessible, di, cfsi);
+ min_accessible, max_accessible, di, cfsi_m);
}
# else
@@ -2674,9 +2682,9 @@
Addr max_accessible )
{
DebugInfo* di;
- DiCfSI* cfsi = NULL;
+ DiCfSI_m* cfsi_m = NULL;
Addr cfa, ipHere = 0;
- CFSICacheEnt* ce;
+ CFSI_m_CacheEnt* ce;
CfiExprEvalContext eec __attribute__((unused));
D3UnwindRegs uregsPrev;
@@ -2694,24 +2702,24 @@
# else
# error "Unknown arch"
# endif
- ce = cfsi_cache__find(ipHere);
+ ce = cfsi_m_cache__find(ipHere);
if (UNLIKELY(ce == NULL))
return False; /* no info. Nothing we can do. */
di = ce->di;
- cfsi = &di->cfsi[ ce->ix ];
+ cfsi_m = ce->cfsi_m;
if (0) {
- VG_(printf)("found cfisi: ");
- ML_(ppDiCfSI)(di->cfsi_exprs, cfsi);
+ VG_(printf)("found cfsi_m (but printing fake base/len): ");
+ ML_(ppDiCfSI)(di->cfsi_exprs, 0, 0, cfsi_m);
}
VG_(bzero_inline)(&uregsPrev, sizeof(uregsPrev));
/* First compute the CFA. */
cfa = compute_cfa(uregsHere,
- min_accessible, max_accessible, di, cfsi);
+ min_accessible, max_accessible, di, cfsi_m);
if (UNLIKELY(cfa == 0))
return False;
@@ -2752,30 +2760,30 @@
} while (0)
# if defined(VGA_x86) || defined(VGA_amd64)
- COMPUTE(uregsPrev.xip, uregsHere->xip, cfsi->ra_how, cfsi->ra_off);
- COMPUTE(uregsPrev.xsp, uregsHere->xsp, cfsi->sp_how, cfsi->sp_off);
- COMPUTE(uregsPrev.xbp, uregsHere->xbp, cfsi->bp_how, cfsi->bp_off);
+ COMPUTE(uregsPrev.xip, uregsHere->xip, cfsi_m->ra_how, cfsi_m->ra_off);
+ COMPUTE(uregsPrev.xsp, uregsHere->xsp, cfsi_m->sp_how, cfsi_m->sp_off);
+ COMPUTE(uregsPrev.xbp, uregsHere->xbp, cfsi_m->bp_how, cfsi_m->bp_off);
# elif defined(VGA_arm)
- COMPUTE(uregsPrev.r15, uregsHere->r15, cfsi->ra_how, cfsi->ra_off);
- COMPUTE(uregsPrev.r14, uregsHere->r14, cfsi->r14_how, cfsi->r14_off);
- COMPUTE(uregsPrev.r13, uregsHere->r13, cfsi->r13_how, cfsi->r13_off);
- COMPUTE(uregsPrev.r12, uregsHere->r12, cfsi->r12_how, cfsi->r12_off);
- COMPUTE(uregsPrev.r11, uregsHere->r11, cfsi->r11_how, cfsi->r11_off);
- COMPUTE(uregsPrev.r7, uregsHere->r7, cfsi->r7_how, cfsi->r7_off);
+ COMPUTE(uregsPrev.r15, uregsHere->r15, cfsi_m->ra_how, cfsi_m->ra_off);
+ COMPUTE(uregsPrev.r14, uregsHere->r14, cfsi_m->r14_how, cfsi_m->r14_off);
+ COMPUTE(uregsPrev.r13, uregsHere->r13, cfsi_m->r13_how, cfsi_m->r13_off);
+ COMPUTE(uregsPrev.r12, uregsHere->r12, cfsi_m->r12_how, cfsi_m->r12_off);
+ COMPUTE(uregsPrev.r11, uregsHere->r11, cfsi_m->r11_how, cfsi_m->r11_off);
+ COMPUTE(uregsPrev.r7, uregsHere->r7, cfsi_m->r7_how, cfsi_m->r7_off);
# elif defined(VGA_s390x)
- COMPUTE(uregsPrev.ia, uregsHere->ia, cfsi->ra_how, cfsi->ra_off);
- COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi->sp_how, cfsi->sp_off);
- COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi->fp_how, cfsi->fp_off);
+ COMPUTE(uregsPrev.ia, uregsHere->ia, cfsi_m->ra_how, cfsi_m->ra_off);
+ COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off);
+ COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off);
# elif defined(VGA_mips32) || defined(VGA_mips64)
- COMPUTE(uregsPrev.pc, uregsHere->pc, cfsi->ra_how, cfsi->ra_off);
- COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi->sp_how, cfsi->sp_off);
- COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi->fp_how, cfsi->fp_off);
+ COMPUTE(uregsPrev.pc, uregsHere->pc, cfsi_m->ra_how, cfsi_m->ra_off);
+ COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off);
+ COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off);
# elif defined(VGA_ppc32) || defined(VGA_ppc64)
# elif defined(VGP_arm64_linux)
- COMPUTE(uregsPrev.pc, uregsHere->pc, cfsi->ra_how, cfsi->ra_off);
- COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi->sp_how, cfsi->sp_off);
- COMPUTE(uregsPrev.x30, uregsHere->x30, cfsi->x30_how, cfsi->x30_off);
- COMPUTE(uregsPrev.x29, uregsHere->x29, cfsi->x29_how, cfsi->x29_off);
+ COMPUTE(uregsPrev.pc, uregsHere->pc, cfsi_m->ra_how, cfsi_m->ra_off);
+ COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off);
+ COMPUTE(uregsPrev.x30, uregsHere->x30, cfsi_m->x30_how, cfsi_m->x30_off);
+ COMPUTE(uregsPrev.x29, uregsHere->x29, cfsi_m->x29_how, cfsi_m->x29_off);
# else
# error "Unknown arch"
# endif
Modified: trunk/coregrind/m_debuginfo/priv_storage.h
==============================================================================
--- trunk/coregrind/m_debuginfo/priv_storage.h (original)
+++ trunk/coregrind/m_debuginfo/priv_storage.h Fri Jul 4 22:36:38 2014
@@ -236,11 +236,11 @@
#define CFIR_MEMCFAREL ((UChar)67)
#define CFIR_EXPR ((UChar)68)
+/* Definition of DiCfSI_m, the machine dependent part of a DiCfSI.
+ These are highly duplicated, and are stored in a pool. */
#if defined(VGA_x86) || defined(VGA_amd64)
typedef
struct {
- Addr base;
- UInt len;
UChar cfa_how; /* a CFIC_IA value */
UChar ra_how; /* a CFIR_ value */
UChar sp_how; /* a CFIR_ value */
@@ -250,12 +250,10 @@
Int sp_off;
Int bp_off;
}
- DiCfSI;
+ DiCfSI_m;
#elif defined(VGA_arm)
typedef
struct {
- Addr base;
- UInt len;
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
UChar r14_how; /* a CFIR_ value */
@@ -271,12 +269,10 @@
Int r11_off;
Int r7_off;
}
- DiCfSI;
+ DiCfSI_m;
#elif defined(VGA_arm64)
typedef
struct {
- Addr base;
- UInt len;
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/
@@ -288,7 +284,7 @@
Int x30_off;
Int x29_off;
}
- DiCfSI;
+ DiCfSI_m;
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
/* Just have a struct with the common fields in, so that code that
processes the common fields doesn't have to be ifdef'd against
@@ -296,19 +292,15 @@
at the moment. */
typedef
struct {
- Addr base;
- UInt len;
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
Int cfa_off;
Int ra_off;
}
- DiCfSI;
+ DiCfSI_m;
#elif defined(VGA_s390x)
typedef
struct {
- Addr base;
- UInt len;
UChar cfa_how; /* a CFIC_ value */
UChar sp_how; /* a CFIR_ value */
UChar ra_how; /* a CFIR_ value */
@@ -318,12 +310,10 @@
Int ra_off;
Int fp_off;
}
- DiCfSI;
+ DiCfSI_m;
#elif defined(VGA_mips32) || defined(VGA_mips64)
typedef
struct {
- Addr base;
- UInt len;
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
UChar sp_how; /* a CFIR_ value */
@@ -333,11 +323,18 @@
Int sp_off;
Int fp_off;
}
- DiCfSI;
+ DiCfSI_m;
#else
# error "Unknown arch"
#endif
+typedef
+ struct {
+ Addr base;
+ UInt len;
+ UInt cfsi_m_ix;
+ }
+ DiCfSI;
typedef
enum {
@@ -820,14 +817,67 @@
UWord inltab_used;
UWord inltab_size;
SizeT maxinl_codesz;
- /* An expandable array of CFI summary info records. Also includes
- summary address bounds, showing the min and max address covered
- by any of the records, as an aid to fast searching. And, if the
+
+ /* A set of expandable arrays to store CFI summary info records.
+ The machine specific information (i.e. the DiCfSI_m struct)
+ are stored in cfsi_m_pool, as these are highly duplicated.
+ The DiCfSI_m are allocated in cfsi_m_pool and identified using
+ a (we hope) small integer : often one byte is enough, sometimes
+ 2 bytes are needed.
+
+ cfsi_base contains the bases of the code address ranges.
+ cfsi_size is the size of the cfsi_base array.
+ The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
+ Following elements are not used (yet).
+
+ For each base in cfsi_base, an index into cfsi_m_pool is stored
+ in cfsi_m_ix array. The size of cfsi_m_ix is equal to
+ cfsi_size*sizeof_ix. The used portion of cfsi_m_ix is
+ cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_ix].
+
+ cfsi_base[i] gives the base address of a code range covered by
+ some CF Info. The corresponding CF Info is identified by an index
+ in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
+ cfsi_base[i] is given
+ by ((UChar*) cfsi_m_ix)[i] if sizeof_ix == 1
+ by ((UShort*)cfsi_m_ix)[i] if sizeof_ix == 2
+ by ((UInt*) cfsi_m_ix)[i] if sizeof_ix == 4.
+
+ The end of the code range starting at cfsi_base[i] is given by
+ cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]).
+ Some code ranges between cfsi_minavma and cfsi_maxavma might not
+ be covered by cfi information. Such not covered ranges are stored by
+ a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
+
+ A variable size representation has been chosen for the elements of
+ cfsi_m_ix as in many cases, one byte is good enough. For big
+ objects, 2 bytes are needed. No object has yet been found where
+ 4 bytes are needed (but the code is ready to handle this case).
+ Not covered ranges ('cfi holes') are stored explicitly in
+ cfsi_base/cfsi_m_ix as this is more memory efficient than storing
+ a length for each covered range : on x86 or amd64, we typically have
+ a hole every 8 covered ranges. On arm64, we have very few holes
+ (1 every 50 or 100 ranges).
+
+ The cfsi information is read and prepared in the cfsi_rd array.
+ Once all the information has been read, the cfsi_base and cfsi_m_ix
+ arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
+ This is all done by ML_(finish_CFSI_arrays).
+
+ Also includes summary address bounds, showing the min and max address
+ covered by any of the records, as an aid to fast searching. And, if the
records require any expression nodes, they are stored in
cfsi_exprs. */
- DiCfSI* cfsi;
+ Addr* cfsi_base;
+ UInt sizeof_ix; /* size in byte of the indexes stored in cfsi_m_ix. */
+ void* cfsi_m_ix; /* Each index occupies sizeof_ix bytes. */
+
+ DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
+
UWord cfsi_used;
UWord cfsi_size;
+
+ DedupPoolAlloc *cfsi_m_pool;
Addr cfsi_minavma;
Addr cfsi_maxavma;
XArray* cfsi_exprs; /* XArray of CfiExpr */
@@ -919,8 +969,14 @@
const HChar* dirname, /* NULL is allowable */
Int lineno, UShort level);
-/* Add a CFI summary record. The supplied DiCfSI is copied. */
-extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
+/* Add a CFI summary record. The supplied DiCfSI_m is copied. */
+extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
+ Addr base, UInt len, DiCfSI_m* cfsi_m );
+
+/* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
+ the corresponding cfsi_m*. Return NULL if the position corresponds
+ to a cfsi hole. */
+DiCfSI_m* ML_(get_cfsi_m) (struct _DebugInfo* di, UInt pos);
/* Add a string to the string table of a DebugInfo. If len==-1,
ML_(addStr) will itself measure the length of the string. */
@@ -952,6 +1008,10 @@
called on it's own to sort just this table. */
extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
+/* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
+ from cfsi_rd array. cfsi_rd is then freed. */
+extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
+
/* ------ Searching ------ */
/* Find a symbol-table index containing the specified pointer, or -1
@@ -991,7 +1051,9 @@
extern void ML_(ppSym) ( Int idx, DiSym* sym );
/* Print a call-frame-info summary. */
-extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
+extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs,
+ Addr base, UInt len,
+ DiCfSI_m* si_m );
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
Modified: trunk/coregrind/m_debuginfo/readdwarf.c
==============================================================================
--- trunk/coregrind/m_debuginfo/readdwarf.c (original)
+++ trunk/coregrind/m_debuginfo/readdwarf.c Fri Jul 4 22:36:38 2014
@@ -2126,12 +2126,6 @@
/* ------------ Deal with summary-info records ------------ */
-static void initCfiSI ( DiCfSI* si )
-{
- VG_(bzero_inline)(si, sizeof(*si));
-}
-
-
/* --------------- Summarisation --------------- */
/* Forward */
@@ -2145,14 +2139,20 @@
summary is up to but not including the current loc. This works
on both x86 and amd64.
*/
-static Bool summarise_context( /*OUT*/DiCfSI* si,
+static Bool summarise_context(/*OUT*/Addr* base,
+ /*OUT*/UInt* len,
+ /*OUT*/DiCfSI_m* si_m,
Addr loc_start,
UnwindContext* ctx,
struct _DebugInfo* debuginfo )
{
Int why = 0;
struct UnwindContextState* ctxs;
- initCfiSI(si);
+
+ *base = 0;
+ *len = 0;
+ VG_(bzero_inline)(si_m, sizeof(*si_m));
+
/* Guard against obviously stupid settings of the reg-rule stack
pointer. */
@@ -2177,49 +2177,49 @@
( dst, ctx, ctxs->cfa_expr_ix );
vg_assert(conv >= -1);
if (conv == -1) { why = 6; goto failed; }
- si->cfa_how = CFIC_EXPR;
- si->cfa_off = conv;
+ si_m->cfa_how = CFIC_EXPR;
+ si_m->cfa_off = conv;
if (0 && debuginfo->ddump_frames)
ML_(ppCfiExpr)(dst, conv);
}
else
if (ctxs->cfa_is_regoff && ctxs->cfa_reg == SP_REG) {
- si->cfa_off = ctxs->cfa_off;
+ si_m->cfa_off = ctxs->cfa_off;
# if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \
|| defined(VGA_mips32) || defined(VGA_mips64)
- si->cfa_how = CFIC_IA_SPREL;
+ si_m->cfa_how = CFIC_IA_SPREL;
# elif defined(VGA_arm)
- si->cfa_how = CFIC_ARM_R13REL;
+ si_m->cfa_how = CFIC_ARM_R13REL;
# elif defined(VGA_arm64)
- si->cfa_how = CFIC_ARM64_SPREL;
+ si_m->cfa_how = CFIC_ARM64_SPREL;
# else
- si->cfa_how = 0; /* invalid */
+ si_m->cfa_how = 0; /* invalid */
# endif
}
else
if (ctxs->cfa_is_regoff && ctxs->cfa_reg == FP_REG) {
- si->cfa_off = ctxs->cfa_off;
+ si_m->cfa_off = ctxs->cfa_off;
# if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \
|| defined(VGA_mips32) || defined(VGA_mips64)
- si->cfa_how = CFIC_IA_BPREL;
+ si_m->cfa_how = CFIC_IA_BPREL;
# elif defined(VGA_arm)
- si->cfa_how = CFIC_ARM_R12REL;
+ si_m->cfa_how = CFIC_ARM_R12REL;
# elif defined(VGA_arm64)
- si->cfa_how = CFIC_ARM64_X29REL;
+ si_m->cfa_how = CFIC_ARM64_X29REL;
# else
- si->cfa_how = 0; /* invalid */
+ si_m->cfa_how = 0; /* invalid */
# endif
}
# if defined(VGA_arm)
else
if (ctxs->cfa_is_regoff && ctxs->cfa_reg == 11/*??_REG*/) {
- si->cfa_how = CFIC_ARM_R11REL;
- si->cfa_off = ctxs->cfa_off;
+ si_m->cfa_how = CFIC_ARM_R11REL;
+ si_m->cfa_off = ctxs->cfa_off;
}
else
if (ctxs->cfa_is_regoff && ctxs->cfa_reg == 7/*??_REG*/) {
- si->cfa_how = CFIC_ARM_R7REL;
- si->cfa_off = ctxs->cfa_off;
+ si_m->cfa_how = CFIC_ARM_R7REL;
+ si_m->cfa_off = ctxs->cfa_off;
}
# elif defined(VGA_arm64)
// do we need any arm64 specifics here?
@@ -2271,23 +2271,23 @@
/* --- entire tail of this fn specialised for x86/amd64 --- */
- SUMMARISE_HOW(si->ra_how, si->ra_off,
- ctxs->reg[ctx->ra_reg] );
- SUMMARISE_HOW(si->bp_how, si->bp_off,
- ctxs->reg[FP_REG] );
+ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off,
+ ctxs->reg[ctx->ra_reg] );
+ SUMMARISE_HOW(si_m->bp_how, si_m->bp_off,
+ ctxs->reg[FP_REG] );
/* on x86/amd64, it seems the old %{e,r}sp value before the call is
always the same as the CFA. Therefore ... */
- si->sp_how = CFIR_CFAREL;
- si->sp_off = 0;
+ si_m->sp_how = CFIR_CFAREL;
+ si_m->sp_off = 0;
/* also, gcc says "Undef" for %{e,r}bp when it is unchanged. So
.. */
if (ctxs->reg[FP_REG].tag == RR_Undef)
- si->bp_how = CFIR_SAME;
+ si_m->bp_how = CFIR_SAME;
/* knock out some obviously stupid cases */
- if (si->ra_how == CFIR_SAME)
+ if (si_m->ra_how == CFIR_SAME)
{ why = 3; goto failed; }
/* bogus looking range? Note, we require that the difference is
@@ -2297,8 +2297,8 @@
if (ctx->loc - loc_start > 10000000 /* let's say */)
{ why = 5; goto failed; }
- si->base = loc_start + ctx->initloc;
- si->len = (UInt)(ctx->loc - loc_start);
+ *base = loc_start + ctx->initloc;
+ *len = (UInt)(ctx->loc - loc_start);
return True;
@@ -2306,20 +2306,20 @@
/* ---- entire tail of this fn specialised for arm ---- */
- SUMMARISE_HOW(si->r14_how, si->r14_off,
- ctxs->reg[14] );
+ SUMMARISE_HOW(si_m->r14_how, si_m->r14_off,
+ ctxs->reg[14] );
- //SUMMARISE_HOW(si->r13_how, si->r13_off,
- // ctxs->reg[13] );
+ //SUMMARISE_HOW(si_m->r13_how, si_m->r13_off,
+ // ctxs->reg[13] );
- SUMMARISE_HOW(si->r12_how, si->r12_off,
- ctxs->reg[FP_REG] );
+ SUMMARISE_HOW(si_m->r12_how, si_m->r12_off,
+ ctxs->reg[FP_REG] );
- SUMMARISE_HOW(si->r11_how, si->r11_off,
- ctxs->reg[11/*FP_REG*/] );
+ SUMMARISE_HOW(si_m->r11_how, si_m->r11_off,
+ ctxs->reg[11/*FP_REG*/] );
- SUMMARISE_HOW(si->r7_how, si->r7_off,
- ctxs->reg[7] );
+ SUMMARISE_HOW(si_m->r7_how, si_m->r7_off,
+ ctxs->reg[7] );
if (ctxs->reg[14/*LR*/].tag == RR_Same
&& ctx->ra_reg == 14/*as we expect it always to be*/) {
@@ -2331,19 +2331,19 @@
"di.ccCt.2a",
ML_(dinfo_free),
sizeof(CfiExpr) );
- si->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
- Creg_ARM_R14);
- si->ra_how = CFIR_EXPR;
+ si_m->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
+ Creg_ARM_R14);
+ si_m->ra_how = CFIR_EXPR;
} else {
/* Just summarise it in the normal way */
- SUMMARISE_HOW(si->ra_how, si->ra_off,
- ctxs->reg[ctx->ra_reg] );
+ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off,
+ ctxs->reg[ctx->ra_reg] );
}
/* on arm, it seems the old r13 (SP) value before the call is
always the same as the CFA. Therefore ... */
- si->r13_how = CFIR_CFAREL;
- si->r13_off = 0;
+ si_m->r13_how = CFIR_CFAREL;
+ si_m->r13_off = 0;
/* bogus looking range? Note, we require that the difference is
representable in 32 bits. */
@@ -2352,8 +2352,8 @@
if (ctx->loc - loc_start > 10000000 /* let's say */)
{ why = 5; goto failed; }
- si->base = loc_start + ctx->initloc;
- si->len = (UInt)(ctx->loc - loc_start);
+ *base = loc_start + ctx->initloc;
+ *len = (UInt)(ctx->loc - loc_start);
return True;
@@ -2361,8 +2361,8 @@
/* --- entire tail of this fn specialised for arm64 --- */
- SUMMARISE_HOW(si->x30_how, si->x30_off, ctxs->reg[30/*LR*/]);
- SUMMARISE_HOW(si->x29_how, si->x29_off, ctxs->reg[29/*FP*/]);
+ SUMMARISE_HOW(si_m->x30_how, si_m->x30_off, ctxs->reg[30/*LR*/]);
+ SUMMARISE_HOW(si_m->x29_how, si_m->x29_off, ctxs->reg[29/*FP*/]);
if (ctxs->reg[30/*LR*/].tag == RR_Same
&& ctx->ra_reg == 30/*as we expect it always to be*/) {
@@ -2374,18 +2374,18 @@
"di.ccCt.2a-arm64",
ML_(dinfo_free),
sizeof(CfiExpr) );
- si->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
- Creg_ARM64_X30);
- si->ra_how = CFIR_EXPR;
+ si_m->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
+ Creg_ARM64_X30);
+ si_m->ra_how = CFIR_EXPR;
} else {
/* Just summarise it in the normal way */
- SUMMARISE_HOW(si->ra_how, si->ra_off, ctxs->reg[ctx->ra_reg]);
+ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off, ctxs->reg[ctx->ra_reg]);
}
/* on arm64, it seems the old SP value before the call is always
the same as the CFA. Therefore ... */
- si->sp_how = CFIR_CFAREL;
- si->sp_off = 0;
+ si_m->sp_how = CFIR_CFAREL;
+ si_m->sp_off = 0;
/* bogus looking range? Note, we require that the difference is
representable in 32 bits. */
@@ -2394,8 +2394,8 @@
if (ctx->loc - loc_start > 10000000 /* let's say */)
{ why = 5; goto failed; }
- si->base = loc_start + ctx->initloc;
- si->len = (UInt)(ctx->loc - loc_start);
+ *base = loc_start + ctx->initloc;
+ *len = (UInt)(ctx->loc - loc_start);
return True;
@@ -2403,37 +2403,37 @@
/* --- entire tail of this fn specialised for s390 --- */
- SUMMARISE_HOW(si->ra_how, si->ra_off,
- ctxs->reg[ctx->ra_reg] );
- SUMMARISE_HOW(si->fp_how, si->fp_off,
- ctxs->reg[FP_REG] );
- SUMMARISE_HOW(si->sp_how, si->sp_off,
- ctxs->reg[SP_REG] );
+ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off,
+ ctxs->reg[ctx->ra_reg] );
+ SUMMARISE_HOW(si_m->fp_how, si_m->fp_off,
+ ctxs->reg[FP_REG] );
+ SUMMARISE_HOW(si_m->sp_how, si_m->sp_off,
+ ctxs->reg[SP_REG] );
/* change some defaults to consumable values */
- if (si->sp_how == CFIR_UNKNOWN)
- si->sp_how = CFIR_SAME;
+ if (si_m->sp_how == CFIR_UNKNOWN)
+ si_m->sp_how = CFIR_SAME;
- if (si->fp_how == CFIR_UNKNOWN)
- si->fp_how = CFIR_SAME;
+ if (si_m->fp_how == CFIR_UNKNOWN)
+ si_m->fp_how = CFIR_SAME;
- if (si->cfa_how == CFIR_UNKNOWN) {
- si->cfa_how = CFIC_IA_SPREL;
- si->cfa_off = 160;
+ if (si_m->cfa_how == CFIR_UNKNOWN) {
+ si_m->cfa_how = CFIC_IA_SPREL;
+ si_m->cfa_off = 160;
}
- if (si->ra_how == CFIR_UNKNOWN) {
+ if (si_m->ra_how == CFIR_UNKNOWN) {
if (!debuginfo->cfsi_exprs)
debuginfo->cfsi_exprs = VG_(newXA)( ML_(dinfo_zalloc),
"di.ccCt.2a",
ML_(dinfo_free),
sizeof(CfiExpr) );
- si->ra_how = CFIR_EXPR;
- si->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
- Creg_S390_R14);
+ si_m->ra_how = CFIR_EXPR;
+ si_m->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
+ Creg_S390_R14);
}
/* knock out some obviously stupid cases */
- if (si->ra_how == CFIR_SAME)
+ if (si_m->ra_how == CFIR_SAME)
{ why = 3; goto failed; }
/* bogus looking range? Note, we require that the difference is
@@ -2443,8 +2443,8 @@
if (ctx->loc - loc_start > 10000000 /* let's say */)
{ why = 5; goto failed; }
- si->base = loc_start + ctx->initloc;
- si->len = (UInt)(ctx->loc - loc_start);
+ *base = loc_start + ctx->initloc;
+ *len = (UInt)(ctx->loc - loc_start);
return True;
@@ -2452,33 +2452,33 @@
/* --- entire tail of this fn specialised for mips --- */
- SUMMARISE_HOW(si->ra_how, si->ra_off,
- ctxs->reg[ctx->ra_reg] );
- SUMMARISE_HOW(si->fp_how, si->fp_off,
- ctxs->reg[FP_REG] );
- SUMMARISE_HOW(si->sp_how, si->sp_off,
- ctxs->reg[SP_REG] );
- si->sp_how = CFIR_CFAREL;
- si->sp_off = 0;
-
- if (si->fp_how == CFIR_UNKNOWN)
- si->fp_how = CFIR_SAME;
- if (si->cfa_how == CFIR_UNKNOWN) {
- si->cfa_how = CFIC_IA_SPREL;
- si->cfa_off = 160;
+ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off,
+ ctxs->reg[ctx->ra_reg] );
+ SUMMARISE_HOW(si_m->fp_how, si_m->fp_off,
+ ctxs->reg[FP_REG] );
+ SUMMARISE_HOW(si_m->sp_how, si_m->sp_off,
+ ctxs->reg[SP_REG] );
+ si_m->sp_how = CFIR_CFAREL;
+ si_m->sp_off = 0;
+
+ if (si_m->fp_how == CFIR_UNKNOWN)
+ si_m->fp_how = CFIR_SAME;
+ if (si_m->cfa_how == CFIR_UNKNOWN) {
+ si_m->cfa_how = CFIC_IA_SPREL;
+ si_m->cfa_off = 160;
}
- if (si->ra_how == CFIR_UNKNOWN) {
+ if (si_m->ra_how == CFIR_UNKNOWN) {
if (!debuginfo->cfsi_exprs)
debuginfo->cfsi_exprs = VG_(newXA)( ML_(dinfo_zalloc),
"di.ccCt.2a",
ML_(dinfo_free),
sizeof(CfiExpr) );
- si->ra_how = CFIR_EXPR;
- si->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
- Creg_MIPS_RA);
+ si_m->ra_how = CFIR_EXPR;
+ si_m->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
+ Creg_MIPS_RA);
}
- if (si->ra_how == CFIR_SAME)
+ if (si_m->ra_how == CFIR_SAME)
{ why = 3; goto failed; }
if (loc_start >= ctx->loc)
@@ -2486,8 +2486,8 @@
if (ctx->loc - loc_start > 10000000 /* let's say */)
{ why = 5; goto failed; }
- si->base = loc_start + ctx->initloc;
- si->len = (UInt)(ctx->loc - loc_start);
+ *base = loc_start + ctx->initloc;
+ *len = (UInt)(ctx->loc - loc_start);
return True;
@@ -3656,7 +3656,9 @@
UnwindContext* restore_ctx,
AddressDecodingInfo* adi )
{
- DiCfSI cfsi;
+ Addr base;
+ UInt len;
+ DiCfSI_m cfsi_m;
Bool summ_ok;
Int j, i = 0;
Addr loc_prev;
@@ -3674,11 +3676,12 @@
i += j;
if (0) ppUnwindContext(ctx);
if (record && loc_prev != ctx->loc) {
- summ_ok = summarise_context ( &cfsi, loc_prev, ctx, di );
+ summ_ok = summarise_context ( &base, &len, &cfsi_m,
+ loc_prev, ctx, di );
if (summ_ok) {
- ML_(addDiCfSI)(di, &cfsi);
+ ML_(addDiCfSI)(di, base, len, &cfsi_m);
if (di->trace_cfi)
- ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
+ ML_(ppDiCfSI)(di->cfsi_exprs, base, len, &cfsi_m);
}
}
}
@@ -3686,11 +3689,12 @@
loc_prev = ctx->loc;
ctx->loc = fde_arange;
if (record) {
- summ_ok = summarise_context ( &cfsi, loc_prev, ctx, di );
+ summ_ok = summarise_context ( &base, &len, &cfsi_m,
+ loc_prev, ctx, di );
if (summ_ok) {
- ML_(addDiCfSI)(di, &cfsi);
+ ML_(addDiCfSI)(di, base, len, &cfsi_m);
if (di->trace_cfi)
- ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
+ ML_(ppDiCfSI)(di->cfsi_exprs, base, len, &cfsi_m);
}
}
}
@@ -4180,8 +4184,8 @@
/* current unsearched space is from lo to hi, inclusive. */
if (lo > hi) break; /* not found */
mid = (lo + hi) / 2;
- a_mid_lo = di->cfsi[mid].base;
- size = di->cfsi[mid].len;
+ a_mid_lo = di->cfsi_rd[mid].base;
+ size = di->cfsi_rd[mid].len;
a_mid_hi = a_mid_lo + size - 1;
vg_assert(a_mid_hi >= a_mid_lo);
if (fde_initloc + fde_arange <= a_mid_lo) {
Modified: trunk/coregrind/m_debuginfo/readelf.c
==============================================================================
--- trunk/coregrind/m_debuginfo/readelf.c (original)
+++ trunk/coregrind/m_debuginfo/readelf.c Fri Jul 4 22:36:38 2014
@@ -1448,7 +1448,9 @@
vg_assert(!di->symtab);
vg_assert(!di->loctab);
vg_assert(!di->inltab);
- vg_assert(!di->cfsi);
+ vg_assert(!di->cfsi_base);
+ vg_assert(!di->cfsi_m_ix);
+ vg_assert(!di->cfsi_rd);
vg_assert(!di->cfsi_exprs);
vg_assert(!di->strpool);
vg_assert(!di->soname);
Modified: trunk/coregrind/m_debuginfo/storage.c
==============================================================================
--- trunk/coregrind/m_debuginfo/storage.c (original)
+++ trunk/coregrind/m_debuginfo/storage.c Fri Jul 4 22:36:38 2014
@@ -40,6 +40,7 @@
#include "pub_core_basics.h"
#include "pub_core_options.h" /* VG_(clo_verbosity) */
#include "pub_core_debuginfo.h"
+#include "pub_core_debuglog.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcprint.h"
@@ -114,7 +115,9 @@
}
/* Print a call-frame-info summary. */
-void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si )
+void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs,
+ Addr base, UInt len,
+ DiCfSI_m* si_m )
{
# define SHOW_HOW(_how, _off) \
do { \
@@ -139,39 +142,39 @@
} \
} while (0)
- VG_(printf)("[%#lx .. %#lx]: ", si->base,
- si->base + (UWord)si->len - 1);
- switch (si->cfa_how) {
+ VG_(printf)("[%#lx .. %#lx]: ", base,
+ base + (UWord)len - 1);
+ switch (si_m->cfa_how) {
case CFIC_IA_SPREL:
- VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldSP+%d", si_m->cfa_off);
break;
case CFIC_IA_BPREL:
- VG_(printf)("let cfa=oldBP+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldBP+%d", si_m->cfa_off);
break;
case CFIC_ARM_R13REL:
- VG_(printf)("let cfa=oldR13+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldR13+%d", si_m->cfa_off);
break;
case CFIC_ARM_R12REL:
- VG_(printf)("let cfa=oldR12+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldR12+%d", si_m->cfa_off);
break;
case CFIC_ARM_R11REL:
- VG_(printf)("let cfa=oldR11+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldR11+%d", si_m->cfa_off);
break;
case CFIR_SAME:
VG_(printf)("let cfa=Same");
break;
case CFIC_ARM_R7REL:
- VG_(printf)("let cfa=oldR7+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldR7+%d", si_m->cfa_off);
break;
case CFIC_ARM64_SPREL:
- VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldSP+%d", si_m->cfa_off);
break;
case CFIC_ARM64_X29REL:
- VG_(printf)("let cfa=oldX29+%d", si->cfa_off);
+ VG_(printf)("let cfa=oldX29+%d", si_m->cfa_off);
break;
case CFIC_EXPR:
VG_(printf)("let cfa={");
- ML_(ppCfiExpr)(exprs, si->cfa_off);
+ ML_(ppCfiExpr)(exprs, si_m->cfa_off);
VG_(printf)("}");
break;
default:
@@ -179,36 +182,36 @@
}
VG_(printf)(" in RA=");
- SHOW_HOW(si->ra_how, si->ra_off);
+ SHOW_HOW(si_m->ra_how, si_m->ra_off);
# if defined(VGA_x86) || defined(VGA_amd64)
VG_(printf)(" SP=");
- SHOW_HOW(si->sp_how, si->sp_off);
+ SHOW_HOW(si_m->sp_how, si_m->sp_off);
VG_(printf)(" BP=");
- SHOW_HOW(si->bp_how, si->bp_off);
+ SHOW_HOW(si_m->bp_how, si_m->bp_off);
# elif defined(VGA_arm)
VG_(printf)(" R14=");
- SHOW_HOW(si->r14_how, si->r14_off);
+ SHOW_HOW(si_m->r14_how, si_m->r14_off);
VG_(printf)(" R13=");
- SHOW_HOW(si->r13_how, si->r13_off);
+ SHOW_HOW(si_m->r13_how, si_m->r13_off);
VG_(printf)(" R12=");
- SHOW_HOW(si->r12_how, si->r12_off);
+ SHOW_HOW(si_m->r12_how, si_m->r12_off);
VG_(printf)(" R11=");
- SHOW_HOW(si->r11_how, si->r11_off);
+ SHOW_HOW(si_m->r11_how, si_m->r11_off);
VG_(printf)(" R7=");
- SHOW_HOW(si->r7_how, si->r7_off);
+ SHOW_HOW(si_m->r7_how, si_m->r7_off);
# elif defined(VGA_ppc32) || defined(VGA_ppc64)
# elif defined(VGA_s390x) || defined(VGA_mips32) || defined(VGA_mips64)
VG_(printf)(" SP=");
- SHOW_HOW(si->sp_how, si->sp_off);
+ SHOW_HOW(si_m->sp_how, si_m->sp_off);
VG_(printf)(" FP=");
- SHOW_HOW(si->fp_how, si->fp_off);
+ SHOW_HOW(si_m->fp_how, si_m->fp_off);
# elif defined(VGA_arm64)
VG_(printf)(" SP=");
- SHOW_HOW(si->sp_how, si->sp_off);
+ SHOW_HOW(si_m->sp_how, si_m->sp_off);
VG_(printf)(" X30=");
- SHOW_HOW(si->x30_how, si->x30_off);
+ SHOW_HOW(si_m->x30_how, si_m->x30_off);
VG_(printf)(" X29=");
- SHOW_HOW(si->x29_how, si->x29_off);
+ SHOW_HOW(si_m->x29_how, si_m->x29_off);
# else
# error "Unknown arch"
# endif
@@ -543,41 +546,55 @@
addInl ( di, &inl );
}
+/* Return the machine-dependent CFI summary (DiCfSI_m) referenced by
+   the cfsi entry at index POS, or NULL if that entry is a cfi hole.
+   The per-entry index is read from di->cfsi_m_ix, whose elements are
+   1, 2 or 4 bytes wide as selected by di->sizeof_ix; a non-zero index
+   is resolved to a DiCfSI_m through di->cfsi_m_pool. */
+DiCfSI_m* ML_(get_cfsi_m) (struct _DebugInfo* di, UInt pos)
+{
+   /* Initialised so that no path reaches the test below with an
+      indeterminate value, even if the compiler cannot prove that
+      vg_assert(0) does not return. */
+   UInt cfsi_m_ix = 0;
+
+   /* pos is unsigned, so only the upper bound needs checking. */
+   vg_assert(pos < di->cfsi_used);
+   switch (di->sizeof_ix) {
+      case 1: cfsi_m_ix = ((UChar*)  di->cfsi_m_ix)[pos]; break;
+      case 2: cfsi_m_ix = ((UShort*) di->cfsi_m_ix)[pos]; break;
+      case 4: cfsi_m_ix = ((UInt*)   di->cfsi_m_ix)[pos]; break;
+      default: vg_assert(0);
+   }
+   if (cfsi_m_ix == 0)
+      return NULL; // cfi hole
+   else
+      return VG_(indexEltNumber) (di->cfsi_m_pool, cfsi_m_ix);
+}
/* Top-level place to call to add a CFI summary record. The supplied
- DiCfSI is copied. */
-void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig )
+ DiCfSI_m is copied. */
+void ML_(addDiCfSI) ( struct _DebugInfo* di,
+ Addr base, UInt len, DiCfSI_m* cfsi_m )
{
static const Bool debug = False;
- UInt new_sz, i;
+ UInt new_sz;
DiCfSI* new_tab;
SSizeT delta;
struct _DebugInfoMapping* map;
struct _DebugInfoMapping* map2;
- /* copy the original, so we can mess with it */
- DiCfSI cfsi = *cfsi_orig;
-
if (debug) {
VG_(printf)("adding DiCfSI: ");
- ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
+ ML_(ppDiCfSI)(di->cfsi_exprs, base, len, cfsi_m);
}
/* sanity */
- vg_assert(cfsi.len > 0);
+ vg_assert(len > 0);
/* If this fails, the implication is you have a single procedure
with more than 5 million bytes of code. Which is pretty
unlikely. Either that, or the debuginfo reader is somehow
broken. 5 million is of course arbitrary; but it's big enough
to be bigger than the size of any plausible piece of code that
would fall within a single procedure. */
- vg_assert(cfsi.len < 5000000);
+ vg_assert(len < 5000000);
vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
/* Find mapping where at least one end of the CFSI falls into. */
- map = ML_(find_rx_mapping)(di, cfsi.base, cfsi.base);
- map2 = ML_(find_rx_mapping)(di, cfsi.base + cfsi.len - 1,
- cfsi.base + cfsi.len - 1);
+ map = ML_(find_rx_mapping)(di, base, base);
+ map2 = ML_(find_rx_mapping)(di, base + len - 1,
+ base + len - 1);
if (map == NULL)
map = map2;
else if (map2 == NULL)
@@ -595,13 +612,13 @@
VG_(message)(
Vg_DebugMsg,
"warning: DiCfSI %#lx .. %#lx outside mapped rw segments (%s)\n",
- cfsi.base,
- cfsi.base + cfsi.len - 1,
+ base,
+ base + len - 1,
di->soname
);
}
if (VG_(clo_trace_cfi))
- ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
+ ML_(ppDiCfSI)(di->cfsi_exprs, base, len, cfsi_m);
}
return;
}
@@ -614,28 +631,28 @@
will fail. See
"Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
priv_storage.h for background. */
- if (cfsi.base < map->avma) {
+ if (base < map->avma) {
/* Lower end is outside the mapped area. Hence upper end must
be inside it. */
if (0) VG_(printf)("XXX truncate lower\n");
- vg_assert(cfsi.base + cfsi.len - 1 >= map->avma);
- delta = (SSizeT)(map->avma - cfsi.base);
+ vg_assert(base + len - 1 >= map->avma);
+ delta = (SSizeT)(map->avma - base);
vg_assert(delta > 0);
- vg_assert(delta < (SSizeT)cfsi.len);
- cfsi.base += delta;
- cfsi.len -= delta;
+ vg_assert(delta < (SSizeT)len);
+ base += delta;
+ len -= delta;
}
else
- if (cfsi.base + cfsi.len - 1 > map->avma + map->size - 1) {
+ if (base + len - 1 > map->avma + map->size - 1) {
/* Upper end is outside the mapped area. Hence lower end must be
inside it. */
if (0) VG_(printf)("XXX truncate upper\n");
- vg_assert(cfsi.base <= map->avma + map->size - 1);
- delta = (SSizeT)( (cfsi.base + cfsi.len - 1)
+ vg_assert(base <= map->avma + map->size - 1);
+ delta = (SSizeT)( (base + len - 1)
- (map->avma + map->size - 1) );
vg_assert(delta > 0);
- vg_assert(delta < (SSizeT)cfsi.len);
- cfsi.len -= delta;
+ vg_assert(delta < (SSizeT)len);
+ len -= delta;
}
/* Final checks */
@@ -644,11 +661,11 @@
case we asserted that len > 0 at the start, OR it fell partially
inside the range, in which case we reduced it by some size
(delta) which is < its original size. */
- vg_assert(cfsi.len > 0);
+ vg_assert(len > 0);
/* Similar logic applies for the next two assertions. */
- vg_assert(cfsi.base >= map->avma);
- vg_assert(cfsi.base + cfsi.len - 1
+ vg_assert(base >= map->avma);
+ vg_assert(base + len - 1
<= map->avma + map->size - 1);
if (di->cfsi_used == di->cfsi_size) {
@@ -656,16 +673,27 @@
if (new_sz == 0) new_sz = 20;
new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
new_sz * sizeof(DiCfSI) );
- if (di->cfsi != NULL) {
- for (i = 0; i < di->cfsi_used; i++)
- new_tab[i] = di->cfsi[i];
- ML_(dinfo_free)(di->cfsi);
+ if (di->cfsi_rd != NULL) {
+ VG_(memcpy)(new_tab, di->cfsi_rd,
+ di->cfsi_used * sizeof(DiCfSI));
+ ML_(dinfo_free)(di->cfsi_rd);
}
- di->cfsi = new_tab;
+ di->cfsi_rd = new_tab;
di->cfsi_size = new_sz;
+ if (di->cfsi_m_pool == NULL)
+ di->cfsi_m_pool = VG_(newDedupPA)(1000 * sizeof(DiCfSI_m),
+ vg_alignof(DiCfSI_m),
+ ML_(dinfo_zalloc),
+ "di.storage.DiCfSI_m_pool",
+ ML_(dinfo_free));
}
- di->cfsi[di->cfsi_used] = cfsi;
+ di->cfsi_rd[di->cfsi_used].base = base;
+ di->cfsi_rd[di->cfsi_used].len = len;
+ di->cfsi_rd[di->cfsi_used].cfsi_m_ix
+ = VG_(allocFixedEltDedupPA)(di->cfsi_m_pool,
+ sizeof(DiCfSI_m),
+ cfsi_m);
di->cfsi_used++;
vg_assert(di->cfsi_used <= di->cfsi_size);
}
@@ -1834,7 +1862,7 @@
}
-/* Sort the call-frame-info table by starting address. Mash the table
+/* Sort the call-frame-info cfsi_rd by starting address. Mash the table
around so as to establish the property that addresses are in order
and the ranges do not overlap. This facilitates using binary
search to map addresses to locations when we come to query the
@@ -1854,6 +1882,26 @@
return 0;
}
+/* Walk adjacent pairs of di->cfsi_rd[0 .. cfsi_used-1] and count:
+     *n_holes      - pairs where the next entry starts more than one
+                     byte after the previous entry's last byte;
+     *n_mergeables - pairs that are exactly contiguous and carry the
+                     same cfsi_m_ix.
+   Both counters are reset to zero before counting. */
+static void get_cfsi_rd_stats ( struct _DebugInfo* di,
+                                UWord *n_mergeables, UWord *n_holes )
+{
+   Word ix;
+
+   *n_mergeables = 0;
+   *n_holes      = 0;
+
+   vg_assert (di->cfsi_used == 0 || di->cfsi_rd);
+   for (ix = 1; ix < (Word)di->cfsi_used; ix++) {
+      const DiCfSI* prev = &di->cfsi_rd[ix-1];
+      const DiCfSI* curr = &di->cfsi_rd[ix];
+      /* Distance from the last byte of prev to the first byte of curr. */
+      Addr prev_last = prev->base + prev->len - 1;
+      Addr gap       = curr->base - prev_last;
+      if (gap > 1) {
+         (*n_holes)++;
+      } else if (gap == 1 && prev->cfsi_m_ix == curr->cfsi_m_ix) {
+         (*n_mergeables)++;
+      }
+   }
+}
+
void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
{
Word i, j;
@@ -1862,18 +1910,22 @@
/* Note: take care in here. di->cfsi can be NULL, in which
case _used and _size fields will be zero. */
- if (di->cfsi == NULL) {
+ if (di->cfsi_rd == NULL) {
vg_assert(di->cfsi_used == 0);
vg_assert(di->cfsi_size == 0);
+ vg_assert(di->cfsi_m_pool == NULL);
+ } else {
+ vg_assert(di->cfsi_size != 0);
+ vg_assert(di->cfsi_m_pool != NULL);
}
/* Set cfsi_minavma and cfsi_maxavma to summarise the entire
- address range contained in cfsi[0 .. cfsi_used-1]. */
+ address range contained in cfsi_rd[0 .. cfsi_used-1]. */
di->cfsi_minavma = maxAvma;
di->cfsi_maxavma = minAvma;
for (i = 0; i < (Word)di->cfsi_used; i++) {
- Addr here_min = di->cfsi[i].base;
- Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
+ Addr here_min = di->cfsi_rd[i].base;
+ Addr here_max = di->cfsi_rd[i].base + di->cfsi_rd[i].len - 1;
if (here_min < di->cfsi_minavma)
di->cfsi_minavma = here_min;
if (here_max > di->cfsi_maxavma)
@@ -1885,18 +1937,18 @@
di->cfsi_used,
di->cfsi_minavma, di->cfsi_maxavma);
- /* Sort the cfsi array by base address. */
- VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);
+ /* Sort the cfsi_rd array by base address. */
+ VG_(ssort)(di->cfsi_rd, di->cfsi_used, sizeof(*di->cfsi_rd), compare_DiCfSI);
/* If two adjacent entries overlap, truncate the first. */
for (i = 0; i < (Word)di->cfsi_used-1; i++) {
- if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
- Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
+ if (di->cfsi_rd[i].base + di->cfsi_rd[i].len > di->cfsi_rd[i+1].base) {
+ Word new_len = di->cfsi_rd[i+1].base - di->cfsi_rd[i].base;
/* how could it be otherwise? The entries are sorted by the
.base field. */
vg_assert(new_len >= 0);
- vg_assert(new_len <= di->cfsi[i].len);
- di->cfsi[i].len = new_len;
+ vg_assert(new_len <= di->cfsi_rd[i].len);
+ di->cfsi_rd[i].len = new_len;
}
}
@@ -1904,9 +1956,9 @@
process. */
j = 0;
for (i = 0; i < (Word)di->cfsi_used; i++) {
- if (di->cfsi[i].len > 0) {
+ if (di->cfsi_rd[i].len > 0) {
if (j != i)
- di->cfsi[j] = di->cfsi[i];
+ di->cfsi_rd[j] = di->cfsi_rd[i];
j++;
}
}
@@ -1916,10 +1968,10 @@
/* Ensure relevant postconditions hold. */
for (i = 0; i < (Word)di->cfsi_used; i++) {
/* No zero-length ranges. */
- vg_assert(di->cfsi[i].len > 0);
+ vg_assert(di->cfsi_rd[i].len > 0);
/* Makes sense w.r.t. summary address range */
- vg_assert(di->cfsi[i].base >= di->cfsi_minavma);
- vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
+ vg_assert(di->cfsi_rd[i].base >= di->cfsi_minavma);
+ vg_assert(di->cfsi_rd[...
[truncated message content] |