|
From: <sv...@va...> - 2005-11-14 15:10:16
|
Author: sewardj
Date: 2005-11-14 15:10:12 +0000 (Mon, 14 Nov 2005)
New Revision: 5125
Log:
On amd64, when running allocation-intensive code in the presence of
many shared objects, finding the relevant CFI information for stack
unwinding becomes a significant performance overhead. This change
slowly rearranges the SegInfo list to bring more popular entries to
the front during CFI lookup. This reduces the startup time of
konqueror on memcheck on amd64 from 124 to 110 seconds.
Modified:
trunk/coregrind/m_debuginfo/symtab.c
Modified: trunk/coregrind/m_debuginfo/symtab.c
===================================================================
--- trunk/coregrind/m_debuginfo/symtab.c 2005-11-14 15:01:32 UTC (rev 5124)
+++ trunk/coregrind/m_debuginfo/symtab.c 2005-11-14 15:10:12 UTC (rev 5125)
@@ -2616,7 +2616,8 @@
}
=20
/* Returns True if OK. If not OK, *{ip,sp,fp}P are not changed. */
-
+/* NOTE: this function may rearrange the order of entries in the
+ SegInfo list. */
Bool VG_(use_CFI_info) ( /*MOD*/Addr* ipP,
/*MOD*/Addr* spP,
/*MOD*/Addr* fpP,
@@ -2628,10 +2629,15 @@
CfiSI* cfisi =3D NULL;
Addr cfa, ipHere, spHere, fpHere, ipPrev, spPrev, fpPrev;
=20
+ static UInt n_search =3D 0;
+ static UInt n_steps =3D 0;
+ n_search++;
=20
if (0) VG_(printf)("search for %p\n", *ipP);
=20
for (si =3D segInfo_list; si !=3D NULL; si =3D si->next) {
+ n_steps++;
+
/* Use the per-SegInfo summary address ranges to skip
inapplicable SegInfos quickly. */
if (si->cfisi_used =3D=3D 0)
@@ -2650,6 +2656,40 @@
if (cfisi =3D=3D NULL)
return False;
=20
+ if (0 && ((n_search & 0xFFFFF) =3D=3D 0))
+ VG_(printf)("%u %u\n", n_search, n_steps);
+
+ /* Start of performance-enhancing hack: once every 16 (chosen
+ hackily after profiling) successful searches, move the found
+ SegInfo one step closer to the start of the list. This makes
+ future searches cheaper. For starting konqueror on amd64, this
+ in fact reduces the total amount of searching done by the above
+ find-the-right-SegInfo loop by more than a factor of 20. */
+ if ((n_search & 0xF) =3D=3D 0) {
+ /* Move si one step closer to the start of the list. */
+ SegInfo* si0 =3D segInfo_list;
+ SegInfo* si1 =3D NULL;
+ SegInfo* si2 =3D NULL;
+ SegInfo* tmp;
+ while (True) {
+ if (si0 =3D=3D NULL) break;
+ if (si0 =3D=3D si) break;
+ si2 =3D si1;
+ si1 =3D si0;
+ si0 =3D si0->next;
+ }
+ if (si0 == si && si0 != NULL && si1 != NULL && si2 != NULL) {
+ /* si0 points to si, si1 to its predecessor, and si2 to si1's
+ predecessor. Swap si0 and si1, that is, move si0 one step
+ closer to the start of the list. */
+ tmp =3D si0->next;
+ si2->next =3D si0;
+ si0->next =3D si1;
+ si1->next =3D tmp;
+ }
+ }
+ /* End of performance-enhancing hack. */
+
if (0) {
VG_(printf)("found cfisi: ");=20
ML_(ppCfiSI)(cfisi);
|