|
From: <sv...@va...> - 2010-03-14 17:19:21
|
Author: sewardj
Date: 2010-03-14 17:19:02 +0000 (Sun, 14 Mar 2010)
New Revision: 11093
Log:
Inline most functions in VG_(use_CF_info). May or may not give a
3% performance increase for Helgrind in default (detailed-history)
mode.
Modified:
trunk/coregrind/m_debuginfo/debuginfo.c
trunk/include/pub_tool_libcbase.h
Modified: trunk/coregrind/m_debuginfo/debuginfo.c
===================================================================
--- trunk/coregrind/m_debuginfo/debuginfo.c 2010-03-14 15:53:53 UTC (rev 11092)
+++ trunk/coregrind/m_debuginfo/debuginfo.c 2010-03-14 17:19:02 UTC (rev 11093)
@@ -1896,7 +1896,8 @@
/* Evaluate the CfiExpr rooted at ix in exprs given the context eec.
*ok is set to False on failure, but not to True on success. The
caller must set it to True before calling. */
-static
+__attribute__((noinline))
+static
UWord evalCfiExpr ( XArray* exprs, Int ix,
CfiExprEvalContext* eec, Bool* ok )
{
@@ -2079,7 +2080,7 @@
}
-static CFSICacheEnt* cfsi_cache__find ( Addr ip )
+static inline CFSICacheEnt* cfsi_cache__find ( Addr ip )
{
UWord hash = ip % N_CFSI_CACHE;
CFSICacheEnt* ce = &cfsi_cache[hash];
@@ -2108,6 +2109,7 @@
}
+inline
static Addr compute_cfa ( D3UnwindRegs* uregs,
Addr min_accessible, Addr max_accessible,
DebugInfo* di, DiCfSI* cfsi )
@@ -2237,7 +2239,7 @@
ML_(ppDiCfSI)(di->cfsi_exprs, cfsi);
}
- VG_(memset)(&uregsPrev, 0, sizeof(uregsPrev));
+ VG_(bzero_inline)(&uregsPrev, sizeof(uregsPrev));
/* First compute the CFA. */
cfa = compute_cfa(uregsHere,
Modified: trunk/include/pub_tool_libcbase.h
===================================================================
--- trunk/include/pub_tool_libcbase.h 2010-03-14 15:53:53 UTC (rev 11092)
+++ trunk/include/pub_tool_libcbase.h 2010-03-14 17:19:02 UTC (rev 11093)
@@ -110,6 +110,36 @@
extern void* VG_(memset) ( void *s, Int c, SizeT sz );
extern Int VG_(memcmp) ( const void* s1, const void* s2, SizeT n );
+/* Zero out up to 8 words quickly in-line.  Do not use this for blocks
+   of size which are unknown at compile time, since the whole point is
+   for it to be inlined, and then for gcc to remove all code except
+   for the relevant 'sz' case.
+
+   s  - start of the block to clear
+   sz - size of the block, in bytes
+
+   The unrolled stores are used only when both 's' and 'sz' are
+   multiples of sizeof(UWord) and 'sz' is at most 8 words; every
+   other combination is handed to VG_(memset). */
+inline __attribute__((always_inline))
+static void VG_(bzero_inline) ( void* s, SizeT sz )
+{
+   if (LIKELY(0 == (((Addr)sz) & (Addr)(sizeof(UWord)-1)))
+       && LIKELY(0 == (((Addr)s) & (Addr)(sizeof(UWord)-1)))) {
+      UWord* p = (UWord*)s;
+      /* Dispatch on the number of whole words to clear. */
+      switch (sz / (SizeT)sizeof(UWord)) {
+         case 8: p[0] = p[1] = p[2] = p[3]
+                 = p[4] = p[5] = p[6] = p[7] = 0UL; return;
+         case 7: p[0] = p[1] = p[2] = p[3]
+                 = p[4] = p[5] = p[6] = 0UL; return;
+         case 6: p[0] = p[1] = p[2] = p[3]
+                 = p[4] = p[5] = 0UL; return;
+         case 5: p[0] = p[1] = p[2] = p[3] = p[4] = 0UL; return;
+         case 4: p[0] = p[1] = p[2] = p[3] = 0UL; return;
+         case 3: p[0] = p[1] = p[2] = 0UL; return;
+         case 2: p[0] = p[1] = 0UL; return;
+         /* A single word must be cleared to zero like every other case. */
+         case 1: p[0] = 0UL; return;
+         case 0: return;
+         default: break;   /* more than 8 words: use the general path */
+      }
+   }
+   /* Misaligned pointer, size not a multiple of the word size, or
+      more than 8 words. */
+   VG_(memset)(s, 0, sz);
+}
+
+
/* ---------------------------------------------------------------------
Address computation helpers
------------------------------------------------------------------ */
|
|
From: Bart V. A. <bva...@ac...> - 2010-03-15 07:11:55
|
On Sun, Mar 14, 2010 at 6:19 PM, <sv...@va...> wrote:
>
> Author: sewardj
> Date: 2010-03-14 17:19:02 +0000 (Sun, 14 Mar 2010)
> New Revision: 11093
>
> Log:
> Inline most functions in VG_(use_CF_info). May or may not give a
> 3% performance increase for Helgrind in default (detailed-history)
> mode.
[ ... ]
> +/* Zero out up to 8 words quickly in-line. Do not use this for blocks
> + of size which are unknown at compile time, since the whole point is
> + for it to be inlined, and then for gcc to remove all code except
> + for the relevant 'sz' case. */
> +inline __attribute__((always_inline))
> +static void VG_(bzero_inline) ( void* s, SizeT sz )
> +{
> + if (LIKELY(0 == (((Addr)sz) & (Addr)(sizeof(UWord)-1)))
> + && LIKELY(0 == (((Addr)s) & (Addr)(sizeof(UWord)-1)))) {
> + UWord* p = (UWord*)s;
> + switch (sz / (SizeT)sizeof(UWord)) {
> + case 8: p[0] = p[1] = p[2] = p[3]
> + = p[4] = p[5] = p[6] = p[7] = 0UL; return;
> + case 7: p[0] = p[1] = p[2] = p[3]
> + = p[4] = p[5] = p[6] = 0UL; return;
> + case 6: p[0] = p[1] = p[2] = p[3]
> + = p[4] = p[5] = 0UL; return;
> + case 5: p[0] = p[1] = p[2] = p[3] = p[4] = 0UL; return;
> + case 4: p[0] = p[1] = p[2] = p[3] = 0UL; return;
> + case 3: p[0] = p[1] = p[2] = 0UL; return;
> + case 2: p[0] = p[1] = 0UL; return;
> + case 1: p[0] = 1; return;
> + case 0: return;
> + default: break;
> + }
> + }
> + VG_(memset)(s, 0, sz);
> +}
Hello Julian,
In the above I see "p[0] = 1" for case 1. Is that correct?
Bart.
|
|
From: Julian S. <js...@ac...> - 2010-03-15 09:01:56
|
On Monday 15 March 2010, Bart Van Assche wrote:
> In the above I see "p[0] = 1" for case 1. Is that correct?
Err, no! Thanks for spotting that.
J
|