|
From: <sv...@va...> - 2007-02-08 16:26:43
|
Author: sewardj
Date: 2007-02-08 16:25:56 +0000 (Thu, 08 Feb 2007)
New Revision: 6577
Log:
Specialise VG_(ssort) for 4-word elements. This removes about 80% of
all calls to VG_(memcpy). Thanks to cachegrind for showing somebody
was calling VG_(memcpy) a huge number of times, and to callgrind for
finding out who :-)
Modified:
trunk/coregrind/m_libcbase.c
Modified: trunk/coregrind/m_libcbase.c
===================================================================
--- trunk/coregrind/m_libcbase.c 2007-02-08 12:42:11 UTC (rev 6576)
+++ trunk/coregrind/m_libcbase.c 2007-02-08 16:25:56 UTC (rev 6577)
@@ -490,10 +490,34 @@
#undef ASSIGN
#undef COMPAR
=20
+ } else if ( (4*sizeof(UWord)) == size ) {
+ /* special-case 4-word elements. This captures a lot of cases
+ from symbol table reading/canonicalisation, because both DiLoc
+ and DiSym are 4-word structures. */
+ HChar* a = base;
+ HChar v[size];
+
+ #define ASSIGN(dst, dsti, src, srci) \
+ do { UWord* dP = (UWord*)&dst[size*(dsti)]; \
+ UWord* sP = (UWord*)&src[size*(srci)]; \
+ dP[0] = sP[0]; \
+ dP[1] = sP[1]; \
+ dP[2] = sP[2]; \
+ dP[3] = sP[3]; \
+ } while (0)
+
+ #define COMPAR(dst, dsti, src, srci) \
+ compar( &dst[size*(dsti)], &src[size*(srci)] )
+
+ SORT;
+
+ #undef ASSIGN
+ #undef COMPAR
+
// General case
} else {
- char* a = base;
- char v[size]; // will be at least 'size' bytes
+ HChar* a = base;
+ HChar v[size]; // will be at least 'size' bytes
=20
#define ASSIGN(dst, dsti, src, srci) \
VG_(memcpy)( &dst[size*(dsti)], &src[size*(srci)], size );
|