|
From: <sv...@va...> - 2015-02-04 17:52:50
|
Author: sewardj
Date: Wed Feb 4 17:52:42 2015
New Revision: 14901
Log:
Vectorise the memcmp template implementation. For scenarios requiring
a lot of memcmp-ing (firefox reftests) this is measurably faster.
Modified:
trunk/shared/vg_replace_strmem.c
Modified: trunk/shared/vg_replace_strmem.c
==============================================================================
--- trunk/shared/vg_replace_strmem.c (original)
+++ trunk/shared/vg_replace_strmem.c Wed Feb 4 17:52:42 2015
@@ -959,18 +959,33 @@
int VG_REPLACE_FUNCTION_EZU(20190,soname,fnname) \
( const void *s1V, const void *s2V, SizeT n ) \
{ \
- int res; \
- UChar a0; \
- UChar b0; \
- const UChar* s1 = s1V; \
- const UChar* s2 = s2V; \
+ const Addr WS = sizeof(UWord); /* 8 or 4 */ \
+ const Addr WM = WS - 1; /* 7 or 3 */ \
+ Addr s1A = (Addr)s1V; \
+ Addr s2A = (Addr)s2V; \
+ \
+ if (((s1A | s2A) & WM) == 0) { \
+ /* Both areas are word aligned. Skip over the */ \
+ /* equal prefix as fast as possible. */ \
+ while (n >= WS) { \
+ UWord w1 = *(UWord*)s1A; \
+ UWord w2 = *(UWord*)s2A; \
+ if (w1 != w2) break; \
+ s1A += WS; \
+ s2A += WS; \
+ n -= WS; \
+ } \
+ } \
+ \
+ const UChar* s1 = (const UChar*) s1A; \
+ const UChar* s2 = (const UChar*) s2A; \
\
while (n != 0) { \
- a0 = s1[0]; \
- b0 = s2[0]; \
+ UChar a0 = s1[0]; \
+ UChar b0 = s2[0]; \
s1 += 1; \
s2 += 1; \
- res = ((int)a0) - ((int)b0); \
+ int res = ((int)a0) - ((int)b0); \
if (res != 0) \
return res; \
n -= 1; \
|
|
From: Florian K. <fl...@ei...> - 2015-02-04 18:09:24
|
On 04.02.2015 18:52, sv...@va... wrote:
> Modified: trunk/shared/vg_replace_strmem.c
> ==============================================================================
> --- trunk/shared/vg_replace_strmem.c (original)
> +++ trunk/shared/vg_replace_strmem.c Wed Feb 4 17:52:42 2015
> ....
> + const Addr WS = sizeof(UWord); /* 8 or 4 */ \
> + const Addr WM = WS - 1; /* 7 or 3 */ \

SizeT would be a more appropriate type, as that is what sizeof returns
(well, actually size_t; but ...)

Florian |