|
From: <sv...@va...> - 2013-06-10 08:34:37
|
sewardj 2013-06-10 09:34:26 +0100 (Mon, 10 Jun 2013)
New Revision: 13425
Log:
Improve performance of CRC32 computations for files in the local filesystem.
Modified files:
branches/DISRV/coregrind/m_debuginfo/image.c
branches/DISRV/coregrind/m_debuginfo/priv_image.h
branches/DISRV/coregrind/m_libcbase.c
Modified: branches/DISRV/coregrind/m_debuginfo/priv_image.h (+11 -0)
===================================================================
--- branches/DISRV/coregrind/m_debuginfo/priv_image.h 2013-06-09 15:29:10 +01:00 (rev 13424)
+++ branches/DISRV/coregrind/m_debuginfo/priv_image.h 2013-06-10 09:34:26 +01:00 (rev 13425)
@@ -80,6 +80,17 @@
void ML_(img_get)(/*OUT*/void* dst,
DiImage* img, DiOffT offset, SizeT size);
+/* A version of ML_(img_get) that is significantly cheaper when
+ fetching a lot of data, at the cost of being more difficult to use.
+ Fetches between 1 and |size| bytes from |img| at |offset| and
+ places them in |dst|. |size| must be at least 1. The number of
+ bytes read is returned, and the caller must be able to deal with
+ any number between 1 and |size|. |offset| must be a valid offset
+ in the image; if not the function will not return. This function
+ will not read off the end of the image. */
+SizeT ML_(img_get_some)(/*OUT*/void* dst,
+ DiImage* img, DiOffT offset, SizeT size);
+
/* Copy a C string out of the image, into ML_(dinfo_zalloc)'d space.
The caller owns the string and must free it with ML_(dinfo_free).
|offset| may be DiOffT_INVALID, in which case this returns NULL. */
Modified: branches/DISRV/coregrind/m_libcbase.c (+18 -2)
===================================================================
--- branches/DISRV/coregrind/m_libcbase.c 2013-06-09 15:29:10 +01:00 (rev 13424)
+++ branches/DISRV/coregrind/m_libcbase.c 2013-06-10 09:34:26 +01:00 (rev 13425)
@@ -556,8 +556,24 @@
d = (UChar*)dI;
}
- while (sz--)
- *d++ = *s++;
+ /* If we're unlucky, the alignment constraints for the fast case
+ above won't apply, and we'll have to do it all here. Hence the
+ unrolling. */
+ while (sz >= 4) {
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d += 4;
+ s += 4;
+ sz -= 4;
+ }
+ while (sz >= 1) {
+ d[0] = s[0];
+ d += 1;
+ s += 1;
+ sz -= 1;
+ }
return dest;
}
Modified: branches/DISRV/coregrind/m_debuginfo/image.c (+29 -3)
===================================================================
--- branches/DISRV/coregrind/m_debuginfo/image.c 2013-06-09 15:29:10 +01:00 (rev 13424)
+++ branches/DISRV/coregrind/m_debuginfo/image.c 2013-06-10 09:34:26 +01:00 (rev 13425)
@@ -751,6 +751,31 @@
}
}
+SizeT ML_(img_get_some)(/*OUT*/void* dst,
+ DiImage* img, DiOffT offset, SizeT size)
+{
+ vg_assert(img);
+ vg_assert(size > 0);
+ vg_assert(ML_(img_valid)(img, offset, size));
+ UChar* dstU = (UChar*)dst;
+ /* Use |get| in the normal way to get the first byte of the range.
+ This guarantees to put the cache entry containing |offset| in
+ position zero. */
+ dstU[0] = get(img, offset);
+ /* Now just read as many bytes as we can (or need) directly out of
+ entry zero, without bothering to call |get| each time. */
+ CEnt* ce = img->ces[0];
+ vg_assert(ce && ce->used >= 1);
+ vg_assert(is_in_CEnt(ce, offset));
+ SizeT nToCopy = size - 1;
+ SizeT nAvail = (SizeT)(ce->used - (offset + 1 - ce->off));
+ vg_assert(nAvail >= 0 && nAvail <= ce->used-1);
+ if (nAvail < nToCopy) nToCopy = nAvail;
+ VG_(memcpy)(&dstU[1], &ce->data[offset + 1 - ce->off], nToCopy);
+ return nToCopy + 1;
+}
+
+
SizeT ML_(img_strlen)(DiImage* img, DiOffT off)
{
vg_assert(ML_(img_valid)(img, off, 1));
@@ -905,11 +930,12 @@
vg_assert(avail > 0 && avail <= img_szB);
if (avail > 1024) avail = 1024;
UChar buf[1024];
- ML_(img_get)(buf, img, curr_off, avail);
+ SizeT nGot = ML_(img_get_some)(buf, img, curr_off, avail);
+ vg_assert(nGot >= 1 && nGot <= avail);
UInt i;
- for (i = 0; i < (UInt)avail; i++)
+ for (i = 0; i < (UInt)nGot; i++)
crc = crc32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
- curr_off += avail;
+ curr_off += nGot;
}
return ~crc & 0xFFFFFFFF;
} else {
|