Hi,
nice library, thanks.
But there is a problem with the memcpy function:
it is two times slower than the same function
from the standard CRT:
// From WCRT (memcpy.c)
/*
 * Copy n bytes from ct to s one byte at a time, lowest address first.
 * Returns the destination pointer s. When n is 0 nothing is read or written.
 */
void *memcpy_WCRT(void *s, const void *ct, size_t n) {
    char *out = (char *)s;
    const char *in = (const char *)ct;
    size_t left = n;

    while (left != 0) {
        *out++ = *in++;
        --left;
    }
    return s;
}
// From standard Microsoft library
// Inline-assembly copy of n bytes from src to dst: the bulk is moved as
// 32-bit words (rep movsd), then the remaining n % 4 bytes one at a time
// (rep movsb). Returns nothing, unlike the standard memcpy.
// NOTE(review): uses the MSVC-style __asm block with 32-bit x86 registers;
// presumably relies on the direction flag being clear on entry -- confirm.
void inline memcpy_stdlib(void *dst, const void *src,
size_t n) {
__asm{
mov edi, dst // EDI = destination pointer
mov esi, src // ESI = source pointer
mov ecx, n // ECX = byte count
mov edx, ecx // keep a copy of the byte count for the tail
shr ecx, 2 // ECX = n / 4 (number of whole dwords)
rep movsd // copy ECX dwords from [ESI] to [EDI]
mov ecx, edx // restore the original byte count
and ecx, 3 // ECX = n % 4 (leftover bytes)
rep movsb // copy the leftover bytes
}
}
/* Destination area that the benchmark loops write into and wrap around. */
char buff[4096];

/*
 * Source text copied on every iteration. Written as adjacent string
 * literals (joined by the compiler) because a string literal may not
 * contain a raw line break; the pieces are separated by single spaces,
 * giving 127 characters plus the terminating NUL.
 */
char src[] = "'Twas brillig, and the slithy toves "
             "did gyre and gimble in the wabe: all mimsy were the "
             "borogoves, and the mome raths outgrabe.";

/* Number of copies performed by each timed loop. */
#define T 10000000
/*
 * Benchmark driver: times T copies of src into buff, first with the
 * library memcpy and then with memcpy_WCRT, and prints the elapsed
 * GetTickCount() ticks for each. Every copy drops a random 0-19 bytes
 * from the end of src so the length varies between iterations.
 */
int main(int argc, char* argv[]) {
    DWORD z; char *p;

    z = GetTickCount();
    srand(z);
    p = buff;
    for(int i=0; i<T; ++i) {
        size_t n = sizeof(src) - rand() % 20;
        memcpy(p, src, n);
        p += n;
        /* Wrap to the start once a full-size copy might no longer fit. */
        if(p > buff + sizeof(buff) - sizeof(src))
            p = buff;
    }
    z = GetTickCount() - z;
    /* DWORD is an unsigned 32-bit value; print it with a matching format. */
    printf("memcpy: %lu\n", (unsigned long)z);

    z = GetTickCount();
    p = buff;
    for(int i=0; i<T; ++i) {
        size_t n = sizeof(src) - rand() % 20;
        memcpy_WCRT(p, src, n);
        p += n;
        /* Same wrap-around rule as in the first loop. */
        if(p > buff + sizeof(buff) - sizeof(src))
            p = buff;
    }
    z = GetTickCount() - z;
    printf("memcpy_WCRT: %lu\n", (unsigned long)z);
    return 0;
}
Try this code. I get the following results on Duron 800:
memcpy takes 1.7 - 1.8 seconds to execute.
memcpy_WCRT takes 4.1 - 4.2 seconds.
memcpy_WCRT is 20 bytes long, and the size of the standard
memcpy function is 22 bytes. Is it really worth making the
function two bytes smaller but two times slower?
Peter.
Nobody/Anonymous ( nobody ) - 2005-05-08 12:03
5
Open
None
Joergen Ibsen
None
None
Public
|
Date: 2005-05-24 19:38 Logged In: YES |
| Field | Old Value | Date | By |
|---|---|---|---|
| assigned_to | nobody | 2005-05-24 19:36 | jibz |