Hi,
nice library, thanks.
But there is a problem with the memcpy function:
it is two times slower than the same function
from the standard CRT:
// From WCRT (memcpy.c)
/*
 * Copy n bytes from ct to s, one byte at a time, in ascending address order.
 *
 * s   destination buffer
 * ct  source buffer
 * n   number of bytes to copy
 *
 * Returns the destination pointer s.
 */
void *memcpy_WCRT(void *s, const void *ct, size_t n) {
    char *out = (char *)s;
    const char *in = (const char *)ct;

    /* Walk both buffers forward until the requested count is exhausted. */
    while (n-- != 0) {
        *out++ = *in++;
    }
    return s;
}
// From standard Microsoft library
/*
 * Copy n bytes from src to dst using the x86 string-move instructions
 * (MSVC inline assembly, 32-bit registers).
 *
 * The bulk of the data is moved four bytes at a time with `rep movsd`;
 * the remaining 0..3 bytes are moved with `rep movsb`.
 * Unlike the standard memcpy, this function returns nothing.
 *
 * NOTE(review): relies on the direction flag being clear on entry so that
 * esi/edi advance upwards -- confirm against the calling convention in use.
 */
void inline memcpy_stdlib(void *dst, const void *src,
size_t n) {
__asm{
mov edi, dst // edi = destination pointer
mov esi, src // esi = source pointer
mov ecx, n // ecx = byte count
mov edx, ecx // keep the full byte count for the tail step
shr ecx, 2 // ecx = number of whole 4-byte words (n / 4)
rep movsd // copy ecx dwords from [esi] to [edi]
mov ecx, edx // restore the full byte count
and ecx, 3 // ecx = leftover bytes (n % 4)
rep movsb // copy the remaining 0..3 bytes
}
}
/* Destination arena that the benchmark loops copy into. */
char buff[4096];

/*
 * Source text for the copies. Written as adjacent string literals so the
 * long sentence can span several source lines; a string literal may not
 * contain a raw line break. The pieces are joined with single spaces.
 */
char src[] = "'Twas brillig, and the slithy toves "
             "did gyre and gimble in the wabe: all mimsy were the "
             "borogoves, and the mome raths outgrabe.";

/* Number of copy operations performed by each timed loop. */
#define T 10000000
/*
 * Benchmark driver: times T copies of a variable-length prefix of src into
 * buff, first with memcpy and then with memcpy_WCRT, and prints the elapsed
 * GetTickCount() ticks for each.
 *
 * Each copy length is sizeof(src) minus a pseudo-random value in 0..19.
 * The write pointer advances through buff and is reset to the start once
 * fewer than sizeof(src) bytes would remain, so every copy stays in bounds.
 *
 * Always returns 0.
 */
int main(int argc, char* argv[]) {
    DWORD z;
    char *p;

    (void)argc;
    (void)argv;

    z = GetTickCount();
    srand(z);
    p = buff;
    for (int i = 0; i < T; ++i) {
        size_t n = sizeof(src) - rand() % 20;
        memcpy(p, src, n);
        p += n;
        /* Wrap around while a full-size copy still fits after p. */
        if (p > buff + sizeof(buff) - sizeof(src))
            p = buff;
    }
    z = GetTickCount() - z;
    /* DWORD is an unsigned long; print it with a matching specifier. */
    printf("memcpy: %lu\n", (unsigned long)z);

    z = GetTickCount();
    p = buff;
    for (int i = 0; i < T; ++i) {
        size_t n = sizeof(src) - rand() % 20;
        memcpy_WCRT(p, src, n);
        p += n;
        /* Wrap around while a full-size copy still fits after p. */
        if (p > buff + sizeof(buff) - sizeof(src))
            p = buff;
    }
    z = GetTickCount() - z;
    printf("memcpy_WCRT: %lu\n", (unsigned long)z);
    return 0;
}
Try this code. I get the following results on Duron 800:
memcpy takes 1.7 - 1.8 seconds to execute.
memcpy_WCRT takes 4.1 - 4.2 seconds.
memcpy_WCRT is 20 bytes long, and the size of the standard
memcpy
function is 22 bytes. Is it really worth making the
function
two bytes smaller, but two times slower?
Peter.
Logged In: YES
user_id=695110
I initially wrote most of the code in plain C to keep it
relatively portable, but recently more and more of the code
has become asm to improve speed and size.
I'll have a look at the mem* functions, thanks.