RE: [Algorithms] non-branching zbuf code tricks?
Brought to you by:
vexxed72
|
From: Alex C. <al...@ar...> - 2001-01-10 09:09:58
|
The Intel compiler produces some interesting code for:
/*
 * Clamp every entry of zBuf[0 .. len-1] so that it is no larger than z:
 * each element is replaced by the smaller of z and its current value.
 * When len is zero or negative the loop body never runs and the buffer
 * is left untouched.
 */
void foobar(int z, int *restrict zBuf, int len)
{
for (int i = 0; i < len; i++)
/* Ternary form of min(z, zBuf[i]); the element is written back even when it does not change. */
zBuf[i] = z < zBuf[i] ? z : zBuf[i];
}
; Disassembly of foobar with the original source lines (prefixed "NNNN:") interleaved.
; Register roles as used below:
;   ecx = z       (compared against every loaded element, never loaded here)
;   edx = zBuf    (base of every [edx+ebx*4] access, never loaded here)
;   esi = len     (loaded from the stack at [esp+1Ch])
;   ebx = i       (loop index)
;   ebp / eax     scratch for the element being clamped
; NOTE(review): z and zBuf arriving in ecx/edx with len on the stack and a
; callee-side "ret 4" looks like a register-argument convention such as
; __fastcall - confirm against the build settings.
1284: void foobar(int z, int *restrict zBuf, int len)
1285: {
; esi and ebx are saved here and restored by the pops at the end.
004CED10 push esi
004CED11 push ebx
; Reserve 10h bytes of stack; the only slot used below is [esp+0Ch] (ebp spill).
004CED12 sub esp,10h
; esi = len. Offset 1Ch = 10h reserved + 8 bytes of pushed registers + 4 more,
; i.e. the first dword above the return address.
004CED15 mov esi,dword ptr [esp+1Ch]
1286: for (int i = 0; i < len; i++)
; i = 0
004CED19 xor ebx,ebx
; len <= 0: nothing to do, go straight to the epilogue.
004CED1B test esi,esi
004CED1D jle foobar+80h (004ced90)
; Fewer than 6 elements: skip the unrolled loop and use the one-at-a-time loop.
004CED23 cmp esi,6
004CED26 jl foobar+70h (004ced80)
; eax = len - 6, the bound tested at the bottom of the unrolled loop.
004CED28 lea eax,[esi-6]
; ebp is used as scratch inside the unrolled loop, so park its value on the stack.
004CED2B mov dword ptr [esp+0Ch],ebp
; Filler: the loop top that follows sits at 004CED30, a 16-byte-aligned address.
004CED2F nop
1287: zBuf[i] = z < zBuf[i] ? z : zBuf[i];
; Unrolled loop: five copies of load / cmp / cmovl / store, handling elements
; i, i+1, i+2, i+3 and i+4. cmovl replaces the loaded value with z when z is
; less than it (signed compare), so no branch is needed; the store is unconditional.
004CED30 mov ebp,dword ptr [edx+ebx*4]
004CED33 cmp ecx,ebp
004CED35 cmovl ebp,ecx
004CED38 mov dword ptr [edx+ebx*4],ebp
004CED3B mov ebp,dword ptr [edx+ebx*4+4]
004CED3F cmp ecx,ebp
004CED41 cmovl ebp,ecx
004CED44 mov dword ptr [edx+ebx*4+4],ebp
004CED48 mov ebp,dword ptr [edx+ebx*4+8]
004CED4C cmp ecx,ebp
004CED4E cmovl ebp,ecx
004CED51 mov dword ptr [edx+ebx*4+8],ebp
004CED55 mov ebp,dword ptr [edx+ebx*4+0Ch]
004CED59 cmp ecx,ebp
004CED5B cmovl ebp,ecx
004CED5E mov dword ptr [edx+ebx*4+0Ch],ebp
004CED62 mov ebp,dword ptr [edx+ebx*4+10h]
004CED66 cmp ecx,ebp
004CED68 cmovl ebp,ecx
004CED6B mov dword ptr [edx+ebx*4+10h],ebp
1286: for (int i = 0; i < len; i++)
; i += 5; repeat while i <= len - 6. On exit len-5 <= i <= len-1, so between
; one and five elements are always left for the loop below.
004CED6F add ebx,5
004CED72 cmp ebx,eax
004CED74 jle foobar+20h (004ced30)
; Restore the ebp value saved before the unrolled loop.
004CED76 mov ebp,dword ptr [esp+0Ch]
; Filler: leaves esi unchanged; the loop top that follows sits at 004CED80,
; a 16-byte-aligned address.
004CED7A lea esi,[esi]
1287: zBuf[i] = z < zBuf[i] ? z : zBuf[i];
; One-element-per-iteration loop (also the whole loop when 0 < len < 6).
; It is entered without a bounds test: every path here arrives with i < len.
004CED80 mov eax,dword ptr [edx+ebx*4]
004CED83 cmp ecx,eax
004CED85 cmovl eax,ecx
004CED88 mov dword ptr [edx+ebx*4],eax
1286: for (int i = 0; i < len; i++)
; i++; repeat while i < len.
004CED8B inc ebx
004CED8C cmp ebx,esi
004CED8E jl foobar+70h (004ced80)
1288: }
; Epilogue: release the 10h bytes, restore ebx and esi, then return and pop
; 4 bytes of stack arguments (the len dword).
004CED90 add esp,10h
004CED93 pop ebx
004CED94 pop esi
004CED95 ret 4
If you have the time, I'd recommend evaluating the Intel compiler.
I tend to find it produces good code and it often has better error
messages than VC++. It also integrates into the VC IDE, which is great.
Alex Clarke, Programmer
Argonaut Games PLC
|