From: George H. <geo...@us...> - 2006-12-08 10:27:35
|
Update of /cvsroot/win32forth/win32forth-stc/src In directory sc8-pr-cvs9.sourceforge.net:/tmp/cvs-serv10955/win32forth-stc/src Modified Files: float.f Log Message: gah: Optimisation using FCOMI FCMOV etc (work in progress) Index: float.f =================================================================== RCS file: /cvsroot/win32forth/win32forth-stc/src/float.f,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** float.f 7 Nov 2006 11:08:39 -0000 1.5 --- float.f 8 Dec 2006 10:27:31 -0000 1.6 *************** *** 102,105 **** --- 102,106 ---- previous + next-user @ cell naligned next-user ! cell NEWUSER FLOATSP \ floating point stack pointer in the user area (new) 256 constant fstack-elements \ 256 floating point elements in stack *************** *** 112,123 **** in-system macro: FSP_MEMORY ( -- ) FLOATSP [up] endm macro: FSTACK_MEMORY ( -- ) ! FLOATSTACK [ecx] [up] endm macro: +FSTACK_MEMORY ( n -- ) ! FLOATSTACK + [ecx] [up] endm in-previous --- 113,129 ---- in-system + \ These 3 macros are to make it easier to change the fsp caching register. + macro: fsp ecx endm + macro: fsp, ecx, endm + macro: [fsp] [ecx] endm + macro: FSP_MEMORY ( -- ) FLOATSP [up] endm macro: FSTACK_MEMORY ( -- ) ! FLOATSTACK [fsp] [up] endm macro: +FSTACK_MEMORY ( n -- ) ! FLOATSTACK + [fsp] [up] endm in-previous *************** *** 161,166 **** mov -4 [ebp], tos lea ebp, -4 [ebp] ! mov ecx, FSP_MEMORY ! cmp ecx, # b/float js short L$1 fld FSIZE b/float negate +FSTACK_MEMORY --- 167,172 ---- mov -4 [ebp], tos lea ebp, -4 [ebp] ! mov fsp, FSP_MEMORY ! cmp fsp, # b/float js short L$1 fld FSIZE b/float negate +FSTACK_MEMORY *************** *** 201,214 **** 0 value fsp-adjust ! \ makro to copy ST(0) on the separate float stack ! macro: (FPU>) fsp-cached? 0= if ! mov ecx, FSP_MEMORY ! true to fsp-cached? then fstp FSIZE fsp-adjust +FSTACK_MEMORY endm ! \ makro to move ST(0) on the separate float stack macro: FPU> (FPU>) --- 207,238 ---- 0 value fsp-adjust ! macro: +fsp-adjust ! b/float +to fsp-adjust ! endm ! ! macro: -fsp-adjust ! b/float negate +to fsp-adjust ! endm ! ! macro: ?cache-fsp fsp-cached? 0= if ! mov fsp, FSP_MEMORY ! true to fsp-cached? then endm ! ! macro: ?uncache-fsp ! fsp-adjust if ! add fsp, # fsp-adjust ! mov FSP_MEMORY , fsp ! 0 to fsp-adjust then ! false to fsp-cached? endm ! ! \ macro to copy ST(0) onto the separate float stack ! macro: (FPU>) ! ?cache-fsp fstp FSIZE fsp-adjust +FSTACK_MEMORY endm ! \ macro to move ST(0) onto the separate float stack macro: FPU> (FPU>) *************** *** 216,229 **** endm ! \ makro to move the top of the separate float stack into st(0) macro: >FPU ! fsp-cached? 0= if ! mov ecx, FSP_MEMORY ! true to fsp-cached? then b/float negate +to fsp-adjust fld FSIZE fsp-adjust +FSTACK_MEMORY endm ! \ makro to copy the top of the separate float stack into st(0) macro: (>FPU) >FPU --- 240,251 ---- endm ! \ macro to move the top of the separate float stack into st(0) macro: >FPU ! ?cache-fsp b/float negate +to fsp-adjust fld FSIZE fsp-adjust +FSTACK_MEMORY endm ! \ macro to copy the top of the separate float stack into st(0) macro: (>FPU) >FPU *************** *** 238,250 **** \ macro to end float words - macro: ?uncash-fsp - fsp-adjust if - add ecx, # fsp-adjust - mov FSP_MEMORY , ecx - 0 to fsp-adjust then - false to fsp-cached? endm - macro: float; ! ?uncash-fsp next ;c endm in-previous --- 260,265 ---- \ macro to end float words macro: float; ! ?uncache-fsp next ;c endm in-previous *************** *** 253,258 **** \ Input: eax = number of floats we need subr: fstack-check ! mov ecx, FSP_MEMORY ! cmp ecx, edx js short L$1 ret \ stack is fine, return to caller --- 268,273 ---- \ Input: eax = number of floats we need subr: fstack-check ! mov fsp, FSP_MEMORY ! cmp fsp, edx js short L$1 ret \ stack is fine, return to caller *************** *** 426,469 **** float; ! code FSWAP ( fs: r1 r2 -- r2 r1 ) ! fstack-check_2 \ TODO optimize 2>FPU ! fxch FPU> FPU> float; ! code FOVER ( fs: r1 r2 -- r1 r2 r3 ) fstack-check_2 ! ! \ TODO optimize ! mov ecx, FSP_MEMORY ! sub ecx, # B/FLOAT 2* ! fld FSIZE FSTACK_MEMORY ! add ecx, # B/FLOAT 2* fstp FSIZE FSTACK_MEMORY ! add ecx, # B/FLOAT ! mov FSP_MEMORY , ecx ! float; ! code FROT ( fs: r1 r2 r3 -- r2 r3 r1 ) fstack-check_3 ! ! \ TODO optimize ! mov ecx, FSP_MEMORY ! sub ecx, # B/FLOAT ! fld FSIZE FSTACK_MEMORY ! sub ecx, # B/FLOAT ! fld FSIZE FSTACK_MEMORY ! sub ecx, # B/FLOAT ! fld FSIZE FSTACK_MEMORY ! add ecx, # B/FLOAT 2* ! fstp FSIZE FSTACK_MEMORY ! sub ecx, # B/FLOAT 2* ! fstp FSIZE FSTACK_MEMORY ! add ecx, # B/FLOAT ! fstp FSIZE FSTACK_MEMORY ! float; --- 441,479 ---- float; ! code FSWAP ( fs: r1 r2 -- r2 r1 ) \ ANSI Floating ! \ *G Exchange the top 2 FP numbers. ! fstack-check_2 2>FPU ! +fsp-adjust FPU> + -fsp-adjust -fsp-adjust FPU> + +fsp-adjust float; ! code FOVER ( fs: r1 r2 -- r1 r2 r3 ) \ ANSI Floating ! \ *G Copy the 2nd FP stack number to the top of the FP stack. fstack-check_2 ! fld FSIZE b/float 2* negate +FSTACK_MEMORY fstp FSIZE FSTACK_MEMORY ! +fsp-adjust float; ! code FROT ( fs: r1 r2 r3 -- r2 r3 r1 ) \ ANSI Floating ! \ *G Rotate the top 3 FP stack numbers. fstack-check_3 ! -fsp-adjust ! fld FSIZE fsp-adjust +FSTACK_MEMORY ! -fsp-adjust ! fld FSIZE fsp-adjust +FSTACK_MEMORY ! -fsp-adjust ! fld FSIZE fsp-adjust +FSTACK_MEMORY ! +fsp-adjust +fsp-adjust ! fstp FSIZE fsp-adjust +FSTACK_MEMORY ! -fsp-adjust -fsp-adjust ! fstp FSIZE fsp-adjust +FSTACK_MEMORY ! +fsp-adjust ! fstp FSIZE fsp-adjust +FSTACK_MEMORY ! +fsp-adjust +fsp-adjust float; *************** *** 477,483 **** [THEN] call fstack-check ! sub ecx, edx fld FSIZE FSTACK_MEMORY ! add ecx, edx mov tos, 0 [ebp] lea ebp, 4 [ebp] --- 487,493 ---- [THEN] call fstack-check ! sub fsp, edx fld FSIZE FSTACK_MEMORY ! add fsp, edx mov tos, 0 [ebp] lea ebp, 4 [ebp] *************** *** 840,848 **** : f>= ( -- f ) ( fs: r1 r2 -- ) f< not ; ! : FMAX ( fs: r1 r2 -- r3 ) ! f2dup f< IF fswap THEN fdrop ; ! : FMIN ( fs: r1 r2 -- r3 ) ! f2dup f> IF fswap THEN fdrop ; \ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ --- 850,880 ---- : f>= ( -- f ) ( fs: r1 r2 -- ) f< not ; ! code FMAX ( fs: r1 r2 -- r3 ) ! fstack-check_2 ! 2>FPU ! fld st(1) ! fld st(1) ! fadd ! fxch st(2) ! fcomi ! fcmovbe \ cf=1 or zf=1 ! fcmovu st(2) ! fpu> ! fcompp ! float; ! code FMIN ( fs: r1 r2 -- r3 ) ! fstack-check_2 ! 2>FPU ! fld st(1) ! fld st(1) ! fadd ! fxch st(2) ! fcomi ! fcmovnbe \ cf=0 and zf=0 ! fcmovu st(2) ! fpu> ! fcompp ! float; \ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ *************** *** 1038,1053 **** fstack-check_1 fldln2 - mov edx, tos >FPU fld FSIZE sq2m1 ! fcomp st(1) ! fstsw ax ! sahf jp short L$3 ja short L$4 fld FSIZE sq2/2m1 ! fcomp st(1) ! fstsw ax ! sahf jb short L$4 fyl2xp1 --- 1070,1080 ---- fstack-check_1 fldln2 >FPU fld FSIZE sq2m1 ! fcomip st(1) jp short L$3 ja short L$4 fld FSIZE sq2/2m1 ! fcomip st(1) jb short L$4 fyl2xp1 *************** *** 1056,1071 **** L$4: fld1 \ add the "1" explicitly faddp st(1), st(0) - fabs fyl2x FPU> jmp short L$2 L$3: fcompp \ return arg if incomparable ! L$2: mov tos, edx ! float; code FLOG ( fs: r1 -- r2 ) fstack-check_1 fldlg2 - fabs \ ? error for x <= 0 >FPU fyl2x --- 1083,1096 ---- L$4: fld1 \ add the "1" explicitly faddp st(1), st(0) fyl2x FPU> jmp short L$2 L$3: fcompp \ return arg if incomparable ! L$2: float; ! code FLOG ( fs: r1 -- r2 ) fstack-check_1 fldlg2 >FPU fyl2x |