[Win32forth-cvs] win32forth-stc/demos/benchmarks MM.F, NONE, 1.1 Testpde.f, NONE, 1.1 bench.f, NONE

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/win32forth/win32forth-stc/demos/benchmarks
In directory sc8-pr-cvs9.sourceforge.net:/tmp/cvs-serv23946/demos/benchmarks

Added Files:
	MM.F Testpde.f bench.f fsl_util.f matrix.f pde1.f 
Log Message:
- 3reverse and 4reverse added to the kernel
- MM benchmark added
- Moved the benchmarks into a seperate folder

--- NEW FILE: pde1.f ---
\ pde1.f  ( win32forth version)
\
\ Numerical Solution of Electrostatics Boundary-Value Problems.
\ Solve Laplace's Equation in 2 Dimensions:
\
\	D_xx u(x,y) + D_yy u(x,y) = 0
\
\ Copyright (c) 2003 Krishna Myneni, Creative Consulting for Research 
\ and Education
\
\ Provided under the terms of the GNU General Public License.
\
\ This program demonstrates a method of solving one kind of a partial 
\ differential equation (PDE) for a function u(x,y), a function
\ of the two variables x and y. In Laplace's Equation above, 
\ D_xx represents taking the second partial derivative with respect to 
\ x of u(x,y), and D_yy the second partial derivative w.r.t. y. This 
\ equation holds for the electrostatic potential u(x,y) inside
\ a charge-free two dimensional region. If we know the values of
\ u(x,y) along a boundary enclosing the region, Laplace's equation
\ may be solved to obtain the values of u(x,y) at all interior points
\ of the region. 
\
\ In this demonstration, we can setup two different bounding regions:
\
\ 1) a hollow rectangular box with voltages defined on the edges,
\
\ 2) a hollow circular region with the top half boundary at one voltage,
\      and the bottom half boundary at a second voltage.
\
\ Very thin insulators are assumed to be separating the regions which 
\ are at different potentials on the bounding region.
\ 
\ Laplace's equation is solved by an iterative application of the 
\ "mean value theorem for the electrostatic potential" (see 
\ "Classical Electrodynamics", 2nd ed, by J.D. Jackson) to each grid 
\ point inside the boundary until the solution converges. For more 
\ information on solving PDEs and boundary value problems, 
\ see "Partial differential equations for engineers and scientists", 
\ by Stanley J. Farlow, 1982, Dover. The method of solving Laplace's 
\ equation used in this example is known as Liebmann's method.
\
\
\ K. Myneni, 1998-10-23
\
\ Adapted for gforth on 2003-12-15; graphics output removed. KM
\

include matrix.f
: FROUND>S FROUND F>D D>S ;

: fmat_copy ( a1 a2 -- | copy fmatrix a1 into a2)
    over mat_size@ * dfloats cell+ cell+ cmove ;

\ Create a floating pt matrix to hold the grid values

64 constant GRIDSIZE
GRIDSIZE dup fmatrix grid
GRIDSIZE dup fmatrix last_grid	\ copy of last grid values for convergence test

\ Rectangular Region Boundary Values

100e  FCONSTANT  TOP_EDGE	\ Top edge at   100.0 V
0e    FCONSTANT  RIGHT_EDGE	\ Right edge at   0.0 V
0e    FCONSTANT  BOTTOM_EDGE	\ Bottom edge at  0.0 V
50e   FCONSTANT  LEFT_EDGE	\ Left edge at   50.0 V

: inside_rectangle? ( row col -- flag | inside rectangular boundary?)
    dup 1 > swap GRIDSIZE < AND swap
    dup 1 > swap GRIDSIZE < AND AND
;

: set_rectangular_bvs ( -- | setup the rectangular boundary values)
    GRIDSIZE 1+ 1 do  TOP_EDGE    1 i grid fmat! loop
    GRIDSIZE 1+ 1 do  RIGHT_EDGE  i GRIDSIZE grid fmat! loop
    GRIDSIZE 1+ 1 do  BOTTOM_EDGE GRIDSIZE i grid fmat! loop
    GRIDSIZE 1+ 1 do  LEFT_EDGE   i 1 grid fmat! loop
;

: init_rectangular_grid ( -- | set up the starting grid values )
    set_rectangular_bvs
    TOP_EDGE BOTTOM_EDGE RIGHT_EDGE LEFT_EDGE f+ f+ f+ 4e f/
    GRIDSIZE 1+ 1 do
      GRIDSIZE 1+ 1 do
        j i inside_rectangle? IF fdup j i grid fmat! THEN
      loop
    loop fdrop ;

\ Circular Region Boundary Values

100e  FCONSTANT  TOP_HALF	\ Top half of boundary region at 100. V
0e    FCONSTANT  BOTTOM_HALF    \ Bottom half at 0.0 V
GRIDSIZE 2 - 2/ CONSTANT RADIUS  \ Radius of boundary region

: inside_circle? ( row col -- flag | inside circular boundary? )
     GRIDSIZE 2/ - dup * swap 
     GRIDSIZE 2/ - dup * + s>f fsqrt fround>s
     RADIUS < ;

: set_circular_bvs ( -- | setup the circular boundary region )
    GRIDSIZE 1+ 1 do
      GRIDSIZE 1+ 1 do
        j i inside_circle? 0= IF
	  j GRIDSIZE 2/ < IF TOP_HALF ELSE BOTTOM_HALF THEN
	  j i grid fmat!
	THEN 
      LOOP
    LOOP ;

: init_circular_grid ( -- | set starting values of the grid)
    set_circular_bvs
    TOP_HALF BOTTOM_HALF f+ 2e f/
    GRIDSIZE 1+ 1 do
      GRIDSIZE 1+ 1 do
        j i inside_circle? IF fdup j i grid fmat! THEN
      loop
    loop fdrop ;

defer inside?

: circ ( -- | use the two semi-circle boundary values )
    grid fmat_zero
    ['] inside_circle? is inside? 
    init_circular_grid ;

: rect ( -- | use rectangular boundary values )
    grid fmat_zero
    ['] inside_rectangle? is inside?
    init_rectangular_grid ;

: nearest@ ( i j -- f1 f2 f3 f4 | fetch the nearest neighbor grid values )
    2>R
    2R@ 1- 1 MAX grid fmat@         \ fetch left nearest neighbor
    2R@ 1+ GRIDSIZE MIN grid fmat@  \ fetch right nearest neighbor
    2R@ SWAP 1- 1 MAX SWAP grid fmat@  \ fetch up nearest neighbor
    2R> SWAP 1+ GRIDSIZE MIN SWAP grid fmat@ \ fetch down nearest neighbor
;	    	  

\ Apply the mean value theorem once to each of the interior grid values:
\   Replace each grid value with the average of the four nearest
\   neighbor values.

: iterate ( -- ) 
	GRIDSIZE 1+ 1 ?do
	  GRIDSIZE 1+ 1 ?do
	    j i inside? IF
	      j i nearest@	\ fetch four nearest neighbors
	      f+ f+ f+ 4e f/	\ take average of the four values
	      j i grid fmat!	\ store at this position
	    THEN
	  loop
	loop
;

fvariable tol	\ tolerance for solution
1e-3 tol f!

: converged? ( -- flag | test for convergence between current and last grid)
    GRIDSIZE 1+ 1 do
      GRIDSIZE 1+ 1 do
        j i inside? IF j i grid fmat@ j i last_grid fmat@ f-
	               fabs tol f@ f> IF FALSE unloop unloop EXIT THEN
		    THEN
      loop
    loop TRUE ;

\ Iterate until the solution converges to the specified tolerance 
\ at all interior points.

: solve ( -- )
	begin
          grid last_grid fmat_copy
	  iterate
	  converged?
	until
;

fvariable temp 

: grid_minmax ( -- fmin fmax | find min and max of grid values )
	1 1 grid fmat@ fdup
	GRIDSIZE 1+ 1 do
	  GRIDSIZE 1+ 1 do
	    j i grid fmat@ fswap fover fmax ( 2>r) temp f! fmin ( 2r>) temp f@
	  loop
	loop
;

: display_grid ( -- | display the grid values as a character map )
	grid_minmax
	fover f- 
	15e fswap f/	\ scale factor to scale grid value from 0 to 15
	fswap

	GRIDSIZE 1+ 1 ?do
	  GRIDSIZE 1+ 1 ?do
	    fover fover
	    j i grid fmat@ fswap f- f*
	    fround>s dup 9 >
	    if 55 + else 48 + then emit
	  loop
	  cr
	loop

	fdrop fdrop
;

rect
CR CR
.( Numerical Solution of Electrostatics Boundary-Value Problems ) CR 
GRIDSIZE dup 3 .r char x emit .    
.( grid has been setup. Type: ) CR CR
.(    rect           to use the rectangular boundary values) CR
.(    circ           to use the circular boundary values) CR
.(    solve          to find the solution) CR
.(    display_grid   to view grid as a character map) CR
CR

--- NEW FILE: Testpde.f ---

only forth also definitions decimal

\ defined b/float nip 0= [if] 8 constant b/float [then]

\ needs stc/float

\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
\ Timing Routines                     \
\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

create TIME-BUF
here
        0 w,    \ +0  year
        0 w,    \ +2  month
        0 w,    \ +4  day of week
        0 w,    \ +6  day of month
        0 w,    \ +8  hour
        0 w,    \ +10 minute
        0 w,    \ +12 second
        0 w,    \ +14 milliseconds
here swap - constant TIME-LEN

create date$ 32 allot
create time$ 32 allot

: get-local-time ( -- )                 \ get the local computer date and time
        time-buf call GetLocalTime drop ;

: time&date     ( -- sec min hour day month year )
                get-local-time
                time-buf 12 + w@        \ seconds
                time-buf 10 + w@        \ minutes
                time-buf  8 + w@        \ hours
                time-buf  6 + w@        \ day of month
                time-buf  2 + w@        \ month of year
                time-buf      w@ ;      \ year

: .#"           ( n1 n2 -- a1 n3 )
                >r 0 <# r> 0 ?do # loop #> ;

: >date"        ( time_structure -- )
                >r 31 date$ null \ z" ddddd',' MMMM dd yyyy"
                r> null LOCALE_USER_DEFAULT
                call GetDateFormat date$ swap 1- ;

: .date         ( -- )
                get-local-time time-buf >date" type ;

: >month,day,year" ( time_structure -- )
                >r 31 date$  z" ddddd',' MMMM dd yyyy"
                r> null LOCALE_USER_DEFAULT
                call GetDateFormat date$ swap 1- ;

: .month,day,year ( -- )
                get-local-time time-buf >month,day,year" type ;

: >time"        ( time_structure -- )
                >r 31 time$ null
                r> null LOCALE_USER_DEFAULT
                call GetTimeFormat time$ swap 1- ;

: .time         ( -- )
                get-local-time time-buf >time" type ;

: >am/pm"       ( time_structure -- )
                >r 31 time$ z" h':'mmtt"
                r> null LOCALE_USER_DEFAULT
                call GetTimeFormat time$ swap 1- ;

: .am/pm        ( -- )
                get-local-time time-buf >am/pm" type ;

: ms@           ( -- ms )
                get-local-time
                time-buf
                dup   8 + w@     60 *           \ hours
                over 10 + w@ +   60 *           \ minutes
                over 12 + w@ + 1000 *           \ seconds
                swap 14 + w@ + ;                \ milli-seconds

0 value start-time

: time-reset    ( -- )
                ms@ to start-time ;

' time-reset alias timer-reset

: .elapsed      ( -- )
                ." Elapsed time: "
                ms@ start-time -
                1000 /mod
                  60 /mod
                  60 /mod 2 .#" type ." :"
                          2 .#" type ." :"
                          2 .#" type ." ."
                          3 .#" type ;

: elapse        ( -<commandline>- )
                time-reset interpret cr .elapsed ;

\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
\ Other utilities                                \
\ \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

: ROLL          ( n1 n2 .. nk k -- n2 n3 .. nk n1 )
\  Rotate k values on the stack, bringing the deepest to the top.
                DUP>R PICK SP@ DUP CELL+ R> CELLS CELL+ MOVE DROP ;

code k          ( -- n )
                mov     -4 [ebp], eax
                mov     eax, 20 [esp]
                add     eax, 24 [esp]
                lea     ebp, -4 [ebp]
                next ;c

\ Need to set directory until fload etc work with paths.
\ s" demos" Prepend<home>\ "chdir

fload pde1

--- NEW FILE: fsl_util.f ---
\ anew FSL-Utilities_1.04
\ fvariable t1

\ fsl_util.f            An auxiliary file for the Forth Scientific Library
\                       contains commonly needed definitions for Win32Forth.

\ dxor, dor, dand       double xor, or, and
\ sd*                   single * double = double_product
\ v: defines use( &     For defining and setting execution vectors
\ %                     Parse next token as a FLOAT
\ S>F  F>S              Conversion between (single) integer and float
\ F,                    Store FLOAT at (aligned) HERE
\ INTEGER DOUBLE FLOAT  For setting up ARRAY types
\ ARRAY DARRAY          For declaring static and dynamic arrays
\ }                     For getting an ARRAY or DARRAY element address
\ }MALLOC }FREE         Allocate and free dynamic arrays
\ &!                    For storing ARRAY aliases in a DARRAY
\ PRINT-WIDTH           The number of elements per line for printing arrays
\ }FPRINT               Print out a given array
\ Matrix                For declaring a 2-D array
\ }}                    gets a Matrix element address
\ }}MALLOC }}FREE       Allocate and free dynamic matrices
\ Public: Private: Reset_Search_Order   controls the visibility of words
\ |frame frame|             sets up/removes a local variable frame
\ a b c d e f g h           local FVARIABLE values
\ &a &b &c &d &e &f &g &h   local FVARIABLE addresses

\ This code conforms with ANS requiring:
\      1. The Floating-Point word set with separate floating stack
\      2. The Memory word set
\      3. The words \ pick tuck nip from Core Extensions
\      4. The word d+ from Double
\      5. The words defer is f# internal external module
\         and others which are implemented in Win32Forth,
\         marked by commented-out definitions and synonyms
\      6. The word anew which creates a marker, forgetting
\         everything after it if it already exists
\      7. The assembler to define D+C and UMD/MOD.
\
\ This code has an environmental dependency on CAPS being -1 ( ignore case )
\ which is the default in Win32Forth.

\ This code is released to the public domain Pierre Henri Michel., Abbat
\ February, 1998

\ March  8th, 2003 J.v.d.Ven: Optimized }} DSMATRIX using fmacro.f
\ March 10th, 2003 J.v.d.Ven: optimized DMMATRIX    using fmacro.f
\                             restored the old DSMATRIX which had no bug.
\ March 17th, 2003 J.v.d.Ven: Changed for the updated fmacro.f
\ March 31st, 2003 J.v.d.Ven: Changed UMD/MOD for the updated fmacro.f

\ needs fmacro.f

CR .( FSL_UTIL.F        V1.04          March 10th, 2003 )
( Modified from Skip Carter's fsl_util.seq 1.20 )

\ ====================== compilation control ===========================
\ for control of conditional compilation test code
FALSE VALUE TEST-CODE?
FALSE VALUE ?TEST-CODE           \ obsolete, for backward compatiblity

\ for control of conditional compilation of Dynamic Memory
TRUE CONSTANT HAS-MEMORY-WORDS?

\ for control of conditional compilation of dereferencing unallocated array error
FALSE CONSTANT DEBUG-ARRAYS?

\ =======================================================================

\ FSL Non ANS words

\ umd/mod     ( uquad uddiv -- udquot udmod ) unsigned quad divided by double
\ umd*        ( ud1 ud2 -- qprod )            unsigned double multiply
\ d*          ( d1 d2   -- dprod )            double multiply

CODE UMD/MOD   ( uquad uddiv -- udquot udmod )
( Modified from F-PC )
        SUB     EBP, # 8
        MOV     [EBP], EDX      \ save user pointer
        MOV     4 [EBP], EDI    \ save base pointer
        MOV     ECX, EBX
        POP     EDX
        POP     EAX
        POP     EBX
        POP     EDI
        PUSH    ESI
        PUSH    EBP
        MOV     EBP, ESP
        MOV     ESI, 8 [EBP]
        MOV     EBP, ECX
        CMP     EBP, EAX
        JA      @@6
        JNE     @@7
        CMP     EDX, EBX
        JA      @@6
@@7:    MOV     EAX, EDI
        MOV     EBX, ESI
        MOV     ESI, # -1
        MOV     EDI, ESI
        JMP     @@8
@@6:    MOV     ECX, # 40
        CLC
@@1:    RCL     ESI
        RCL     EDI
        RCL     EBX
        RCL     EAX
        JAE     @@3
@@2:    SUB     EBX, EDX
        SBB     EAX, EBP
        STC
        loop    @@1
        JMP     @@5
@@3:    CMP     EAX, EBP
        JB      @@4
        JNE     @@2
        CMP     EBX, EDX
        JAE     @@2
@@4:    CLC
        loop    @@1
@@5:    RCL     ESI
        RCL     EDI
@@8:    MOV     ECX, ESI
        POP     EBP
        POP     ESI
        POP     EDX
        PUSH    EBX
        PUSH    EAX
        PUSH    ECX
        MOV     EBX, EDI
        MOV     EDX, [EBP]
        MOV     EDI, 4 [EBP]
        ADD     EBP, # 8
        NEXT    C;

CODE D+C ( d d - d carry )
        POP     EAX
        ADD     4 [ESP], EAX
        ADC     0 [ESP], EBX
        XOR     EBX, EBX
        ADC     EBX, # 0
        NEXT    C;

: UMD* ( ud1 ud2 - uq )
  2 PICK OVER M* 2>R
  3 PICK UM* 2>R TUCK UM* 2>R
  UM* 0 2R> D+ 2R> D+C
  2R> D+ ;

: D* ( d d - d )
  3 PICK * ROT 2 PICK * +
  -ROT UM* ROT + ;

: dxor       ( d1 d2 -- d )             \ double xor
      ROT XOR -ROT XOR SWAP
;

: dor       ( d1 d2 -- d )              \ double or
      ROT OR -ROT OR SWAP
;

: dand     ( d1 d2 -- d )               \ double and
     ROT AND -ROT AND SWAP
;

\ single * double = double
: sd*   ( multiplicand  multiplier_double  -- product_double  )
             2 PICK * >R   UM*   R> +
;

\ : D0<        NIP 0< ;

: T*         TUCK UM* 2SWAP UM* SWAP >R 0 D+ R> ROT ROT ;
: T/         DUP >R UM/MOD ROT ROT R> UM/MOD NIP SWAP ;

\ : m*/        >R T* R> T/ ;

\ function vector definition
synonym v: defer
synonym defines is

: use(  STATE @ IF [COMPILE] ['] ELSE ' THEN ;  IMMEDIATE

: &     Postpone use( ; IMMEDIATE

\ pushes following value to the float stack
synonym % f#

\ : S>F   ( n -- | f: -- x )    \ integer to float
\         S>D  D>F
\ ;

\ : F>S    ( -- n | f: x -- )   \ float to integer
\         F>D DROP
\ ;

\ Store float at (aligned) HERE
\ already defined in F-PC
\ : F,   ( -- | f: x -- )         FALIGN HERE  1 FLOATS ALLOT F!  ;

\ : F=    F- F0= ;
: -FROT    FROT FROT ;
\ : F2*   % 2.0e0 F*     ;
\ : F2/   % 2.0e0 F/     ;
\ : F2DUP     FOVER FOVER ;
\ : F2DROP    FDROP FDROP ;

\ : CELL-    [ 1 CELLS ] LITERAL - ;           \ backup one cell

0 VALUE TYPE-ID               \ for building structures
FALSE VALUE STRUCT-ARRAY?

\ size of a regular integer
1 cells CONSTANT INTEGER

\ size of a double integer
2 cells CONSTANT DOUBLE

\ size of a regular float
\ synonym fvalue float   \ float is not defined in STC Forth yet
\ 1 floats CONSTANT FLOAT
( Note: This conflicts with the previous definition of float
  which declares a floating-point to-word. )

\ 1-D array definition
\    -----------------------------
\    | cell_size | data area     |
\    -----------------------------

: MARRAY ( n cell_size -- | -- addr )             \ monotype array
     CREATE
       DUP , * ALLOT
     DOES> CELL+
;

\    -----------------------------
\    | id | cell_size | data area |
\    -----------------------------

: SARRAY ( n cell_size -- | -- id addr )          \ structure array
     CREATE
       TYPE-ID ,
       DUP , * ALLOT
     DOES> DUP @ SWAP [ 2 CELLS ] LITERAL +
;

: ARRAY
     STRUCT-ARRAY? IF   SARRAY FALSE TO STRUCT-ARRAY?
                   ELSE MARRAY
                   THEN
;

\ word for creation of a dynamic array (no memory allocated)

\ Monotype
\    ------------------------
\    | data_ptr | cell_size |
\    ------------------------

: DMARRAY   ( cell_size -- )   CREATE  0 , ,
                               DOES>
                                    @ CELL+
;

\ Structures
\    ----------------------------
\    | data_ptr | cell_size | id |
\    ----------------------------

: DSARRAY   ( cell_size -- )  CREATE  0 , , TYPE-ID ,
                              DOES>
                                    DUP 2 CELLS+ @ SWAP
                                    @ CELL+
;

: DARRAY   ( cell_size -- )
     STRUCT-ARRAY? IF   DSARRAY FALSE TO STRUCT-ARRAY?
                   ELSE DMARRAY
                   THEN
;

: }FREE ( &array{ )
( Usage: array{ }free )
  >BODY DUP @ FREE DROP OFF ;

: }MALLOC ( &array{ #elements )
( Usage: & array{ 5 }malloc allocates an array of 5 elements )
  [ DEBUG-ARRAYS? ] [IF]
  OVER >BODY @ IF ." Warning: array is already allocated" THEN [THEN]
  OVER }FREE ( deallocate the array to prevent memory leaks )
  OVER >BODY CELL+ @ ( get the element size )
  TUCK * CELL+ ( add room to store the element size )
  ALLOCATE IF ( there was an error)
    2DROP 0 SWAP >BODY ! ( store 0 in the array pointer )
  ELSE
    TUCK ! ( store the element size )
    SWAP >BODY ! ( store the array location )
  THEN ;

v: do-align
v: do-aligned

: default-alignments  & ALIGN defines do-align  & ALIGNED defines do-aligned ;
: float-alignments    & FALIGN defines do-align  & FALIGNED defines do-aligned ;

: XINTEGER  1 CELLS  default-alignments ;

: XDOUBLE   2 CELLS  default-alignments ;

: XFLOAT   1  FLOATS    float-alignments ;

: XARRAY ( n size -- | -- addr )       \ experimental array with alignment
     CREATE
       DUP , DO-ALIGN * ALLOT
     DOES> CELL+ DO-ALIGNED
;

\ word for aliasing arrays,
\  typical usage:  a{ & b{ &!  sets b{ to point to a{'s data

: &!    ( addr_a &b -- )
        SWAP CELL- SWAP >BODY  !
;

DEBUG-ARRAYS? [IF]
: unallocated? ( array-address - array-address )
( Use ABORT" or THROW as you like. )
  DUP 0=
\ IF -9 THROW THEN
  ABORT" Array or matrix is not allocated"
  ;
[THEN]

: }   ( addr n -- addr[n])       \ word that fetches 1-D array addresses
  OVER CELL-
  [ DEBUG-ARRAYS? ] [IF] unallocated? [THEN]
  @ * SWAP + ;

VARIABLE print-width     6 print-width !

: }fprint ( n 'addr -- )       \ print n elements of a float array
        SWAP 0 DO I print-width @ MOD 0= I AND IF CR THEN
                  DUP I } F@ F. LOOP
        DROP
;

: }iprint ( n 'addr -- )        \ print n elements of an integer array
       SWAP 0 DO I print-width @ MOD 0= I AND IF CR THEN
                 DUP I } @ . LOOP
       DROP
;

: }fcopy ( 'src 'dest n -- )    \ copy one array into another
         0 DO
                 OVER I } F@
                 DUP  I } F!
               LOOP
         2DROP
;

\ 2-D array definition,

\ Monotype
\    -----------------------------------
\    | m | cell_size |  data area |
\    -----------------------------------

: MMATRIX  ( n m size -- )           \ defining word for a 2-d matrix
        CREATE
           OVER , DUP ,
           * * ALLOT
        DOES>  [ 2 CELLS ] LITERAL +
;

\ Structures
\    -----------------------------------
\    | id | m | cell_size |  data area |
\    -----------------------------------

: SMATRIX  ( n m size -- )           \ defining word for a 2-d matrix
        CREATE TYPE-ID ,
           OVER , DUP ,
           * * ALLOT
        DOES>  DUP @ TO TYPE-ID
               [ 3 CELLS ] LITERAL +
;

: MATRIX  ( n m size -- )           \ defining word for a 2-d matrix
     STRUCT-ARRAY? IF   SMATRIX FALSE TO STRUCT-ARRAY?
                   ELSE MMATRIX
                   THEN

;

\  Old }}
: }}    ( addr i j -- addr[i][j] )    \ word to fetch 2-D array addresses
  >R >R                  \ indices to return stack temporarily
  DUP CELL- CELL-
  [ DEBUG-ARRAYS? ] [IF] unallocated? [THEN]
  2@     \ &a[0][0] size m
  R> * R> + * + ; 
((
code }}    ( addr i j -- addr[i][j] )    \ word to fetch 2-D array addresses
        >r >r            \ indices to return stack temporarily
        DUP 2CELLS-  
        2@   \ &a[0][0] size m
        r> * r> + * +
   next,
   end-code
))
\ ( addr i j -- addr[i][j] )    \ word to fetch 2-D array addresses \ indices to return stack temporarily
\ &a[0][0] size m
\ Dynamic 2-D array definition,
\    ------------------------------
\    | data_ptr | cell_size | (id) |
\    ------------------------------

\ word for creation of a dynamic array (no memory allocated)

\ Monotype
\    ------------------------
\    | data_ptr | cell_size |
\    ------------------------

\  Old  DMMATRIX
: DMMATRIX   ( cell_size -- )
  CREATE  0 , ,
  DOES>  @  2 CELLS+
;
((
code _DMMATRIX  ( adr -- addr+2cells)
     @ 2 ass-lit CELLS+
     next,
     end-code

: DMMATRIX   ( cell_size -- )
  qalign
  CREATE  0 , ,
  DOES> _DMMATRIX
 ;      
))

\ Structures
\    ----------------------------
\    | data_ptr | cell_size | id |
\    ----------------------------

: DSMATRIX   ( cell_size -- )
  CREATE  0 , , TYPE-ID ,
  DOES> DUP 2 CELLS+ @ SWAP @ 2 CELLS+
;

: DMATRIX   ( cell_size -- )
     STRUCT-ARRAY? IF   DSMATRIX FALSE TO STRUCT-ARRAY?
                   ELSE DMMATRIX
                   THEN
;

synonym }}FREE }FREE

: }}MALLOC ( &matrix{{ rows cols )
( Allocates a matrix. The element size is known at compile time
  and is stored at &matrix{{; the array dimensions are specified
  at runtime. )
  [ DEBUG-ARRAYS? ] [IF]
  2 PICK >BODY @ IF ." Warning: matrix is already allocated" THEN [THEN]
  2 PICK }}FREE ( deallocate the array to prevent memory leaks )
  2 PICK >BODY CELL+ @ ( get the element size )
  2DUP 2>R * * 2 CELLS+ ( add room to store the element size and row length )
  ALLOCATE IF ( there was an error)
    2R> 3DROP 0 SWAP >BODY ! ( store 0 in the array pointer )
  ELSE
    R> R> 2 PICK 2! ( store the element size and row length )
    SWAP >BODY ! ( store the array location )
  THEN ;

: }}fprint ( n m 'addr -- )       \ print n×m elements of a float 2-D array
        ROT ROT SWAP 0 DO
                         DUP 0 DO
                                  OVER J I  }} F@ F.
                         LOOP

                         CR
                  LOOP
        2DROP
;

: }}iprint ( n m 'addr -- )       \ print n×m elements of a float 2-D array
  ROT ROT SWAP 0 DO
    DUP 0 DO
      OVER J I  }} @ .
    LOOP
    CR
  LOOP
  2DROP
;

: }}fcopy ( 'src 'dest n m  -- )      \ copy n×m elements of 2-D array src to dest
        SWAP 0 DO
                 DUP 0 DO
                            2 PICK J I  }} F@
                            OVER J I }} F!
                        LOOP
                  LOOP
        DROP 2DROP
;

\ Code for hiding words that the user does not need to access
\ into a hidden wordlist.
\ Private:
\          will add HIDDEN to the search order and make HIDDEN
\          the compilation wordlist.  Words defined after this will
\          compile into the HIDDEN vocabulary.
\ Public:
\          will restore the compilation wordlist to what it was before
\          HIDDEN got added, it will leave HIDDEN in the search order
\          if it was already there.   Words defined after this will go
\          into whatever the original vocabulary was, but HIDDEN words
\          are accessable for compilation.
\ Reset_Search_Order
\          This will restore the compilation wordlist and search order
\          to what they were before HIDDEN got added.  HIDDEN words will
\          no longer be visible.

\ These three words can be invoked in any order, multiple times, in a
\ file, but Reset_Search_Order should finally be called last in order to
\ restore things back to the way they were before the file got loaded.

\ WARNING: you can probably break this code by setting vocabularies while
\          Public: or Private: are still active.

synonym Private: internal
synonym Public: external
synonym Reset_Search_Order module

\  Code for local fvariables, loosely based upon Wil Baden's idea presented
\  at FORML 1992.
\  The idea is to have a fixed number of variables with fixed names.
\  I believe the code shown here will work with any, case insensitive,
\  ANS Forth.

\  FRAME| always pushes 8 floats onto the flocal stack; if your
\  arguments are A and B you can use C through H for local storage.

\  Note: The variables are in the opposite order than used by { ... } .

\  example:  : test  2e 3e FRAME| a b |  a f. b f. |FRAME ;
\            test <cr> 3.0000 2.0000 ok

\  PS: Don't forget to use |FRAME before an EXIT .

8 CONSTANT /flocals

: (frame) ( n -- ) FLOATS ALLOT ;

: FRAME|
        0 >R
        BEGIN   BL WORD  COUNT  1 =
                SWAP C@  [CHAR] | =
                AND 0=
        WHILE   POSTPONE F,  R> 1+ >R
        REPEAT
        /FLOCALS R> - DUP 0< ABORT" too many flocals"
        POSTPONE LITERAL  POSTPONE (frame) ; IMMEDIATE

: |FRAME ( -- ) [ /FLOCALS NEGATE ] LITERAL (FRAME) ;

: &h            HERE [ 1 FLOATS ] LITERAL - ;
: &g            HERE [ 2 FLOATS ] LITERAL - ;
: &f            HERE [ 3 FLOATS ] LITERAL - ;
: &e            HERE [ 4 FLOATS ] LITERAL - ;
: &d            HERE [ 5 FLOATS ] LITERAL - ;
: &c            HERE [ 6 FLOATS ] LITERAL - ;
: &b            HERE [ 7 FLOATS ] LITERAL - ;
: &a            HERE [ 8 FLOATS ] LITERAL - ;

: a             &a F@ ;
: b             &b F@ ;
: c             &c F@ ;
: d             &d F@ ;
: e             &e F@ ;
: f             &f F@ ;
: g             &g F@ ;
: h             &h F@ ;

( Note: B and E were previously defined as words interfacing with the editor. )

--- NEW FILE: bench.f ---

\ IN-SYSTEM

create TIME-BUF
here
        0 w,    \ +0  year
        0 w,    \ +2  month
        0 w,    \ +4  day of week
        0 w,    \ +6  day of month
        0 w,    \ +8  hour
        0 w,    \ +10 minute
        0 w,    \ +12 second
        0 w,    \ +14 milliseconds
here swap - constant TIME-LEN

create date$ 32 allot
create time$ 32 allot

: get-local-time ( -- )                 \ get the local computer date and time
[...1054 lines suppressed...]

   cr cr
   .ann ." This system's application performance" .specifics CR
   .header
   [$  
     $SIEVE$  $FIB$  $SORT$  $RAND$  $CODE77$ $DHRY$
\   [ ANSSYSTEM ] [IF]  $DHRY$  [THEN]
   CR  ." Total:"  1 $]
;

decimal
cr cr .( Benchmark code size = ) code-here start-here - . .( bytes.) cr

BENCHMARK

CR CR .( To run the benchmark program again, type BENCHMARK )

--- NEW FILE: MM.F ---
\ MM.FTH - Forth Floating point matrix multiply benchmark

0 [IF] ======================================================
This file is maintained by:
  Stephen Pelc
  MicroProcessor Engineering
  133 Hill Lane
  Southampton SO15 5AF
  England
  tel:   +44 (0)23 8063 1441
  fax:   +44 (0)23 8033 9691
  email: st...@mp...

The Forth MM benchmark was contributed by Marcel Hendrix, derived
from a C benchmark by Mark Smotherman. The code may be freely
redistributed. If you modify this code, and want the changes to
be incorporated in future releases, please send the changes to
Stephen Pelc.

The code is used to test MPE's VFX code generator and optimiser.
Except for legacy reasons, no attempt is made to maintain this
code for non-optimising and non-ANS systems.

The code is maintained to test ANS Forth systems. It does not take
advantage of system specific extensions, except for one (see below).
However, it uses the utility file from the Forth Scientific Library,
which is intended to be hacked for specific systems as much as you
can. Consequently, when you contribute results, please also contribute
the FSL utility file as well.

*************************************
On 2.8GHz P4, 512Mb DDR266 RAM, XPpro
*************************************

SwiftForth 2.00.3 19may2000
standard FPMATH.F and FSL-UTIL.F
all-tests
500x500 mm - normal algorithm                       3.282 secs.
500x500 mm - temporary variable in loop             9.781 secs.
500x500 mm - unrolled inner loop, factor of 4       9.969 secs.
500x500 mm - unrolled inner loop, factor of 8       9.922 secs.
500x500 mm - unrolled inner loop, factor of 16      9.906 secs.
500x500 mm - pointers used to access matrices       4.047 secs.
500x500 mm - pointers used, unrolled by 4           4.141 secs.
500x500 mm - transposed B matrix                    8.891 secs.
500x500 mm - interchanged inner loops               8.797 secs.
500x500 mm - blocking, step size of 20              9.234 secs.
500x500 mm - Robert's algorithm                     2.812 secs.
500x500 mm - T. Maeno's algorithm, subarray 20x20   3.766 secs.
500x500 mm - Generic Maeno, subarray 20x20          3.125 secs.
500x500 mm - D. Warner's algorithm, subarray 20x20  9.344 secs.
=========================================================
Total                                              97.017 secs. ok

iForth version 1.12.8722, generated 23:39:13, June 8, 2002.
optimised FSL_UTIL.FRT
500x500 mm - normal algorithm                       1.735 secs.
500x500 mm - temporary variable in loop             3.296 secs.
500x500 mm - unrolled inner loop, factor of 4       3.172 secs.
500x500 mm - unrolled inner loop, factor of 8       3.125 secs.
500x500 mm - unrolled inner loop, factor of 16      3.094 secs.
500x500 mm - pointers used to access matrices       1.734 secs.
500x500 mm - pointers used, unrolled by 4           1.578 secs.
500x500 mm - transposed B matrix                    1.687 secs.
500x500 mm - interchanged inner loops               2.938 secs.
500x500 mm - blocking, step size of 20              3.125 secs.
500x500 mm - Robert's algorithm                     0.593 secs.
500x500 mm - T. Maeno's algorithm, subarray 20x20   0.875 secs.
500x500 mm - Generic Maeno, subarray 20x20          0.922 secs.
500x500 mm - D. Warner's algorithm, subarray 20x20  1.765 secs.
=========================================================
Total                                              29.639 secs. ok

VFX Forth Version: 3.70 [build 1659] Build date: 5 July 2004
with optimising NDP387 and VFUTILS.FTH
all-tests
500x500 mm - normal algorithm                       1.641 secs.
500x500 mm - temporary variable in loop             2.766 secs.
500x500 mm - unrolled inner loop, factor of 4       2.563 secs.
500x500 mm - unrolled inner loop, factor of 8       2.562 secs.
500x500 mm - unrolled inner loop, factor of 16      2.563 secs.
500x500 mm - pointers used to access matrices       2.984 secs.
500x500 mm - pointers used, unrolled by 4           1.578 secs.
500x500 mm - transposed B matrix                    1.187 secs.
500x500 mm - interchanged inner loops               2.172 secs.
500x500 mm - blocking, step size of 20              2.328 secs.
500x500 mm - Robert's algorithm                     0.578 secs.
500x500 mm - T. Maeno's algorithm, subarray 20x20   0.578 secs.
500x500 mm - Generic Maeno, subarray 20x20          0.656 secs.
500x500 mm - D. Warner's algorithm, subarray 20x20  1.282 secs.
=========================================================
Total                                              25.438 secs. ok

===============================================================
[THEN]

\ ************************************************
\ Select system to be tested, set FORTHSYSTEM
\ to value of selected target.
\ Set SPECIFICS false to avoid system dependencies.
\ Set SPECIFICS true to show off implementation tricks.
\ Set HACKING false to use the base source code.
\ Set HACKING true to optimise the source code.
\ ************************************************

1  constant VfxForth3           \ MPE VFX Forth v3.x
2  constant Pfw22               \ MPE ProForth 2.2
3  constant SwiftForth20        \ FI SwiftForth 2.0
4  constant SwiftForth15        \ FI SwiftForth 1.5
5  constant Win32Forth          \ Win32Forth 4.2
6  constant BigForth            \ BigForth 11 July 1999
7  constant BigForth-Linux      \ BigForth 11 July 1999
8  constant iForth              \ iForth 1.12 5 Aug 2001
9  constant iForth20            \ iForth 2.0 8 June 2002
10  constant SwiftForth22       \ FI SwiftForth 2.2.2.9

\ VfxForth3 constant ForthSystem \ select system to test
\ iForth20 constant ForthSystem
\ SwiftForth22 constant ForthSystem
 Win32Forth constant ForthSystem

 false constant specifics       \ true to use system dependent code
 false constant hacking         \ true to use "guru" level code that
                                \ makes assumptions of an optimising compiler.
 true constant ANSSystem        \ Some Forth 83 systems cannot compile
                                \ all the test examples without carnal
                                \ knowledge, especially if the compiler
                                \ checks control structures.

: .specifics    \ -- ; display trick state
  ."  using"  specifics 0=
  if  ."  no"  then
  ."  extensions"
;

: .hacking      \ -- ; display hack state
  ."  using"  hacking 0=
  if  ."  no"  then
  ."  hackery"
;

: .testcond     \ -- ; display test conditions
  .specifics ."  and" .hacking
;

\ *****************************
\ VFX Forth for Windows harness
\ *****************************

VfxForth3 ForthSystem = [IF]

[defined] +idata [if]
  +idata        \ enable P4 data options
  variable zzz  \ preallocate first IDATA buffer
[then]

true constant ndp?      \ -- flag ; true if NDP stack version

c" C:\Products\PfwVfx.dev\WindowsBox\Sources\Lib"
  setmacro LibDir
c" C:\Products\PfwVfx.dev\WindowsBox\Sources\Lib\FSL\library"
  setmacro FslDir
c" C:\Products\PfwVfx.dev\WindowsBox\Sources\Lib"
  setmacro NdpDir

ndp? [if]
  S" %NdpDir%\Ndp387" INCLUDED
[else]
  S" %NdpDir%\Hfp387" INCLUDED
[then]

  char . dp-char !                      \ select ANS number conversion
  char . fp-char !

-short-branches                         \ disable short forward branches
S" %FslDir%\VfxUtil" INCLUDED           \ FSL harness for VFX Forth 3.x

: HTAB  out @ - spaces ; \ n -- ; step to position n
: DEC.  ( n -- ) BASE @ >R DECIMAL . R> BASE ! ;

extern: DWORD PASCAL GetTickCount( void );

: COUNTER       \ -- ms
  GetTickCount ;

: DFVARIABLE    #16 buffer:  ;

[THEN]

\ ******************************
\ iForth 2.0 for Windows harness
\ ******************************

iForth20 ForthSystem = [IF]

NEEDS -miscutil
NEEDS -dynlink
  0 VALUE 'counter
  S" kernel32.dll" LIBRARY-OPEN THROW ( dll)
  S" GetTickCount" ROT LIBRARY-FIND THROW TO 'counter

: counter ( -- ms ) 0 'counter FOREIGN ;

: defined       \ caddr -- flag
  find nip
;

\ include c:\dfwforth\examples\fsl\fsl_util.frt
include c:\dfwforth\include\fsl_util.frt

: &  ; IMMEDIATE

[THEN]

\ **********************
\ SwiftForth 2.0 harness
\ **********************

SwiftForth20 ForthSystem = [IF]

include C:\MyApps\SwiftForth20\Lib\Options\fpmath.f
: f<>   F= 0=  ;

include C:\MyApps\SwiftForth20\Lib\FSLib\Library\fsl-util.f

: HTAB    get-xy drop - spaces  ; \ n -- ; step to position n
: DEC.  ( n -- ) BASE @ >R DECIMAL . R> BASE ! ;

[THEN]

\ **********************
\ SwiftForth 2.2 harness
\ **********************

SwiftForth22 ForthSystem = [IF]

\ FPCONFIG.F should be in the BENCHMRK folder
include C:\MyApps\SwiftForth2229\Lib\Options\fpmath.f
: f<>   F= 0=  ;

include C:\MyApps\SwiftForth2229\Unsupported\FSLib\Library\fsl-util.f

: HTAB    get-xy drop - spaces  ; \ n -- ; step to position n
: DEC.  ( n -- ) BASE @ >R DECIMAL . R> BASE ! ;

[THEN]

\ ******************
\ Win32Forth harness
\ ******************

Win32Forth ForthSystem = [IF]

fload fsl_util.f

: COUNTER       \ -- ms
  Call GetTickCount ;

: >pos          \ n -- ; step to position n
  getxy drop - spaces ;

: HTAB  #TAB ; \ n -- ; step to position n

: M/            \ d n1 -- quot
  fm/mod nip ;

: buffer:       \ n -- ; -- addr
  create
    here  over allot  swap erase ;

: [o/n]         \ -- ; stop optimiser treating * DROP etc as no code
; immediate

: SendMessage   \ h m w l -- res
  4reverse      \ was swap 2swap swap   \ Win32Forth uses reverse order
  Call SendMessage ;

: GetTickCount  \ -- ms
  Call GetTickCount ;

: DEC.  ( n -- ) BASE @ >R DECIMAL . R> BASE ! ;

: F<>   ( F: r -- ) ( -- bool ) F= 0= ;

: DFVARIABLE ( -- ) CREATE 8 ALLOT ;

: 2+ ( n1 -- n2 ) 2 + ;

[THEN]

\ ********************
\ Start of common code
\ ********************

500 CONSTANT N  \ -- n ; number of iterations to use

\ ******************
\ Timing and display
\ ******************

50 CONSTANT TCOL

0 VALUE _time_
VARIABLE TotalTime

: TIMER-RESET ( -- )
  COUNTER TO _time_  ;

: #? ( d -- d )
  2DUP OR 0=
  IF  BL HOLD  ELSE  #  THEN ;

: .secs         \ ms --
  0 <#  BL HOLD # # # [char] . HOLD # #? #?  #> TYPE
  ." secs."
;

: .ELAPSED ( -- )
  TCOL HTAB
  COUNTER _time_ -
  dup TotalTime +!
  .secs
;

: .algo         \ caddr u --
\ Display size and algorithm from string
  CR N 0 .R  ." x"  N 0 .R  SPACE TYPE
;

\ *****
\ TOOLS
\ *****

DEFINED DF@+  NIP 0= [IF] : DF@+  ( addr -- addr' ) ( F: -- r ) DUP DF@ DFLOAT+ ; [THEN]
DEFINED DF+!  NIP 0= [IF] : DF+!  ( addr -- )       ( F: r -- ) DUP DF@ F+ DF!  ; [THEN]
DEFINED DF!+  NIP 0= [IF] : DF!+  ( addr -- addr' ) ( F: r -- ) DUP DF! DFLOAT+ ; [THEN]
DEFINED DF+!+ NIP 0= [IF] : DF+!+ ( addr -- addr' ) ( F: r -- ) DUP DF@ F+ DF!+ ; [THEN]

: *DSUM  ( addr1 addr2 count -- addr1' addr2' ) ( F: -- n )
  0e 0 ?DO  SWAP DF@+ SWAP DF@+ F* F+  LOOP ;

: *DSUML ( addr1 addr2 count stride2 -- addr1' addr2' ) ( F: -- r )
  LOCALS| stride2 |
  0e 0 ?DO
    SWAP DF@+
    SWAP DUP DF@ stride2 +
    F* F+
  LOOP ;

CHAR x CONSTANT 'x'
CHAR n CONSTANT 'n'
CHAR v CONSTANT 'v'
CHAR u CONSTANT 'u'
CHAR p CONSTANT 'p'
CHAR t CONSTANT 't'
CHAR i CONSTANT 'i'
CHAR b CONSTANT 'b'
CHAR m CONSTANT 'm'
CHAR r CONSTANT 'r'
CHAR w CONSTANT 'w'
CHAR z CONSTANT 'z'

\ =====================================================================================

DOUBLE DMATRIX  a{{
DOUBLE DMATRIX  b{{
DOUBLE DMATRIX  c{{
DOUBLE DMATRIX  d{{
DOUBLE DMATRIX bt{{

0 [IF] ================================================
\ Useful test bits

: .a{{          \ -- ; display A{{ matrix
  cr ." A{{"
  N 0 ?DO
    cr  N 0 ?DO
      a{{ J I }} DF@ F.
    LOOP
  LOOP
;

: .b{{          \ -- ; display B{{ matrix
  cr ." B{{"
  N 0 ?DO
    cr  N 0 ?DO
      b{{ J I }} DF@ F.
    LOOP
  LOOP
;

: .c{{          \ -- ; display B{{ matrix
  cr ." C{{"
  N 0 ?DO
    cr  N 0 ?DO
      c{{ J I }} DF@ F.
    LOOP
  LOOP
;

: .bt{{         \ -- ; display B{{ matrix
  cr ." BT{{"
  N 0 ?DO
    cr  N 0 ?DO
      bt{{ J I }} DF@ F.
    LOOP
  LOOP
;

[THEN]

: SET-COEFFICIENTS ( -- )
\ Set coefficients so that result matrix should have row entries
\ equal to (1/2)*n*(n-1)*i in row i
  N 0 ?DO
    N 0 ?DO
      J S>F FDUP b{{ J I }} DF!  a{{ J I }} DF!
    LOOP
  LOOP ;

: FLUSH-CACHE ( -- )
  N 0 ?DO
    N 0 ?DO
      0e d{{ J I }} DF!
    LOOP
  LOOP ;

FVARIABLE row_sum
FVARIABLE sum

: CHECK-RESULT ( -- )
  0e row_sum F!
  N N 1- * 2/ S>F sum F!
  N 0 ?DO
    I S>F sum F@ F* row_sum F!
    N 0 ?DO
      a{{ J I }} DF@  J S>F  F<> IF
        CR ." error in result entry a{{ " J DEC. I DEC. ." }}: "
        a{{ J I }} DF@ F. ." <> "  J S>F F.
        UNLOOP UNLOOP EXIT
      THEN
      b{{ J I }} DF@ J S>F  F<> IF
        CR ." error in result entry b{{ " J DEC. I DEC. ." }}: "
        b{{ J I }} DF@ F. ." <> "  J S>F F.
        UNLOOP UNLOOP EXIT
      THEN
      c{{ J I }} DF@ row_sum F@ F<> IF
        CR ." error in result entry c{{ " J DEC. I DEC. ." }}: "
        c{{ J I }} DF@ F. ." <> " row_sum F@ F.
        UNLOOP UNLOOP EXIT
      THEN
    LOOP
  LOOP  ;

: NORMAL() ( -- )
  s" mm - normal algorithm" .algo
  TIMER-RESET
  N 0 ?DO
    N 0 ?DO
      a{{ J 0 }} b{{ 0 I }} N N DFLOATS *DSUML 2DROP  c{{ J I }} DF!
    LOOP
  LOOP
  .ELAPSED  ;

: TNSQ() ( -- )
  0 LOCALS| K |
  s" mm - temporary variable in loop" .algo
  TIMER-RESET
  N 0 ?DO  I TO K
    N 0 ?DO
      a{{ J 0 }} DF@  b{{ 0 I }} DF@ F*
      N 1 ?DO
        a{{ K I }} DF@  b{{ I J }} DF@ F* F+
      LOOP
      c{{ J I }} DF!
    LOOP
  LOOP
  .ELAPSED ;

: UNROLL4() ( -- )
  0 0 LOCALS| K S |
  s" mm - unrolled inner loop, factor of 4" .algo
  TIMER-RESET
  N 0 ?DO  I TO K
    N 0 ?DO  0e  0 TO S
      N 3 - 0 ?DO  I TO S
        a{{ K I     }} DF@  b{{ I     J }} DF@ F* F+
        a{{ K I 1+  }} DF@  b{{ I 1+  J }} DF@ F* F+
        a{{ K I 2+  }} DF@  b{{ I 2+  J }} DF@ F* F+
        a{{ K I 3 + }} DF@  b{{ I 3 + J }} DF@ F* F+
      4 +LOOP
      N  S 4 + ?DO
        a{{ K I     }} DF@  b{{ I     J }} DF@ F* F+
      LOOP
      c{{ J I }} DF!
    LOOP
  LOOP
  .ELAPSED ;

: UNROLL8() ( -- )
  0 0 LOCALS| K S |
  s" mm - unrolled inner loop, factor of 8" .algo
  TIMER-RESET
  N 0 ?DO  I TO K
    N 0 ?DO  0e  0 TO S
      N 7 - 0 ?DO  I TO S
        a{{ K I     }} DF@  b{{ I     J }} DF@ F* F+
        a{{ K I 1+  }} DF@  b{{ I 1+  J }} DF@ F* F+
        a{{ K I 2+  }} DF@  b{{ I 2+  J }} DF@ F* F+
        a{{ K I 3 + }} DF@  b{{ I 3 + J }} DF@ F* F+
        a{{ K I 4 + }} DF@  b{{ I 4 + J }} DF@ F* F+
        a{{ K I 5 + }} DF@  b{{ I 5 + J }} DF@ F* F+
        a{{ K I 6 + }} DF@  b{{ I 6 + J }} DF@ F* F+
        a{{ K I 7 + }} DF@  b{{ I 7 + J }} DF@ F* F+
      8 +LOOP
      N  S 8 + ?DO
        a{{ K I     }} DF@  b{{ I     J }} DF@ F* F+
      LOOP
      c{{ J I }} DF!
    LOOP
  LOOP
  .ELAPSED ;

: UNROLL16() ( -- )
  0 0 LOCALS| K S |
  s" mm - unrolled inner loop, factor of 16" .algo
  TIMER-RESET
  N 0 ?DO  I TO K
    N 0 ?DO  0e 0 TO S
      N 15 - 0 ?DO  I TO S
        a{{ K I      }} DF@  b{{ I      J }} DF@ F* F+
        a{{ K I  1+  }} DF@  b{{ I  1+  J }} DF@ F* F+
        a{{ K I  2+  }} DF@  b{{ I  2+  J }} DF@ F* F+
        a{{ K I  3 + }} DF@  b{{ I  3 + J }} DF@ F* F+
        a{{ K I  4 + }} DF@  b{{ I  4 + J }} DF@ F* F+
        a{{ K I  5 + }} DF@  b{{ I  5 + J }} DF@ F* F+
        a{{ K I  6 + }} DF@  b{{ I  6 + J }} DF@ F* F+
        a{{ K I  7 + }} DF@  b{{ I  7 + J }} DF@ F* F+
        a{{ K I  8 + }} DF@  b{{ I  8 + J }} DF@ F* F+
        a{{ K I  9 + }} DF@  b{{ I  9 + J }} DF@ F* F+
        a{{ K I 10 + }} DF@  b{{ I 10 + J }} DF@ F* F+
        a{{ K I 11 + }} DF@  b{{ I 11 + J }} DF@ F* F+
        a{{ K I 12 + }} DF@  b{{ I 12 + J }} DF@ F* F+
        a{{ K I 13 + }} DF@  b{{ I 13 + J }} DF@ F* F+
        a{{ K I 14 + }} DF@  b{{ I 14 + J }} DF@ F* F+
        a{{ K I 15 + }} DF@  b{{ I 15 + J }} DF@ F* F+
      16 +LOOP
      N S  16 + ?DO
        a{{ K I      }} DF@  b{{ I      J }} DF@ F* F+
      LOOP
      c{{ J I }} DF!
    LOOP
  LOOP
  .ELAPSED ;

: UNROLL ( n -- )
  CASE
     4 OF UNROLL4()  ENDOF
     8 OF UNROLL8()  ENDOF
    16 OF UNROLL16() ENDOF
          CR ." mm - unrolled inner loop, factor of " DUP DEC. ." not implemented"
  ENDCASE ;

specifics [if]
  VfxForth3 ForthSystem = [if]  -fasti  [then]
[then]

: PNSQ4() ( -- )
  0 LOCALS| S |
  s" mm - pointers used, unrolled by 4" .algo
  TIMER-RESET
  N 0 ?DO
    N 0 ?DO
      0e  a{{ J 0 }}  b{{ 0 I }}  0 TO S
      N 3 -   0 ?DO  I TO S
        SWAP DF@+ SWAP DUP DF@ F* F+ N DFLOATS +
        SWAP DF@+ SWAP DUP DF@ F* F+ N DFLOATS +
        SWAP DF@+ SWAP DUP DF@ F* F+ N DFLOATS +
        SWAP DF@+ SWAP DUP DF@ F* F+ N DFLOATS +
      4 +LOOP
      N   S 4 + ?DO
        SWAP DF@+ SWAP DUP DF@ F* F+ N DFLOATS +
      LOOP
      c{{ J I }} DF!  2DROP
    LOOP
  LOOP
  .ELAPSED ;

: PNSQ() ( n -- )
  DUP 4 =
  IF  DROP PNSQ4() EXIT  THEN
  s" mm - pointers used to access matrices" .algo
  ?DUP IF
    ." , unroll factor of " DEC. ." not allowed" EXIT
  THEN
  TIMER-RESET
  N 0 ?DO
    N 0 ?DO
      0e  a{{ J 0 }}  b{{ 0 I }}
      N 0 ?DO
        SWAP DF@+ SWAP
        DUP DF@ N DFLOATS +
        F* F+
\        DUP DF@
\        F* F+
\        N DFLOATS +
      LOOP
      c{{ J I }} DF!  2DROP
    LOOP
  LOOP
  .ELAPSED ;

specifics [if]
  VfxForth3 ForthSystem = [if]  +fasti  [then]
[then]

: TRANSPOSE() ( -- )
  0 LOCALS| K |
  s" mm - transposed B matrix" .algo
  TIMER-RESET
  N 0 ?DO
    N 0 ?DO
      b{{ J I }} DF@  bt{{ I J }} DF!
    LOOP
  LOOP
  N 0 ?DO  I TO K
    N 0 ?DO
      a{{ J 0 }} DF@  bt{{ I 0 }} DF@ F*
      N 1 ?DO
        a{{ K I }} DF@  bt{{ J I }} DF@ F* F+
      LOOP
      c{{ J I }} DF!
    LOOP
  LOOP
  .ELAPSED ;

\ from Monica Lam ASPLOS-IV paper
: REG_LOOPS() ( -- )
  0 LOCALS| K |
  s" mm - interchanged inner loops" .algo
  TIMER-RESET
  N 0 ?DO
    N 0 ?DO
      0e c{{ J I }} DF!
    LOOP
  LOOP
  N 0 ?DO  I TO K
    N 0 ?DO
      a{{ J I }} DF@
      N 0 ?DO
        FDUP b{{ J I }} DF@ F* c{{ K I }} DF+!
      LOOP
      FDROP
    LOOP
  LOOP
  .ELAPSED ;

: TILING() ( step -- )
  s" mm - blocking, step size of " .algo  DUP DEC.
  DUP 4 N 1+ WITHIN 0= IF
    drop ." is unreasonable"  EXIT
  THEN
  TIMER-RESET
  0 0 0 LOCALS| K kk jj step |
  N 0 ?DO  N 0 ?DO  0e c{{ J I }} DF!  LOOP LOOP
  N 0 ?DO  I TO kk
    N 0 ?DO  I TO jj
      N 0 ?DO   I TO K
        kk step + N MIN  kk ?DO
          a{{ J I }} DF@
          jj step + N MIN  jj ?DO
            FDUP b{{ J I }} DF@ F*  c{{ K I }} DF+!
          LOOP
          FDROP
        LOOP
      LOOP
    step +LOOP
  step +LOOP
  .ELAPSED
;

\ ********************************************/
\ * Contributed by Robert Debath 26 Nov 1995 */
\ * rd...@ci...              */
\ ********************************************/

: ROBERT() ( -- )
  s" mm - Robert's algorithm" .algo
  TIMER-RESET
  N 0 ?DO  N 0 ?DO
    b{{ J I }} DF@  bt{{ I J }} DF!
  LOOP LOOP
  N 0 ?DO  N 0 ?DO
    a{{ J 0 }} bt{{ I 0 }} N *DSUM 2DROP c{{ J I }} DF!
  LOOP LOOP
  .ELAPSED
;

0 [IF] ===========================================================================
 * Matrix Multiply by Dan Warner, Dept. of Mathematics, Clemson University
 *
 *    mmbu2.f multiplies matrices a and b
 *    a and b are n by n matrices
 *    nb is the blocking parameter.
 *    the tuning guide indicates nb = 50 is reasonable for the
 *    ibm model 530 hence 25 should be reasonable for the 320
 *    since the 320 has 32k rather than 64k of cache.
 *    Inner loops unrolled to depth of 2
 *    The loop functions without clean up code at the end only
 *    if the unrolling occurs to a depth k which divides into n
 *    in this case n must be divisible by 2.
 *    The blocking parameter nb must divide into n if the
 *    multiply is to succeed without clean up code at the end.
 *
 * converted to c by Mark Smotherman
 * note that nb must also be divisible by 2 => cannot use 25, so use 20
=========================================================================== [THEN]

DFVARIABLE s10
DFVARIABLE s00
DFVARIABLE s01
DFVARIABLE s11

: WARNER() ( nb -- )
  0 0 0 0 LOCALS| K ii jj kk nb |
  s" mm - D. Warner's algorithm, subarray" .algo  nb 0 .R  'x' EMIT  nb 0 .R
  N nb MOD  N 2 MOD OR IF
    cr ."   the matrix size " N DEC.
    ." must be divisible both by the block size " nb DEC. ." and 2."
    EXIT
  THEN
  nb 2 MOD IF
    ."   block size must be evenly divisible by 2"
    EXIT
  THEN
  TIMER-RESET
  N 0 ?DO  I TO ii
    N 0 ?DO  I TO jj
      nb ii + ii ?DO
        nb jj + jj ?DO
          0e c{{ J I }} DF!
        LOOP
      LOOP
      N 0 ?DO  I TO kk
        nb ii + ii ?DO  I TO K
          nb jj + jj ?DO
            c{{ J     I    }} DF@ s00 DF!
            c{{ J     I 1+ }} DF@ s01 DF!
            c{{ J 1+  I    }} DF@ s10 DF!
            c{{ J 1+  I 1+ }} DF@ s11 DF!
            nb kk + kk ?DO
              a{{ K    I }} DF@  b{{ I J    }} DF@ F* s00 DF+!
              a{{ K    I }} DF@  b{{ I J 1+ }} DF@ F* s01 DF+!
              a{{ K 1+ I }} DF@  b{{ I J    }} DF@ F* s10 DF+!
              a{{ K 1+ I }} DF@  b{{ I J 1+ }} DF@ F* s11 DF+!
            LOOP
            s00 DF@ c{{ J     I    }} DF!
            s01 DF@ c{{ J     I 1+ }} DF!
            s10 DF@ c{{ J 1+  I    }} DF!
            s11 DF@ c{{ J 1+  I 1+ }} DF!
          2 +LOOP
        2 +LOOP
      nb +LOOP
    nb +LOOP
  nb +LOOP
  .ELAPSED ;

0 [IF] ===========================================================================
Matrix Multiply tuned for SS-10/30;
 *                      Maeno Toshinori
 *                      Tokyo Institute of Technology
 *
 * Using gcc-2.4.1 (-O2), this program ends in 12 seconds on SS-10/30.
 *
 * in original algorithm - sub-area for cache tiling
 * #define      L       20
 * #define      L2      20
 * three 20x20 matrices reside in cache; two may be enough
=========================================================================== [THEN]

DFVARIABLE t0
DFVARIABLE t1
DFVARIABLE t2
DFVARIABLE t3
DFVARIABLE t4
DFVARIABLE t5
DFVARIABLE t6
DFVARIABLE t7

: MAENO() ( nb -- )
  0 0 0 0 0 LOCALS| K it kt i2 kk lparm |
  s" mm - T. Maeno's algorithm, subarray" .algo  lparm 0 .R 'x' EMIT lparm 0 .R
  N lparm MOD  N 4 MOD OR IF
    cr ."   the matrix size " N DEC.
    ." must be divisible both by the block size " lparm DEC. ." and 4."
    EXIT
  THEN
  lparm 4 MOD IF
    cr ."   block size must be evenly divisible by 4"
    EXIT
  THEN
  TIMER-RESET
  N 0 ?DO N 0 ?DO  0e c{{ J I }} DF!  LOOP LOOP
  N 0 ?DO  I TO i2
    N 0 ?DO  I TO kk
      i2 lparm + TO it
      kk lparm + TO kt
      N 0 ?DO  I TO K
        it i2 ?DO
          0e t0 DF!  0e t1 DF!  0e t2 DF!  0e t3 DF!
          0e t4 DF!  0e t5 DF!  0e t6 DF!  0e t7 DF!
          kt  kk ?DO
            a{{ J I }} DF@
            FDUP b{{ I K }} DUP DF@+ F* t0 DF+!
            FDUP                DF@+ F* t1 DF+!
            FDUP                DF@+ F* t2 DF+!
                                DF@  F* t3 DF+!
            a{{ J 1+ I }}  DF@
            FDUP                DF@+ F* t4 DF+!
            FDUP                DF@+ F* t5 DF+!
            FDUP                DF@+ F* t6 DF+!
                                DF@  F* t7 DF+!
          LOOP
          t0 DF@ c{{ I    J }}  DF+!+
          t1 DF@                DF+!+
          t2 DF@                DF+!+
          t3 DF@                DF+!
          t4 DF@ c{{ I 1+ J }}  DF+!+
          t5 DF@                DF+!+
          t6 DF@                DF+!+
          t7 DF@                DF+!
        2 +LOOP
      4 +LOOP
    lparm +LOOP
  lparm +LOOP
  .ELAPSED ;

: MM_MAENO      \ pip1 pip2 pop3 nb --
\ Takes pointers to two FSL input arrays, an FSL output array and a
\ block size nb.
  0 0 0 0 0 LOCALS| K it kt i2 kk lparm pop3{{ pip2{{ pip1{{ |
  N 0 ?DO N 0 ?DO  0e pop3{{ J I }} DF!  LOOP LOOP
  N 0 ?DO  I TO i2
    N 0 ?DO  I TO kk
      i2 lparm + TO it
      kk lparm + TO kt
      N 0 ?DO  I TO K
        it i2 ?DO
          0e t0 DF!  0e t1 DF!  0e t2 DF!  0e t3 DF!
          0e t4 DF!  0e t5 DF!  0e t6 DF!  0e t7 DF!
          kt  kk ?DO
            pip1{{ J I }} DF@
            FDUP pip2{{ I K }}  DUP DF@+ F* t0 DF+!
            FDUP                DF@+ F* t1 DF+!
            FDUP                DF@+ F* t2 DF+!
                                DF@  F* t3 DF+!
            pip1{{ J 1+ I }}    DF@
            FDUP                DF@+ F* t4 DF+!
            FDUP                DF@+ F* t5 DF+!
            FDUP                DF@+ F* t6 DF+!
                                DF@  F* t7 DF+!
          LOOP
          t0 DF@ pop3{{ I    J }}  DF+!+
          t1 DF@                DF+!+
          t2 DF@                DF+!+
          t3 DF@                DF+!
          t4 DF@ pop3{{ I 1+ J }}  DF+!+
          t5 DF@                DF+!+
          t6 DF@                DF+!+
          t7 DF@                DF+!
        2 +LOOP
      4 +LOOP
    lparm +LOOP
  lparm +LOOP
;

: MAENO2()      \ nb --
  s" mm - Generic Maeno, subarray " .algo  dup 0 .R  'x' EMIT  dup 0 .R
  N over MOD  N 4 MOD OR IF
    cr ."   the matrix size " N DEC.
    ." must be divisible both by the block size " DEC. ." and 4."
    EXIT
  THEN
  dup 4 MOD IF
    drop
    cr ."   block size must be evenly divisible by 4"
    EXIT
  THEN
  TIMER-RESET
  >r a{{ b{{ c{{ r> MM_MAENO
  .ELAPSED ;

: MM ( char n -- )
  DEPTH 0= ABORT" no algorithm chosen"
  DEPTH 2 < IF 0 THEN
  LOCALS| ur |
  &  a{{ N N }}malloc \ malloc-fail?
  &  b{{ N N }}malloc \ malloc-fail? OR
  & bt{{ N N }}malloc \ malloc-fail? OR
  &  c{{ N N }}malloc \ malloc-fail? OR
  &  d{{ N N }}malloc \ malloc-fail? OR ABORT" MM :: out of core"
  SET-COEFFICIENTS
  FLUSH-CACHE
  CASE
    'n' OF NORMAL()    ENDOF
    'v' OF TNSQ()      ENDOF
    'u' OF ur UNROLL   ENDOF
    'p' OF ur PNSQ()   ENDOF
    't' OF TRANSPOSE() ENDOF
    'i' OF REG_LOOPS() ENDOF
    'b' OF ur TILING() ENDOF
    'm' OF ur MAENO()  ENDOF
    'z' OF ur MAENO2() ENDOF
    'r' OF ROBERT()    ENDOF
    'w' OF ur WARNER() ENDOF
           CR ." `" DUP EMIT ." ' is an invalid algorithm"
  ENDCASE
  CHECK-RESULT
  &  d{{ }}free
  &  c{{ }}free
  & bt{{ }}free
  &  b{{ }}free
  &  a{{ }}free
  key? drop                     \ Permits o/p update on some systems
;

: ALL-TESTS ( -- )
  page
  0 TotalTime !
  'n'    mm    'v'    mm    'u'  4 mm    'u'  8 mm
  'u' 16 mm    'p'    mm    'p'  4 mm    't'    mm
  'i'    mm    'b' 20 mm    'r'    mm
  'm' 20 mm    'z' 20 mm    'w' 20 mm
  cr  TCOL 7 + 0
  ?DO  ." ="  LOOP
  cr ." Total" .testcond TCOL HTAB  TotalTime @ .secs
;

: .ABOUT
  CR ." Try: 'n'    mm  -- normal"
  CR ."      'v'    mm  -- with a temporary variable in the inner loop"
  CR ."      'u'  n mm  -- with unrolled (by n) inner loop, n = {4,8,16}"
  CR ."      'p'    mm  -- using pointers instead of array notation"
  CR ."      'p'  4 mm  -- using pointers instead of array notation, unrolled by 4 [new]"
  CR ."      't'    mm  -- with transposed b matrix"
  CR ."      'i'    mm  -- with switched inner loops"
  CR ."      'b'  n mm  -- using blocking by n, 4 < n < " N DEC.
  CR ."      'r'    mm  -- using Robert's algorithm"
  CR ."      'r'  8 mm  -- using Robert's algorithm unrolled by 8"
  CR ."      'm'  n mm  -- using Maeno's algorithm with blocking factor n"
  CR ."      'z'  n mm  -- using Maeno's algorithm with blocking factor n - generic form"
  CR ."      'w'  n mm  -- using Warner's algorithm with blocking factor n"
  CR
  CR ." ALL-TESTS       -- test all algorithms" ;

.ABOUT

( * End of Source * )

--- NEW FILE: matrix.f ---
\
\ matrix.f  (Win32Forth version)
\
\ Integer and floating point matrix manipulation routines for Forth
\   systems which use a separate floating point stack and which do NOT
\   have kForth style data typing.
\
\ Copyright (c) 1998--2002 Krishna Myneni
\
\ Revisions:
\
\	12-29-1998
\	3-29-1999   added rc>frc KM
\	12-25-1999  updated  KM
\	05-10-2000  fixed determ for singular matrix  KM
\	05-17-2000  added defining words for matrices  KM
\	08-10-2001  improved efficiency of several matrix words;
\	              about 10% faster execution in real apps  KM
\       12-09-2002  begin port to Forths with separate fp stack;
\		      changed all references to dfloats to floats  KM
\       12-14-2002  finished port to pfe and gforth. cleaned up
\                     determ and matinv by using values and the
\		      loop indexing word K	 KM
\       12-16-2002  fixed fmat_addr when size of float is not 2 cells KM
\
\ Notes:
\
\   Usage:
\	n m matrix name
\	n m fmatrix name
\
\   Examples:
\
\	3 5 matrix alpha 	( create a 3 by 5 integer matrix called alpha )
\	3 3 fmatrix beta	( create a 3 by 3 floating pt matrix, beta )

\ Memory storage format for matrices:
\   The first four bytes contains ncols and the next four bytes contains
\   nrows. The matrix data is stored next in row order.
\
\ Indexing Convention:
\	Top left element is 1, 1
\	Bottom right element is nrows, ncols
\
\ matinv and determ are based on routines from P.R. Bevington,
\   "Data Reduction and Error Analysis for the Physical Sciences".
\
\ The word K is assumed to be defined. It provides the second outer
\   loop index, as an extension of I, J, ...
\
\ Some Forths may require the following defs:
\
\ : s>f s>d d>f ;
: ?allot here swap allot ;
: float- [ 1 floats ] literal - ;

: rc_index ( n -- rc | generate an rc with a running index 1 to n )
	dup 1+ 1 do i swap loop ;

: rc_neg ( rc1 -- rc2 | negate the values in the rc )
	sp@ cell+ over 0 do dup @ negate over ! cell+ loop drop ;

: rc_dup ( rc -- rc rc | duplicate an rc on the stack )
	dup 1+ dup 0 do dup pick swap loop drop ;

: rc_max ( rc -- n | find max value in rc )
	1- dup 0> if 0 do max loop else drop then ;

: rc_min ( rc -- n | find min value in rc )
	1- dup 0> if 0 do min loop else drop then ;

: frc_index ( n -- | generate fp running index )
	    ( F: -- 1e 2e ... ne )
	dup 1+ 1 do i s>f loop ;

: frc_neg ( n -- n | negate the values in the frc )
	  ( F: f1 f2 ... fn -- -f1 -f2 ... -fn )
	dup dup 1- floats floatsp + swap
	0 do dup f@ fnegate dup f! float- loop drop ;

: frc_dup ( n -- n n | duplicate an frc on the stacks )
	  ( F: f1 f2 ... fn -- f1 f2 ... fn f1 f2 ... fn )
	dup dup 1- floats floatsp + swap 0 do dup f@ float- loop drop dup ;

: frc_max ( n --  | find max value in frc )
	  ( F: f1 f2 ... fn -- fmax )
	1- dup 0> if 0 do fmax loop else drop then ;

: frc_min ( n --  | find min value in frc )
	  ( F: f1 f2 ... fn -- fmin )
	1- dup 0> if 0 do fmin loop else drop then ;

: rc>frc ( m1 m2 ... mn n -- n | convert integer rc to frc )
	 ( F: -- f1 f2 ... fn )
	dup 0 do dup i - roll s>f loop ;

: mat_size@ ( a -- nrows ncols | gets the matrix size )
	dup cell+ @ swap @ ;

: mat_size! ( nrows ncols a -- | set up the matrix size )
	dup >r ! r> cell+ ! ;

: mat_addr ( i j a -- a2 | returns address of the i j element of a )
	>r cells swap 1- r@ @ * cells + cell+ r> + ;

: mat@ ( i j a -- n | returns the i j element of a )
	mat_addr @ ;

: mat! ( n i j a -- | store n as the i j element of a )
	mat_addr ! ;

: mat_zero ( a -- | zero all entries in matrix )
	dup mat_size@ * >r 1 1 rot mat_addr r>
	0 do 0 over ! cell+ loop drop ;

: row@ ( i a -- rc | fetch row i onto the stack as an rc )
	dup @ >r 1 swap mat_addr r> dup
	0 do over @ -rot swap cell+ swap loop nip ;

: row! ( rc i a -- | store rc as row i of matrix a )
	dup @ dup >r swap mat_addr r>
	0 do rot over ! 4 - loop 2drop ;

: col@ ( j a -- rc | fetch column j onto the stack as an rc )
	dup mat_size@ cells 2>r 1 -rot mat_addr 2r>
	swap dup >r
	0 do over @ -rot swap over + swap loop 2drop r> ;

: col! ( rc j a -- | store rc as column j of matrix a )
	dup mat_size@ cells >r dup >r -rot mat_addr r> r>
	swap
	0 do >r rot over ! r@ - r> loop 2drop drop ;

: row_swap ( i j a -- | swap rows i and j of matrix a )
	tuck 2dup 2>r 2over 2>r
	2>r row@ 2r> row@
	2r> row! 2r> row! ;

: col_swap ( i j a -- | swap columns i and j of matrix a )
	tuck 2dup 2>r 2over 2>r
	2>r col@ 2r> col@
	2r> col! 2r> col! ;

: mat. ( a -- | print out the mat...

[truncated message content]

[Win32forth-cvs] win32forth-stc/demos/benchmarks MM.F, NONE, 1.1 Testpde.f, NONE, 1.1 bench.f, NONE

[Win32forth-cvs] win32forth-stc/demos/benchmarks MM.F, NONE, 1.1 Testpde.f, NONE, 1.1 bench.f, NONE, 1.1 fsl_util.f, NONE, 1.1 matrix.f, NONE, 1.1 pde1.f, NONE, 1.1