Re: [Ocaml-lib-devel] New Library 1: XList

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

"Nicolas Cannasse" <war...@fr...> writes:
> - while talking about performance, could someone do some test program for it
> ? I would like to test for example the improvement of inlining the setcdr

I have a "beta" implementation of something like the Perl Benchmark
module:

 http://www.bagley.org/~doug/ocaml/benchmark/

It can help to answer such questions as: "is it faster to do it this
way or that way?"

I've attached an example below, which indicates, as one might expect,
that inlining helps with bytecode (ocamlc), but not so much with
native code (ocamlopt).

The example also has an alternative for your "init" function which
seems a little faster.

Cheers,
Doug
-- 

(* foo.ml ...

COMPILE:
 ocamlc -I +site-lib/benchmark unix.cma benchmark.cma -o foo foo.ml
 ocamlopt -I +site-lib/benchmark unix.cmxa benchmark.cmxa -o foo.opt foo.ml

RESULTS:
> ./foo
Throughputs for init1, init2 ...
     init1: 21 WALL (20.97 usr +  0.02 sys = 20.99 CPU) @ 156.93/s (n=3294)
     init2: 21 WALL (20.91 usr +  0.02 sys = 20.93 CPU) @ 71.91/s (n=1505)

        Rate init2 init1
init2 71.9/s    --  -54%
init1  157/s  118%    --

Throughputs for duplicate/ext, duplicate/inl ...
duplicate/ext: 21 WALL (20.82 usr +  0.02 sys = 20.84 CPU) @ 81.33/s (n=1695)
duplicate/inl: 21 WALL (20.95 usr +  0.01 sys = 20.96 CPU) @ 102.96/s (n=2158)

                Rate duplicate/ext duplicate/inl
duplicate/ext 81.3/s            --          -21%
duplicate/inl  103/s           27%            --

> ./foo.opt
Throughputs for init1, init2 ...
     init1: 22 WALL (20.92 usr +  0.02 sys = 20.94 CPU) @ 299.52/s (n=6272)
     init2: 22 WALL (21.57 usr +  0.01 sys = 21.58 CPU) @ 146.15/s (n=3154)

       Rate init2 init1
init2 146/s    --  -51%
init1 300/s  105%    --

Throughputs for duplicate/ext, duplicate/inl ...
duplicate/ext: 21 WALL (21.17 usr +  0.01 sys = 21.18 CPU) @ 126.25/s (n=2674)
duplicate/inl: 21 WALL (21.16 usr +  0.01 sys = 21.17 CPU) @ 128.72/s (n=2725)

               Rate duplicate/ext duplicate/inl
duplicate/ext 126/s            --           -2%
duplicate/inl 129/s            2%            --

 *)

open Printf
open Benchmark

(* [setcdr cell tail] destructively overwrites field 1 of the heap block
   behind [cell] with [tail]; for a cons cell that field is its tail.
   This bypasses the type system through [Obj]: [cell] must be a non-empty
   list ([[]] is not a heap block), and it should be a freshly allocated
   cell that nothing else relies on being immutable. *)
let setcdr (cell : 'a list) (tail : 'a list) : unit =
  Obj.set_field (Obj.repr cell) 1 (Obj.repr tail)

(* [duplicate_aux dst src] appends a fresh copy of every element of [src]
   after the cell [dst], patching tails in place with [setcdr], and returns
   the last cell written (or [dst] itself when [src] is empty).
   [dst] must be a non-empty list whose cell may safely be mutated. *)
let rec duplicate_aux dst src =
  match src with
  | [] -> dst
  | x :: rest ->
      (* Allocate the new one-element cell, then hook it after [dst]. *)
      let cell = x :: [] in
      setcdr dst cell;
      duplicate_aux cell rest

(* [duplicate l] copies the non-empty list [l] and returns the pair
   [(first, last)]: [first] is the head of the copy and [last] is its final
   cell, as returned by [duplicate_aux].
   An empty argument is treated as a programming error ([assert false]). *)
let duplicate src =
  match src with
  | x :: rest ->
      let first = x :: [] in
      let last = duplicate_aux first rest in
      (first, last)
  | [] -> assert false

(* Variant of [duplicate_aux] with the tail update written out by hand
   ([Obj.set_field] directly instead of a call to [setcdr]), so the two can
   be benchmarked against each other.
   [duplicate_aux_i dst src] appends a fresh copy of each element of [src]
   after the cell [dst] and returns the last cell written (or [dst] when
   [src] is empty). [dst] must be a non-empty, safely mutable cell. *)
let rec duplicate_aux_i dst src =
  match src with
  | [] -> dst
  | x :: rest ->
      let cell = x :: [] in
      (* Field 1 of a cons block is its tail: point [dst] at the new cell. *)
      Obj.set_field (Obj.repr dst) 1 (Obj.repr cell);
      duplicate_aux_i cell rest

(* Same contract as [duplicate], but built on [duplicate_aux_i] (the
   hand-inlined tail update): copies a non-empty list and returns
   [(first, last)], the head and the final cell of the copy.
   An empty argument is treated as a programming error ([assert false]). *)
let duplicate_i src =
  match src with
  | x :: rest ->
      let first = x :: [] in
      let last = duplicate_aux_i first rest in
      (first, last)
  | [] -> assert false

(* [init1 n f] builds [[f 0; f 1; ...; f (n-1)]] without any mutation:
   it counts down from [n-1] to [0], consing each result onto an
   accumulator, so [f] is called on the highest index first.
   Returns [[]] when [n <= 0]. Tail-recursive. *)
let init1 n f =
  let rec build i acc =
    if i >= 0 then build (i - 1) (f i :: acc) else acc
  in
  build (n - 1) []

(* [init2 size f] builds [[f 0; f 1; ...; f (size-1)]] front to back:
   it allocates the first cell, then repeatedly allocates a one-element
   cell and splices it after the previous one with [setcdr], so [f] is
   called on increasing indices.
   Returns [[]] when [size = 0].
   Raises [Invalid_argument "ExtList.init"] when [size < 0]. *)
let init2 size f =
  if size < 0 then invalid_arg "ExtList.init"
  else if size = 0 then []
  else begin
    let head = [ f 0 ] in
    (* [extend last i] appends cells for indices [i .. size-1] after [last]. *)
    let rec extend last i =
      if i < size then begin
        let cell = [ f i ] in
        setcdr last cell;
        extend cell (i + 1)
      end
    in
    extend head 1;
    head
  end

(* Benchmark driver.
   First compares [init1] and [init2] building a 10000-element list, then
   compares [duplicate] (tail update through [setcdr]) with [duplicate_i]
   (tail update written inline) on a 10000-element list.
   Each comparison is handed to [throughputN 20] and printed with
   [tabulate]; the 20 is presumably the run time in seconds per test,
   which matches the ~21 s WALL figures in the header -- confirm against
   the Benchmark module's documentation. *)
let () =
  let identity i = i in
  let run_init1 len = init1 len identity
  and run_init2 len = init2 len identity in

  let init_results =
    throughputN 20
      [ ("init1", run_init1, 10000);
        ("init2", run_init2, 10000) ]
  in
  print_newline ();
  tabulate init_results;
  print_newline ();

  (* Input list shared by both duplicate variants. *)
  let sample = init1 10000 (fun i -> i) in
  let dup_results =
    throughputN 20
      [ ("duplicate/ext", duplicate, sample);
        ("duplicate/inl", duplicate_i, sample) ]
  in
  print_newline ();
  tabulate dup_results