Thread: [Ocaml-lib-devel] Extlib.list.unique

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Hello evrybody.

Thanks for that lib I use evryday.

But, I notice that ExtList.unique is in O(n^2) this is a problem for me,
I often use list over 100000 elements(and some time over million).
So I wrote a replacement function that is faster O(n+n*log(n)).
It use about 5X the space of the parameter list. On my computer (512 Mo)
the limit before the stak overflow is just over a million.

let delete_double_unsort ?(last=false) ?(cmp=Pervasives.compare) l =
  let lg = List.length l in
      let tcn = Array.make lg (List.hd l,0) in
      let rec add_num n = function
	  [] -> ()
	| h :: t -> tcn.(n) <- (h,n) ; (add_num (succ n) t)
      in
      let () = add_num 0 l in
      let compare x y = cmp (fst x) (fst y) in
      let tcn' = Array.copy tcn in
      let () = Array.stable_sort compare tcn in
      let plg = lg -1 in
      let rec get_dbl ac n =
	if n == plg then ac
	else
	  let n' = n+1 in
	  let h1 = tcn.(n) in
	  let h2 = tcn.(n') in
	  if (cmp (fst h1) (fst h2)) == 0
	  then get_dbl ((snd (if last then h1 else h2))::ac) n'
	  else get_dbl ac n'
      in
      let dbl = get_dbl [] 0 in
      let sdbl = List.sort Pervasives.compare dbl in
      let rec delete n = function
	  [] ->
	    let ac = ref [] in
	    for i = n to lg -1 do
	      ac := (fst tcn'.(i)) :: !ac
	    done;
	    List.rev !ac
	| na::ta when na == (snd tcn'.(n)) -> delete (n+1) ta
	| l ->  (fst tcn'.(n)) :: (delete (n+1) l)
      in delete 0 sdbl
;;

As you can see this function support a additionnal option that let you
select if you want delete the first or the last occurence of a term.
For the "equivalence" with ExtList.unique set "~last:true".

for 100000 elements :
Extlib.list.unique : 165.850 seconds
this function : 0.380 seconds
for 1000000 elements :
Extlib.list.unique : stop by error after 2 hours...
this function : 9.600 seconds

Even if you don't want to include it in the ExtLib I would be glade if
someone know how to improve it.

Here few test code:
(*
let bench n p =
  Random.self_init();
  let rec new_list n =
    if n == 0 then [] else (Random.int 100000)::(new_list (pred n))
  in
  let a_bench n =
    let l  = new_list n in Gc.compact();
    (*let t1 = Sys.time() in  ignore (delete_double_unsort_list l);*)
    (*let t1 = Sys.time() in  ignore (delete_double l);*)
    let t1 = Sys.time() in ignore ();(*(ExtLib.List.unique l);*)
    let t2 = Sys.time() in Gc.compact();
    (*let t3 = Sys.time() in ignore (ExtLib.List.unique l);*)
    let t3 = Sys.time() in ignore (delete_double_unsort l);
    let t4 = Sys.time() in
     (t2-.t1,t4-.t3)
  in
  let rec make_bench r n p =
    if p == 0 then r else make_bench ((a_bench n)::r) n (pred p)
  in
  let somme r =
    let a,b = List.split r in
      List.fold_left (+.) 0. a, List.fold_left (+.) 0. b
  in
  let t1,t2 = somme (make_bench [] n p) in
    Printf.printf "\n[%.3f] [%.3f]\n" t1 t2
;;
bench (int_of_string Sys.argv.(1)) (int_of_string Sys.argv.(2))
*)
(*
let verif n =
  Random.self_init();
  let rec new_list n =
    if n == 0 then [] else (Random.int 10)::(new_list (pred n))
  in
  let test l =
    let p l =  List.iter (Printf.printf "%d ") l in
   (*   p l ;print_newline (); *)
    let l1 = delete_double_unsort_list ~last:true l
    and l2 = delete_double_unsort ~last:true l
    and l3 = ExtLib.List.unique l
    in
 (*   p l1;print_newline ();
    p l2;print_newline ();
    p l3;print_newline (); *)
    (l1 = l2 && l1 = l3)
  in
    while (test (new_list n)) do (); done
;;
verif (int_of_string Sys.argv.(1));;
*)

___________________________________________________________________________ 
Appel audio GRATUIT partout dans le monde avec le nouveau Yahoo! Messenger 
Téléchargez cette version sur http://fr.messenger.yahoo.com

Thread: [Ocaml-lib-devel] Extlib.list.unique

ocaml-lib-devel