In principle, I like the idea.  I don't know if I like the implementation, though.  It seems a bit too complex, and the variable names are difficult to decipher (e.g. "ac" means "accu/accumulator", but it isn't clear what tcn means; if you want to use short names, might as well use "x", "y", etc.)  Maybe this is going too far, but could you overload the O(n^2) implementation with the faster one, for cases where you don't have an ordering over elements? i.e. something like this:

let unique ?(last=false) ?(cmp=None) l =
    match cmp with
    | None -> (* old unique *)
    | Some c ->
        let cmp_node x y = c (fst x) (fst y) in
        let cmp_idx x y = (snd x) (snd y) in

        (* Sorted order, with an index for the original ordering *)
        let l' = stable_sort cmp_node (mapi (fun i x -> (x,i)) l) in

        let rec remove_dup accu = function
        | []    -> accu
        | x::xs    ->
            match accu with
            | []        ->
                remove_dup [x] xs
            | (y::ys) as yl ->
                if (cmp_node x y) = 0 then
                    if last then
                        remove_dup (x::ys) xs
                        remove_dup yl xs
                    remove_dup (x::yl) xs
            (* Remove duplicates, restore original order,
               and remove the original index. *)
            map fst (stable_sort cmp_idx (remove_dup [] l'))


On 10/21/05, sejourne_kevin <> wrote:
Hello evrybody.

Thanks for that lib I use evryday.

But, I notice that ExtList.unique is in O(n^2) this is a problem for me,
I often use list over 100000 elements(and some time over million).
So I wrote a replacement function that is faster O(n+n*log(n)).
It use about 5X the space of the parameter list. On my computer (512 Mo)
the limit before the stak overflow is just over a million.

let delete_double_unsort ?(last=false) ?( l =
  let lg = List.length l in
      let tcn = Array.make lg (List.hd l,0) in
      let rec add_num n = function
          [] -> ()
        | h :: t -> tcn.(n) <- (h,n) ; (add_num (succ n) t)
      let () = add_num 0 l in
      let compare x y = cmp (fst x) (fst y) in
      let tcn' = Array.copy tcn in
      let () = Array.stable_sort compare tcn in
      let plg = lg -1 in
      let rec get_dbl ac n =
        if n == plg then ac
          let n' = n+1 in
          let h1 = tcn.(n) in
          let h2 = tcn.(n') in
          if (cmp (fst h1) (fst h2)) == 0
          then get_dbl ((snd (if last then h1 else h2))::ac) n'
          else get_dbl ac n'
      let dbl = get_dbl [] 0 in
      let sdbl = List.sort dbl in
      let rec delete n = function
          [] ->
            let ac = ref [] in
            for i = n to lg -1 do
              ac := (fst tcn'.(i)) :: !ac
            List.rev !ac
        | na::ta when na == (snd tcn'.(n)) -> delete (n+1) ta
        | l ->  (fst tcn'.(n)) :: (delete (n+1) l)
      in delete 0 sdbl

As you can see this function support a additionnal option that let you
select if you want delete the first or the last occurence of a term.
For the "equivalence" with ExtList.unique set "~last:true".

for 100000 elements :
Extlib.list.unique : 165.850 seconds
this function : 0.380 seconds
for 1000000 elements :
Extlib.list.unique : stop by error after 2 hours...
this function : 9.600 seconds

Even if you don't want to include it in the ExtLib I would be glade if
someone know how to improve it.

Here few test code:
let bench n p =
   Random.self_init ();
  let rec new_list n =
    if n == 0 then [] else ( 100000)::(new_list (pred n))
  let a_bench n =
    let l  = new_list n in Gc.compact();
    (*let t1 = Sys.time() in  ignore (delete_double_unsort_list l);*)
    (*let t1 = Sys.time() in  ignore (delete_double l);*)
    let t1 = Sys.time() in ignore ();(*(ExtLib.List.unique l);*)
    let t2 = Sys.time() in Gc.compact ();
    (*let t3 = Sys.time() in ignore (ExtLib.List.unique l);*)
    let t3 = Sys.time() in ignore (delete_double_unsort l);
    let t4 = Sys.time() in
  let rec make_bench r n p =
    if p == 0 then r else make_bench ((a_bench n)::r) n (pred p)
  let somme r =
    let a,b = List.split r in
      List.fold_left (+.) 0. a, List.fold_left (+.) 0. b
  let t1,t2 = somme (make_bench [] n p) in
    Printf.printf "\n[%.3f] [%.3f]\n" t1 t2
bench (int_of_string Sys.argv.(1)) (int_of_string Sys.argv.(2))
let verif n =
  let rec new_list n =
    if n == 0 then [] else ( 10)::(new_list (pred n))
  let test l =
    let p l =  List.iter (Printf.printf "%d ") l in
   (*   p l ;print_newline (); *)
    let l1 = delete_double_unsort_list ~last:true l
    and l2 = delete_double_unsort ~last:true l
    and l3 = ExtLib.List.unique l
(*   p l1;print_newline ();
    p l2;print_newline ();
    p l3;print_newline (); *)
    (l1 = l2 && l1 = l3)
    while (test (new_list n)) do (); done
verif (int_of_string Sys.argv.(1));;

Appel audio GRATUIT partout dans le monde avec le nouveau Yahoo! Messenger
TÚlÚchargez cette version sur

This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more.
ocaml-lib-devel mailing list