From: Amit D. <ami...@gm...> - 2005-10-22 16:09:32
|
In principle, I like the idea. I don't know if I like the implementation, though. It seems a bit too complex, and the variable names are difficult to decipher (e.g. "ac" means "accu/accumulator", but it isn't clear what tcn means; if you want to use short names, might as well use "x", "y", etc.) Maybe this is going too far, but could you overload the O(n^2) implementation with the faster one, for cases where you don't have an ordering over elements? i.e. something like this: let unique ?(last=3Dfalse) ?(cmp=3DNone) l =3D match cmp with | None -> (* old unique *) | Some c -> let cmp_node x y =3D c (fst x) (fst y) in let cmp_idx x y =3D Pervasives.compare (snd x) (snd y) in (* Sorted order, with an index for the original ordering *) let l' =3D stable_sort cmp_node (mapi (fun i x -> (x,i)) l) in let rec remove_dup accu =3D function | [] -> accu | x::xs -> match accu with | [] -> remove_dup [x] xs | (y::ys) as yl -> if (cmp_node x y) =3D 0 then if last then remove_dup (x::ys) xs else remove_dup yl xs else remove_dup (x::yl) xs in (* Remove duplicates, restore original order, and remove the original index. *) map fst (stable_sort cmp_idx (remove_dup [] l')) -Amit On 10/21/05, sejourne_kevin <sej...@ya...> wrote: > > Hello evrybody. > > Thanks for that lib I use evryday. > > But, I notice that ExtList.unique is in O(n^2) this is a problem for me, > I often use list over 100000 elements(and some time over million). > So I wrote a replacement function that is faster O(n+n*log(n)). > It use about 5X the space of the parameter list. On my computer (512 Mo) > the limit before the stak overflow is just over a million. > > > let delete_double_unsort ?(last=3Dfalse) ?(cmp=3DPervasives.compare) l = =3D > let lg =3D List.length l in > let tcn =3D Array.make lg (List.hd l,0) in > let rec add_num n =3D function > [] -> () > | h :: t -> tcn.(n) <- (h,n) ; (add_num (succ n) t) > in > let () =3D add_num 0 l in > let compare x y =3D cmp (fst x) (fst y) in > let tcn' =3D Array.copy tcn in > let () =3D Array.stable_sort compare tcn in > let plg =3D lg -1 in > let rec get_dbl ac n =3D > if n =3D=3D plg then ac > else > let n' =3D n+1 in > let h1 =3D tcn.(n) in > let h2 =3D tcn.(n') in > if (cmp (fst h1) (fst h2)) =3D=3D 0 > then get_dbl ((snd (if last then h1 else h2))::ac) n' > else get_dbl ac n' > in > let dbl =3D get_dbl [] 0 in > let sdbl =3D List.sort Pervasives.compare dbl in > let rec delete n =3D function > [] -> > let ac =3D ref [] in > for i =3D n to lg -1 do > ac :=3D (fst tcn'.(i)) :: !ac > done; > List.rev !ac > | na::ta when na =3D=3D (snd tcn'.(n)) -> delete (n+1) ta > | l -> (fst tcn'.(n)) :: (delete (n+1) l) > in delete 0 sdbl > ;; > > As you can see this function support a additionnal option that let you > select if you want delete the first or the last occurence of a term. > For the "equivalence" with ExtList.unique set "~last:true". > > > for 100000 elements : > Extlib.list.unique : 165.850 seconds > this function : 0.380 seconds > for 1000000 elements : > Extlib.list.unique : stop by error after 2 hours... > this function : 9.600 seconds > > > > Even if you don't want to include it in the ExtLib I would be glade if > someone know how to improve it. > > > > Here few test code: > (* > let bench n p =3D > Random.self_init (); > let rec new_list n =3D > if n =3D=3D 0 then [] else (Random.int <http://Random.int> 100000)::(new_= list > (pred n)) > in > let a_bench n =3D > let l =3D new_list n in Gc.compact(); > (*let t1 =3D Sys.time() in ignore (delete_double_unsort_list l);*) > (*let t1 =3D Sys.time() in ignore (delete_double l);*) > let t1 =3D Sys.time() in ignore ();(*(ExtLib.List.unique l);*) > let t2 =3D Sys.time() in Gc.compact (); > (*let t3 =3D Sys.time() in ignore (ExtLib.List.unique l);*) > let t3 =3D Sys.time() in ignore (delete_double_unsort l); > let t4 =3D Sys.time() in > (t2-.t1,t4-.t3) > in > let rec make_bench r n p =3D > if p =3D=3D 0 then r else make_bench ((a_bench n)::r) n (pred p) > in > let somme r =3D > let a,b =3D List.split r in > List.fold_left (+.) 0. a, List.fold_left (+.) 0. b > in > let t1,t2 =3D somme (make_bench [] n p) in > Printf.printf "\n[%.3f] [%.3f]\n" t1 t2 > ;; > bench (int_of_string Sys.argv.(1)) (int_of_string Sys.argv.(2)) > *) > (* > let verif n =3D > Random.self_init(); > let rec new_list n =3D > if n =3D=3D 0 then [] else ( Random.int <http://Random.int> 10)::(new_lis= t > (pred n)) > in > let test l =3D > let p l =3D List.iter (Printf.printf "%d ") l in > (* p l ;print_newline (); *) > let l1 =3D delete_double_unsort_list ~last:true l > and l2 =3D delete_double_unsort ~last:true l > and l3 =3D ExtLib.List.unique l > in > (* p l1;print_newline (); > p l2;print_newline (); > p l3;print_newline (); *) > (l1 =3D l2 && l1 =3D l3) > in > while (test (new_list n)) do (); done > ;; > verif (int_of_string Sys.argv.(1));; > *) > > > > > > > > _________________________________________________________________________= __ > Appel audio GRATUIT partout dans le monde avec le nouveau Yahoo! Messenge= r > > T=E9l=E9chargez cette version sur http://fr.messenger.yahoo.com > > > > ------------------------------------------------------- > This SF.Net email is sponsored by: > Power Architecture Resource Center: Free content, downloads, discussions, > and more. http://solutions.newsforge.com/ibmarch.tmpl > _______________________________________________ > ocaml-lib-devel mailing list > oca...@li... > https://lists.sourceforge.net/lists/listinfo/ocaml-lib-devel > |