From: Janne H. <ja...@hy...> - 2004-12-27 16:07:43
|
I wonder if anyone had a chance to look at this? I'd like to submit this before I embark on other bugfixes on the same module. I'm a little hesitant, since I'm new in ExtLib town. Summary for the patch: fixes the completely broken BitSet.compare. Has been tested relatively well with ExtLib testing suite. ciao, janne Janne Hellsten wrote: > Here is a patch that should fix BitSet.compare bugs. I made a > relatively extensive tester for it and couldn't find any problems. > > The compare now works as if the bitvector was a BigInt. I am guessing > that that has been the original intent as well. > > Perhaps someone can take a look before I commit the changes? > > ciao, > janne > >> Compare seems completely broken to me too, at least if one >> expects it to work like you suggest (which is IMHO the >> correct way). >> It doesn't seem to properly account for the two bitsets >> possibly being of different lengths, it just starts >> comparing at the MSB even if the difference between the >> lengths of the sets is more than a byte. It gets more >> complicated because you can't even use |t1|>|t2| as an >> indication that t1>t2, because there is AFAICT no >> guarantee that the leading bit (or even just one bit in >> the first byte of the representation!) is set. > > > ------------------------------------------------------------------------ > > Index: bitSet.ml > =================================================================== > RCS file: /cvsroot/ocaml-lib/extlib-dev/bitSet.ml,v > retrieving revision 1.12 > diff -u -r1.12 bitSet.ml > --- bitSet.ml 22 Dec 2004 07:59:46 -0000 1.12 > +++ bitSet.ml 22 Dec 2004 21:52:46 -0000 > @@ -109,40 +109,56 @@ > else > false > > -(* we can't use Pervasives.compare because bitsets might be of different > - sizes but are actually the same integer *) > + > +exception Break_int of int > + > +(* Find highest set element or raise Not_found *) > +let find_msb t = > + (* Find highest set bit in a byte. Does not work with zero. *) > + let byte_msb b = > + assert (b <> 0); > + let rec loop n = > + if b land (1 lsl n) = 0 then > + loop (n-1) > + else n in > + loop 7 in > + let n = t.len - 1 > + and buf = t.data in > + try > + for i = n downto 0 do > + let byte = bget buf i in > + if byte <> 0 then raise (Break_int ((i lsl log_int_size)+(byte_msb byte))) > + done; > + raise Not_found > + with > + Break_int n -> n > + | _ -> raise Not_found > + > let compare t1 t2 = > - let size1 = t1.len and size2 = t2.len in > - let size = (if size1 < size2 then size1 else size2) in > - let rec loop2 n = > - if n >= size2 then > - 0 > - else if bget t2.data n <> 0 then > - 1 > - else > - loop2 (n+1) > - in > - let rec loop1 n = > - if n >= size1 then > - 0 > - else if bget t1.data n <> 0 then > - -1 > - else > - loop1 (n+1) > - in > - let rec loop n = > - if n = size then > - (if size1 > size2 then loop1 n else loop2 n) > - else > - let d = bget t2.data n - bget t1.data n in > - if d = 0 then > - loop (n+1) > - else if d < 0 then > - -1 > - else > - 1 > - in > - loop 0 > + let some_msb b = try Some (find_msb b) with Not_found -> None in > + match (some_msb t1, some_msb t2) with > + (None, Some _) -> -1 (* 0-y -> -1 *) > + | (Some _, None) -> 1 (* x-0 -> 1 *) > + | (None, None) -> 0 (* 0-0 -> 0 *) > + | (Some a, Some b) -> (* x-y *) > + if a < b then -1 > + else if a > b then 1 > + else > + begin > + (* MSBs differ, we need to scan arrays until we find a > + difference *) > + let ndx = a lsr log_int_size in > + assert (ndx < t1.len && ndx < t2.len); > + try > + for i = ndx downto 0 do > + let b1 = bget t1.data i > + and b2 = bget t2.data i in > + if b1 <> b2 then raise (Break_int (compare b1 b2)) > + done; > + 0 > + with > + Break_int res -> res > + end > > let equals t1 t2 = > compare t1 t2 = 0 |