Thread: [Assorted-commits] SF.net SVN: assorted: [223] scala-commons/trunk/src/commons/Collections. scala
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-01-11 03:59:13
|
Revision: 223 http://assorted.svn.sourceforge.net/assorted/?rev=223&view=rev Author: yangzhang Date: 2008-01-10 19:59:19 -0800 (Thu, 10 Jan 2008) Log Message: ----------- added multimap Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-01-11 03:57:11 UTC (rev 222) +++ scala-commons/trunk/src/commons/Collections.scala 2008-01-11 03:59:19 UTC (rev 223) @@ -453,6 +453,17 @@ (h, took) //(h, sortCounts(h) take n) } + + def multimap[a,b](xs: List[(a,b)]) = { + val h = new mut.HashMap[a, mut.ArrayBuffer[b]] { + override def default(k: a) = { + this(k) = new mut.ArrayBuffer[b] + this(k) + } + } + for ((k,v) <- xs) h(k) += v + h + } //def merge[a](in: Iterator[Iterator[a]]) = { // val iters = new Array(in: _*) // val cur = Array(in map (_ next): _*) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-03 07:41:48
|
Revision: 295 http://assorted.svn.sourceforge.net/assorted/?rev=295&view=rev Author: yangzhang Date: 2008-02-02 23:41:35 -0800 (Sat, 02 Feb 2008) Log Message: ----------- documented a bunch of stuff to prep for scalax Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-02-03 05:24:11 UTC (rev 294) +++ scala-commons/trunk/src/commons/Collections.scala 2008-02-03 07:41:35 UTC (rev 295) @@ -1,111 +1,268 @@ package commons -// TODO organize import scala.collection.{mutable => mut} +// TODO: Is there a way to avoid having to explicitly handle both Iterators and +// Iterables? + +// TODO: Pretty much anywhere I take a number, I'd like to make the facility +// more general, but without (say) incurring the reflection costs associated +// with structural typing. + object Collections { -/* - implicit def iterator2iterable[a](s: Iterator[a]) = Stream.fromIterator(s) - case class II[a](i: Iterator[a]) { - type A = a - def ++[b >: a](o: Iterable[b]) = II(i ++ o) - def /:[b](z: b)(op: (b, a) => b) = i /: b - def :\[b](z: b)(op: (a, b) => b) = i :\ b - def addString (buf: scala.compat.StringBuilder - start: String, sep: String, end: String) = - i addString (buf, start, sep, end) - def concat [B >: A] (that: scala.Iterable[B]) = II(i append that) - def copyToBuffer [B >: A] (dest: scala.collection.mutable.Buffer[B]) : scala.Unit = - i copyToBuffer dest - def drop (n: scala.Int) : scala.Iterable[A] = i drop n - def dropWhile (p: (A) => scala.Boolean) : scala.Iterable[A] = i dropWhile n - abstract def elements : scala.Iterator[A] = - Creates a new iterator over all elements contained in this object. - def exists (p: (A) => scala.Boolean) : scala.Boolean - Apply a predicate p to all elements of this iterable object and return true, iff there is at least one element for which p yields true. - def filter (p: (A) => scala.Boolean) : scala.Iterable[A] - Returns all the elements of this iterable that satisfy the predicate p. The order of the elements is preserved. - def find (p: (A) => scala.Boolean) : scala.Option[A] - Find and return the first element of the iterable object satisfying a predicate, if any. - def findIndexOf (p: (A) => scala.Boolean) : scala.Int - Returns index of the first element satisying a predicate, or -1. - def flatMap [B] (f: (A) => scala.Iterable[B]) : scala.Iterable[B] - Applies the given function f to each element of this iterable, then concatenates the results. - def foldLeft [B] (z: B)(op: (B, A) => B) : B - Combines the elements of this iterable object together using the binary function f, from left to right, and starting with the value z. - def foldRight [B] (z: B)(op: (A, B) => B) : B - Combines the elements of this list together using the binary function f, from right to left, and starting with the value z. - def forall (p: (A) => scala.Boolean) : scala.Boolean - Apply a predicate p to all elements of this iterable object and return true, iff the predicate yields true for all elements. - def foreach (f: (A) => scala.Unit) : scala.Unit - Apply a function f to all elements of this iterable object. - def indexOf [B >: A] (elem: B) : scala.Int - Returns the index of the first occurence of the specified object in this iterable object. - def map [B] (f: (A) => B) : scala.Iterable[B] - Returns the iterable resulting from applying the given function f to each element of this iterable. - def mkString (sep: java.lang.String) : java.lang.String - Returns a string representation of this iterable object. The string representations of elements (w.r.t. the method toString()) are separated by the string sep. - def mkString (start: java.lang.String, sep: java.lang.String, end: java.lang.String) : java.lang.String - Returns a string representation of this iterable object. The resulting string begins with the string start and is finished by the string end. Inside, the string representations of elements (w.r.t. the method toString()) are separated by the string sep. - - Ex: - List(1, 2, 3).mkString("(", "; ", ")") = "(1; 2; 3)" - def reduceLeft [B >: A] (op: (B, B) => B) : B - Combines the elements of this iterable object together using the binary operator op, from left to right - def reduceRight [B >: A] (op: (B, B) => B) : B - Combines the elements of this iterable object together using the binary operator op, from right to left - def sameElements [B >: A] (that: scala.Iterable[B]) : scala.Boolean - Checks if the other iterable object contains the same elements. - def take (n: scala.Int) : scala.Iterable[A] - Returns an iterable consisting only over the first n elements of this iterable, or else the whole iterable, if it has less than n elements. - def takeWhile (p: (A) => scala.Boolean) : scala.Iterable[A] - Returns the longest prefix of this iterable whose elements satisfy the predicate p. - def toList : scala.List[A] - } -*/ - def split(x: String) = x.trim.split("\\s+") - // TODO how to make the following redundant? (so that views can chain) + // TODO: Make faster. + /** + * Split a String into words. Use this when possible, since its + * implementation may become more efficient than the regex-based + * String.split() in the future. + */ + def words(x: String) = x.trim.split("\\s+") + + // + // Views + // + + // TODO: Is there a way to "chain" views? Then Iterable2FilterMap is + // unnecessary and can be removed. implicit def Iterable2FilterMap[T](s: Iterable[T]) = new FilterMap(s.elements) implicit def Iterable2Iterator[T](s: Iterable[T]) = s.elements implicit def Iterator2FilterMap[T](s: Iterator[T]) = new FilterMap(s) + class FilterMap[T](s: Iterator[T]) { + /** + * Given the partial function f, filter out elements over which f is + * undefined, and map the remaining elements with f. Note that this + * evaluates f twice for each element. + */ def filterMap[B](f: PartialFunction[T,B]) = s filter (f.isDefinedAt) map f + /** + * Given the function f, filter out elements for which f returns None, and + * map the remaining elements with f, extracting the value from the Some. + * This evaluates f once for each element. + */ def filterMapOption[B](f: Function[T,Option[B]]) = - s map f filter (None !=) + s map f filter (None !=) map (_.get) } + implicit def Iterator2XIterator[T](s: Iterator[T]) = new XIterator(s) + // TODO: Following doesn't compile due to recursive call. How? + // implicit def XIterator2Iterator[T](x: XIterator[T]) = x.i + class XIterator[a](i: Iterator[a]) { + /** + * Iterate over the iterator and return the last element. + */ def last = { var last = i.next for (val x <- i) { last = x } last } } + implicit def pair2xpair(p: (Int,Int)) = new XPair(p._1,p._2) implicit def xpair2pair(p: XPair) = (p.x,p.y) + case class XPair(x: Int, y: Int) { + /** + * Add a pair of Ints. + */ def +(p: (Int, Int)) = Pair(x+p.x, y+p.y) } + + implicit def MapToRichMap[k,v](m: Map[k,v]) = new RichMap(m) + implicit def RichMapToMap[k,v](m: RichMap[k,v]) = m.m + + class RichMap[k,v](val m: Map[k,v]) { + def toArray = { + val a = new Array[(k,v)](m size) + m copyToArray (a,0) + a + } + } + + implicit def StreamToRichStream[a](s: Stream[a]) = new RichStream(s) + implicit def RichStreamToStream[a](s: RichStream[a]) = s.s + + class RichStream[a](val s: Stream[a]) { + // TODO: Fix this; it's dangerously inefficient and could cause a stack + // overflow (see StreamRecursion test in sandbox)! + // TODO: Rename; confusing enough with groupBy != Haskell's groupBy, which + // is in turn == groupByPairwise. + /** + * This splits up the stream by using spanBy + * + * <code>[1..9].groupBy(_/3) == [[1,2],[3,4,5],[6,7,8]]</code> + */ + def groupBy[b](f: a => b) = { + def r(s: Stream[a]): Stream[Stream[a]] = s match { + case Stream.cons(h,t) => { + val cur = f(h) + // TODO: The next line induces a bug in Scala. + // val (x,y): (Seq[a],Seq[a]) = spanBy(s)(f(_) == cur) + val (x,y) = spanBy(s)(f(_) == cur) + Stream.cons( + Stream fromIterator x.elements, + r(Stream fromIterator y.elements) + ) + } + case Stream.empty => Stream.empty + } + r(s) + } + } + + // TODO: Perhaps this belongs in a different module. + + // + // Simple statistics + // + + import scala.util.Sorting + def sum(xs: Iterator[Int]) = xs.foldLeft(0)(_+_) + def sum(xs: Seq[Double]) = xs reduceLeft ((x:Double,y:Double)=>x+y) def mean(xs: Seq[Int]) = sum(xs.elements) / xs.size + def median(xs: Seq[Long]) = xs(xs.length / 2) + + /** + * Destructively sort the array, and returns a tuple of the (mean, median, + * standard deviation, variance, minimum, and maximum) of a list. + */ + def sortStats(xs: Array[Double]) = { + Sorting quickSort xs + val (mean, variance) = meanAndVariance(xs) + val (min, median, max) = (xs(xs.length / 2), xs(0), xs.last) + val sdev = Math sqrt variance + (mean, median, sdev, variance, min, max) + } + + /** + * Return a tuple of the mean and variance of the given sequence. + */ + def meanAndVariance(xs: Seq[Double]) = { + val mean = sum(xs) / xs.length + val variance = sum( xs map (x => Math pow (x - mean, 2)) ) / xs.length + (mean, variance) + } + + // TODO: Can I write functions that are generic over n-tuples? + /** + * Returns the two smallest elements. + */ + def mins(xs: Iterable[Long]) = { + var min1 = java.lang.Long.MAX_VALUE // smallest + var min2 = java.lang.Long.MAX_VALUE // second smallest + for (x <- xs) { + if (x < min1) { min2 = min1; min1 = x } + else if (x > min1 && x < min2) { min2 = x } + } + (min1,min2) + } + + // TODO: Rename. + /** + * Destructively sort the array by the tuples' second elements, and return + * the array. + */ + def sortCounts[a](xs: Array[(a,Int)]) = { + Sorting.stableSort(xs, (p: (a,Int), q: (a,Int)) => p._2 > q._2) + xs + } + + // TODO: Rename. + /** + * Return an array of the mappings in the Map, sorted by their entry-values. + */ + def sortCounts[a](h: mut.Map[a,Int]): Seq[(a,Int)] = + sortCounts(h toArray) + + /** + * Given an iterator, return a HashMap from each distinct element to its + * count. + */ + def hist[a](xs: Iterator[a]) = { + val h = new mut.HashMap[a,Int] { override def default(k: a) = 0 } + for (x <- xs) { h(x) = h(x) + 1 } + h + } + + /** + * Return a list of the most popular elements along with their histogram + * counts. + */ + def topHist[a](xs: Iterator[a], n: Int): (mut.HashMap[a,Int], Seq[(a,Int)]) = { + val h = hist(xs) + val sorted = sortCounts(h) + val took = take(sorted, n) + (h, took) + } + + /** + * Return a list of the most popular element. An arbitrary element may be + * returned in case of ties. + */ + def mostPopular[a](xs: Iterator[a]) = topHist(xs, 1)._2 toList 0 _1 + + // + // Logic + // + + /** + * Negat a predicate function. + */ def not[a](pred: a => Boolean)(x: a) = !pred(x) - // TODO make the following into rich iterator methods? - // 3 [a,b,c,d,e] -> ([a,b,c],[d,e]) - def span[a](n: Int)(xs: Seq[a]) = { - (xs take n, xs drop n) - } - def spanstr[a](n: Int)(xs: String) = { + + /** + * Same as <code>xs forall (x => x)</code> + */ + def all(xs: Seq[Boolean]) = xs forall (x => x) + + // + // Sequences + // + + /** + * A combination of take and drop. + * + * <code>span(3)([a,b,c,d,e]) == ([a,b,c],[d,e])</code> + */ + def span[a](n: Int)(xs: Seq[a]) = (xs take n, xs drop n) + + /** + * Same as span but for Strings, which are treated as an array of chars. + * + * <code>spanstr(3)("abcde") == ("abc","de") + */ + def spanstr[a](n: Int)(xs: String) = (xs take n mkString ("","",""), xs drop n mkString ("","","")) - } - // [1,3,5,6,7,8] odd -> ([1,3,5],[6,7,8]) - // [] _ -> ([],[]) + + // TODO where does this go??? + /** + * [1,3,5,6,7,8] odd -> ([1,3,5],[6,7,8]) + * [] _ -> ([],[]) + */ + + /** + * A combination of takeWhile and dropWhile. + * + * <code>spanBy([1,2,3,4,5])((_ != 3)) == ([1,2],[3,4,5])</code> + */ def spanBy[a](xs: Seq[a])(pred: a => Boolean): (Seq[a],Seq[a]) = { val take = xs takeWhile pred val rest = xs dropWhile pred (take, rest) } + + /** + * Split up the sequence by the given predicate. Similar to String.split, but + * matching is per-element. + * + * <code> + * splitBy([1..10], (_ % 4 < 2)) == [[2,3],[6,7],[8,9]] + * </code> + */ def splitBy[a](xs: Seq[a])(pred: a => Boolean): Stream[Seq[a]] = { val consider = xs dropWhile pred if (consider.length == 0) { @@ -115,39 +272,100 @@ Stream.cons(take, splitBy(rest)(pred)) } } - // returns all length-n slices of xs - // [a,b,c,d,e],3 -> [[a,b,c],[b,c,d],[c,d,e]] - def slices[a]( xs: Seq[a], n: Int ) = - 0 to (xs.length - n) map { i => xs slice (i,i+n) } + + // TODO: What's a better name? + /** + * Same as splitBy, but uses the negation of the pred. Note that this is + * quite different from Haskell's groupBy. + * + * <code>groupBy([1,2,3,4,5], (_ % 4 < 2)) == [[1],[4,5],[8,9]]</code> + */ def groupBy[a](xs: Seq[a])(pred: a => Boolean): Stream[Seq[a]] = splitBy(xs)(not(pred)) - // note this is pretty different from the above groupBy/splitBy + + // TODO: these was renamed from slices. Make sure nothing was broken. + /** + * Return all length-n "chunks" of xs as a stream. + * + * <code>chunks([a..h], 3) == [[a,b,c],[d,e,f],[g,h]]</code> + */ + def chunks[a](n: Int)(xs: Seq[a]): Stream[Seq[a]] = { + if (xs.length == 0) Stream.empty + else Stream.cons(xs take n, chunks(n)(xs drop n)) + } + + // TODO Type-check this code; is it actually a projection? + /** + * Return all length-n slices of xs. + * + * <code>slice([a,b,c,d,e], 3) == [[a,b,c],[b,c,d],[c,d,e]]</code> + */ + def slices[a](xs: Seq[a], n: Int) = + 0 to (xs.length - n) map { i => xs slice (i,i+n) } + + /** + * Same as slices(_,2) but yields tuples. + * + * <code> + * pairwise([a,b,c]) == [(a,b),(c,d)] + * pairwise([a]) == [] + * pairwise([]) == [] + * </code> + */ + def pairwise[a](xs: Iterable[a]) = xs.elements zip (xs.elements drop 1) + + /** + * Same as Haskell's groupBy. Groups together equal elements, letting the + * user supply a custom equality test. Note this is quite different from + * groupBy/splitBy. + * + * <code> + * groupPairwiseBy([1..9], (_/3 == _/3)) == [[1,2],[3,4,5],[6,7,8],[9]]] + * </code> + */ def groupPairwiseBy[a](xs: Seq[a])(pred: (a,a) => Boolean): List[List[a]] = (xs foldLeft List(List[a]())) { (a,y) => a match { case List(List()) => List(List(y)) case (x :: xs) :: os if x == y => (y :: x :: xs) :: os case xs => List(y) :: xs }} -/* - (xs foldLeft List(List())) { (xs,y) => { - if (xs.head == Nil || y == xs.head.head) (y :: xs.head) :: xs.tail - else List(y) :: xs - }} -*/ + + /** + * @deprecated Use <code>Iterable mkString ""</code> + */ + def str(xs: Iterable[Char]) = xs mkString "" + + /** + * Return an infinite stream, where each element evaluates gen. + */ def repeat[a](gen: => a): Stream[a] = Stream.cons(gen, repeat(gen)) - def str(xs: Iterable[Char]) = xs mkString ("","","") + + /** + * Return a stream of length at least n whose first elements are from the + * given iterator, but if the iterator has fewer than n elements, then the + * remaining elements are repeat(gen). + */ def pad[a](n: Int, e: => a, s: Iterator[a]) = Stream.concat(Stream.fromIterator(s), repeat(e)) take n + + /** + * Truncate elements off the end of the sequence that satisfy the given + * predicate. A reverse dropWhile. + * + * <code>truncateWhile([1,2,3,4,5], (_ != 3)) == [1,2,3]</code> + */ def truncateWhile[a](seq: Seq[a])(pred: a => Boolean) = (seq.reverse dropWhile pred).reverse + + /** + * Find the index of the last element in the sequence that satisfies the + * given predicate. + */ def lastIndexWhere[a](seq: Seq[a])(pred: a => Boolean) = { val i = seq.reverse findIndexOf pred if (i < 0) i else seq.length - i - 1 } - def slices[a](n: Int)(xs: Seq[a]): Stream[Seq[a]] = { - if (xs.length == 0) Stream.empty - else Stream.cons(xs take n, slices(n)(xs drop n)) - } + def argmax(xs: Seq[Int]) = xs.elements.zipWithIndex reduceLeft { (a:(Int,Int),b:(Int,Int)) => (a,b) match { @@ -160,51 +378,31 @@ case ((x,i),(y,j)) => if (x < y) a else b } } - implicit def MapToRichMap[k,v](m: Map[k,v]) = new RichMap(m) - implicit def RichMapToMap[k,v](m: RichMap[k,v]) = m.m - class RichMap[k,v](val m: Map[k,v]) { - def toArray = { - val a = new Array[(k,v)](m size) - m copyToArray (a,0) - a - } - } - implicit def StreamToRichStream[a](s: Stream[a]) = new RichStream(s) - implicit def RichStreamToStream[a](s: RichStream[a]) = s.s - class RichStream[a](val s: Stream[a]) { - // TODO fix this; it's dangerously inefficient and could cause a stack - // overflow (see StreamRecursion sandbox test)! - def groupBy[b](f: a => b) = { - def r(s: Stream[a]): Stream[Stream[a]] = s match { - case Stream.cons(h,t) => { - val cur = f(h) - // SCALABUG val (x,y): (Seq[a],Seq[a]) = spanBy(s)(f(_) == cur) - val (x,y) = spanBy(s)(f(_) == cur) - Stream.cons( - Stream fromIterator x.elements, - r(Stream fromIterator y.elements) - ) - } - case Stream.empty => Stream.empty - } - r(s) - } - } + + /** + * Note that this operates by duplicating the iterator and traversing each + * copy. + */ def unzip[a,b](pairs: Iterator[(a,b)]): (Iterator[a],Iterator[b]) = { val (i,j) = pairs.duplicate (i map (_._1), j map (_._2)) } - //unzip( Stream fromIterator pairs ) + def unzip[a,b](pairs: Stream[(a,b)]): (Stream[a],Stream[b]) = (pairs map (_._1), pairs map (_._2)) - // 0 1 2 3 4 5 6 7 8 9 - // [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] - // [] -> [] + + /** + * Indexes the result of groupBy. + * + * 0 1 2 3 4 5 6 7 8 9 + * [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] + * [] -> [] + */ def indexGroups[a,b](xs: Seq[a])(f: a => b) = { val i = Iterator from 0 Stream fromIterator xs.elements groupBy f map (_ map (x => i.next)) } - def all(xs: Seq[Boolean]) = xs forall (x => x) + def zipx[a](xss: Seq[Iterable[a]]): Stream[Seq[a]] = { val is = xss map (_.elements) def rec: Stream[Seq[a]] = @@ -219,14 +417,7 @@ yield xss.elements map (_(i)) List(iters: _*) } - def hist[a](xs: Iterator[a]) = { - val h = new mut.HashMap[a,Int] { override def default(k: a) = 0 } - for (x <- xs) { h(x) = h(x) + 1 } - h - } -// def zipx[a](xs: Iterator[a]) = { -// val xs -// } + def coord[a](i: Int, xss: Iterable[Seq[a]]): (Int, Int) = { var x = i var y = 0 @@ -236,41 +427,19 @@ } (x,y) } - def swap[a,b](p: (a,b)) = (p._2, p._1) - //import java.util.{Collection => JCol, Iterator => JIter} - //// Java collections - //implicit def JavaCollectionToList[a>:Null<:Object](xs: JCol[a]): List[a] = - // xs.iterator.toList - //implicit def JavaIteratorToStream[a>:Null<:Object](it: JIter[a]): Stream[a] = { - // it.hasNext match { - // case true => Stream.cons(it.next, JavaIteratorToStream(it)) - // case false => Stream.empty; - // } - //} - //import java.util.{Set => JSet} - //import scala.collection.mutable.Set - //implicit def jsets2sets[a >: Null <: Object](xs: JSet[a]): Set[a] = - // Set(xs: _*) - - //import java.util.{List => JList, ArrayList} - //implicit def SeqToJList[a>:Null<:Object](xs: Seq[a]): JList[a] = { - // val list = new ArrayList[a] - // xs foreach (list.add(_)) - // xs - //} - + /** + * Convenience constructor for converting an Iterator into an ArrayBuffer. + */ def newArrayBuffer[a](xs: Iterator[a]) = { val buf = new mut.ArrayBuffer[a] buf ++= xs buf } - //def newArrayBuffer[a](x: a): ArrayBuffer[a] = newArrayBuffer(List(x)) -/* def iter2richiter[a](i: JIter[a]) = - RichIterator( - case class RichIterator[a](i: JIter[a]) extends Iterator[a] { - def - }*/ + /** + * Swap the elements of a tuple. + */ + def swap[a,b](p: (a,b)) = (p._2, p._1) object Tree { abstract class Tree[a] { @@ -331,6 +500,8 @@ case Not(t) => t flatten f case Leaf(x) => Stream cons (f(x), Stream empty) } + // TODO this looks a candidate for some nicer monadic-style sum + // f returns false if the leaf is to be pruned out, and true otherwise def prune[b](f: a => Option[b]): Option[BoolTree[b]] = this match { case And(ts) => { @@ -356,15 +527,6 @@ case class Not[a](t: BoolTree[a]) extends BoolTree[a] case class Leaf[a](x: a) extends BoolTree[a] - // TODO this looks a candidate for some nicer monadic-style sum - // f returns false if the leaf is to be pruned out, and true otherwise - - // [a,b,c] -> [(a,b),(c,d)] - // [] -> [] - // [a] -> [] - def pairwise[a](xs: Iterable[a]) = xs.elements zip (xs.elements drop 1) - - // uniq def uniq[a](xs: Seq[a]) = groupPairwiseBy(xs)(_==_) map (_.head) // [b,a,c] -> (a,1) @@ -408,7 +570,13 @@ } loop } - // efficient (non-recursive) Iterator.dropWhile + + // Performance work-arounds. + + /** + * The standard library Iterator.dropWhile is inefficient and non-scalable, + * as it is recursive. This is an efficient (tail-recursive) implementation. + */ def dropWhile[a](iter: Iterator[a])(p: a => Boolean) = { def loop: Iterator[a] = { if (iter hasNext) { @@ -421,39 +589,16 @@ } loop } - // find the two smallest numbers - def mins(xs: Iterable[Long]) = { - var min1 = java.lang.Long.MAX_VALUE // smallest - var min2 = java.lang.Long.MAX_VALUE // second smallest - for (x <- xs) { - if (x < min1) { min2 = min1; min1 = x } - else if (x > min1 && x < min2) { min2 = x } - } - (min1,min2) - } - import scala.util.Sorting - def sortCounts[a](xs: Array[(a,Int)]) = { - Sorting.stableSort(xs, (p: (a,Int), q: (a,Int)) => p._2 > q._2) - xs - } - def sortCounts[a](h: mut.Map[a,Int]): Seq[(a,Int)] = - sortCounts(h toArray) - // TODO this is a + /** + * Take an elements from a list. + */ def take[a](src: Seq[a], n: Int) = { val count = n min src.length val dst = new Array[a](count) src.elements take count copyToArray (dst, 0) dst } - def mostPopular[a](xs: Iterator[a]) = topHist(xs, 1)._2 toList 0 _1 - def topHist[a](xs: Iterator[a], n: Int): (mut.HashMap[a,Int], Seq[(a,Int)]) = { - val h = hist(xs) - val sorted = sortCounts(h) - val took = take(sorted, n) - (h, took) - //(h, sortCounts(h) take n) - } def multimap[a,b](xs: List[(a,b)]) = { val h = new mut.HashMap[a, mut.ArrayBuffer[b]] { @@ -465,72 +610,5 @@ for ((k,v) <- xs) h(k) += v h } - //def merge[a](in: Iterator[Iterator[a]]) = { - // val iters = new Array(in: _*) - // val cur = Array(in map (_ next): _*) - // def r: Stream[a] = { - // if (iters.length == 0) Stream.empty - // else { - // val minindex, minvalue = argmin(cur) - // cur(minindex) = iters(minindex).next - // Stream.cons( - // } - // } - //} -// def r(xs: Stream[a], last: a): Stream[a] = { -// if (!xs.hasNext) Stream.empty -// else { -// val x = xs.next -// } -// else Stream.cons( -// } -// if (xs hasNext) { -// val out = new ArrayBuffer[a] -// var last = null -// for (x <- xs) { out += x } -// out -// } else { -// xs -// } -// } - -// class Lazy[a](x: => a) { -// var y: Option[a] = None -// def get = { if (y isEmpty) y = Some(x); y.get } -// } -// def lazy[a](x: => a) = new Lazy(x) -// implicit def force[a](lz: Lazy[a]) = lz.get def iterator2array[a](xs: Iterator[a]) = xs.toList.toArray - def sum(xs: Seq[Double]) = xs reduceLeft ((x:Double,y:Double)=>x+y) - def median(xs: Seq[Long]) = xs(xs.length / 2) - def sortStats(xs: Array[Double]) = { - Sorting quickSort xs - val (mean, variance) = meanAndVariance(xs) - val (min, median, max) = (xs(xs.length / 2), xs(0), xs.last) - val sdev = Math sqrt variance - (mean, median, sdev, variance, min, max) - } - def meanAndVariance(xs: Seq[Double]) = { - val mean = sum(xs) / xs.length - val variance = sum( xs map (x => Math pow (x - mean, 2)) ) / xs.length - (mean, variance) - } - -// class ImmutableTable[a](initRows: Seq[Seq[a]]) { -// val rows = initRows toArray // TODO make this immutable -// def cols = unzipx(rows) -// def toString = { -// val widths = for (col <- cols) yield Iterable max col -// val lines = -// for (row <- rows) yield { -// for (cell <- row) yield { -// pad asdf -// } -// } -// val s = "" -// } -// println(rows) -// } } - -//import Collections._ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-03 08:39:44
|
Revision: 297 http://assorted.svn.sourceforge.net/assorted/?rev=297&view=rev Author: yangzhang Date: 2008-02-03 00:39:46 -0800 (Sun, 03 Feb 2008) Log Message: ----------- fixed paragraphs Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-02-03 07:41:48 UTC (rev 296) +++ scala-commons/trunk/src/commons/Collections.scala 2008-02-03 08:39:46 UTC (rev 297) @@ -92,7 +92,7 @@ // is in turn == groupByPairwise. /** * This splits up the stream by using spanBy - * + * <p> * <code>[1..9].groupBy(_/3) == [[1,2],[3,4,5],[6,7,8]]</code> */ def groupBy[b](f: a => b) = { @@ -225,28 +225,22 @@ /** * A combination of take and drop. - * + * <p> * <code>span(3)([a,b,c,d,e]) == ([a,b,c],[d,e])</code> */ def span[a](n: Int)(xs: Seq[a]) = (xs take n, xs drop n) /** * Same as span but for Strings, which are treated as an array of chars. - * + * <p> * <code>spanstr(3)("abcde") == ("abc","de") */ def spanstr[a](n: Int)(xs: String) = (xs take n mkString ("","",""), xs drop n mkString ("","","")) - // TODO where does this go??? /** - * [1,3,5,6,7,8] odd -> ([1,3,5],[6,7,8]) - * [] _ -> ([],[]) - */ - - /** * A combination of takeWhile and dropWhile. - * + * <p> * <code>spanBy([1,2,3,4,5])((_ != 3)) == ([1,2],[3,4,5])</code> */ def spanBy[a](xs: Seq[a])(pred: a => Boolean): (Seq[a],Seq[a]) = { @@ -258,7 +252,7 @@ /** * Split up the sequence by the given predicate. Similar to String.split, but * matching is per-element. - * + * <p> * <code> * splitBy([1..10], (_ % 4 < 2)) == [[2,3],[6,7],[8,9]] * </code> @@ -277,7 +271,7 @@ /** * Same as splitBy, but uses the negation of the pred. Note that this is * quite different from Haskell's groupBy. - * + * <p> * <code>groupBy([1,2,3,4,5], (_ % 4 < 2)) == [[1],[4,5],[8,9]]</code> */ def groupBy[a](xs: Seq[a])(pred: a => Boolean): Stream[Seq[a]] = @@ -286,7 +280,7 @@ // TODO: these was renamed from slices. Make sure nothing was broken. /** * Return all length-n "chunks" of xs as a stream. - * + * <p> * <code>chunks([a..h], 3) == [[a,b,c],[d,e,f],[g,h]]</code> */ def chunks[a](n: Int)(xs: Seq[a]): Stream[Seq[a]] = { @@ -297,7 +291,7 @@ // TODO Type-check this code; is it actually a projection? /** * Return all length-n slices of xs. - * + * <p> * <code>slice([a,b,c,d,e], 3) == [[a,b,c],[b,c,d],[c,d,e]]</code> */ def slices[a](xs: Seq[a], n: Int) = @@ -305,7 +299,7 @@ /** * Same as slices(_,2) but yields tuples. - * + * <p> * <code> * pairwise([a,b,c]) == [(a,b),(c,d)] * pairwise([a]) == [] @@ -318,7 +312,7 @@ * Same as Haskell's groupBy. Groups together equal elements, letting the * user supply a custom equality test. Note this is quite different from * groupBy/splitBy. - * + * <p> * <code> * groupPairwiseBy([1..9], (_/3 == _/3)) == [[1,2],[3,4,5],[6,7,8],[9]]] * </code> @@ -351,7 +345,7 @@ /** * Truncate elements off the end of the sequence that satisfy the given * predicate. A reverse dropWhile. - * + * <p> * <code>truncateWhile([1,2,3,4,5], (_ != 3)) == [1,2,3]</code> */ def truncateWhile[a](seq: Seq[a])(pred: a => Boolean) = @@ -393,7 +387,7 @@ /** * Indexes the result of groupBy. - * + * <p> * 0 1 2 3 4 5 6 7 8 9 * [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] * [] -> [] @@ -611,4 +605,10 @@ h } def iterator2array[a](xs: Iterator[a]) = xs.toList.toArray + + // TODO where does this go??? + // + // [1,3,5,6,7,8] odd -> ([1,3,5],[6,7,8]) + // [] _ -> ([],[]) + // } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-10 04:04:40
|
Revision: 348 http://assorted.svn.sourceforge.net/assorted/?rev=348&view=rev Author: yangzhang Date: 2008-02-09 20:04:44 -0800 (Sat, 09 Feb 2008) Log Message: ----------- added bunch of stuff to Collections Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-02-10 03:54:41 UTC (rev 347) +++ scala-commons/trunk/src/commons/Collections.scala 2008-02-10 04:04:44 UTC (rev 348) @@ -330,11 +330,20 @@ def str(xs: Iterable[Char]) = xs mkString "" /** - * Return an infinite stream, where each element evaluates gen. + * Return an infinite stream, where each element is an evaluation of gen. */ def repeat[a](gen: => a): Stream[a] = Stream.cons(gen, repeat(gen)) /** + * Return a stream of n elements, where each element is an evaluation of gen. + * Note that gen may be evaluated more than n times. See: + * + * "How to add laziness to a strict language, without even being odd" + * http://homepages.inf.ed.ac.uk/wadler/papers/lazyinstrict/lazyinstrict.ps + */ + def replicate[a](n: Int, gen: => a): Stream[a] = repeat(gen) take n + + /** * Return a stream of length at least n whose first elements are from the * given iterator, but if the iterator has fewer than n elements, then the * remaining elements are repeat(gen). @@ -388,9 +397,9 @@ /** * Indexes the result of groupBy. * <p> - * 0 1 2 3 4 5 6 7 8 9 - * [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] - * [] -> [] + // 0 1 2 3 4 5 6 7 8 9 + // [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] + // [] -> [] */ def indexGroups[a,b](xs: Seq[a])(f: a => b) = { val i = Iterator from 0 @@ -437,6 +446,9 @@ object Tree { abstract class Tree[a] { + /** + * Show the tree as a string. + */ // Leaf("a") -> // a // @@ -457,9 +469,34 @@ } r(this) mkString "\n" } + /** + * Access a node via the given index path. + */ + def get(path: Seq[Int]): a = (this, path) match { + case (Leaf(x), Seq() ) => x + case (Branch(ts), Seq(x, xs@_*)) => ts(x) get xs + case _ => throw new Exception("invalid path") + } + /** + * Flatten the leaves into a single stream. + */ + def flatten: Stream[a] = this match { + case Branch(ts) => Stream concat (ts map (_.flatten)) + case Leaf(x) => Stream cons (x, Stream empty) + } } case class Branch[a](ts: Seq[Tree[a]]) extends Tree[a] case class Leaf[a](x: a) extends Tree[a] + + /** + * Build a tree whose i-th level branch has a fanout of xs(i). + */ + def treeFromFanouts[a](gen: => a, fanouts: Seq[Int]): Tree[a] = + fanouts match { + case Seq() => Leaf(gen) + case Seq(fanout, rest@_*) => + Branch(replicate(fanout, treeFromFanouts(gen, rest)).toArray) + } } case class TreeNode[a](value: a, children: Seq[TreeNode[a]]) { @@ -611,4 +648,49 @@ // [1,3,5,6,7,8] odd -> ([1,3,5],[6,7,8]) // [] _ -> ([],[]) // + + def camelToLower(s: String, sep: String) = { + val xs = + for (c <- s) + yield if (c.isUpperCase) sep + c.toLowerCase else c + xs mkString "" + } + def camelToUnder(s: String) = camelToLower(s, "_") + def camelToHyphen(s: String) = camelToLower(s, "-") + + // TODO: this isn't really rot encoding + def rot(n: Int, s: String) = s map (_ + n toChar) mkString + + def untilNull[a](f: => a) = new Iterator[a] { + var upcoming = f + override def hasNext = upcoming != null + override def next = { + val emit = upcoming + upcoming = f + emit + } + } + + /** + * Returns the positive (unsigned int) modulo. + */ + def mod(n: Int, m: Int) = { + val r = n % m + if (r < 0) r + m else r + } + + /** + * Returns pairs of elements. + * <p> + * <code> + * pairs([a,b,c,d]) == [(a,b),(c,d)] + * pairs([a,b,c,d,e]) == [(a,b),(c,d)] + * </code> + */ + def pairs[a](xs: Seq[a]): Stream[(a,a)] = { + xs match { + case Seq() => Stream empty + case Seq(a, b, rest @ _*) => Stream cons ((a,b), pairs(rest)) + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-15 02:31:29
|
Revision: 430 http://assorted.svn.sourceforge.net/assorted/?rev=430&view=rev Author: yangzhang Date: 2008-02-14 18:31:34 -0800 (Thu, 14 Feb 2008) Log Message: ----------- added: separateHeads, groupByHeaders, multimap, IdMapper, serialize, name-munging, sum/mean overloads Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-02-15 02:30:16 UTC (rev 429) +++ scala-commons/trunk/src/commons/Collections.scala 2008-02-15 02:31:34 UTC (rev 430) @@ -123,7 +123,9 @@ def sum(xs: Iterator[Int]) = xs.foldLeft(0)(_+_) def sum(xs: Seq[Double]) = xs reduceLeft ((x:Double,y:Double)=>x+y) + def sum(xs: Iterator[Double]) = xs reduceLeft ((_:Double)+(_:Double)) def mean(xs: Seq[Int]) = sum(xs.elements) / xs.size + def mean(xs: Seq[Double]) = sum(xs.elements) / xs.size def median(xs: Seq[Long]) = xs(xs.length / 2) /** @@ -657,7 +659,10 @@ } def camelToUnder(s: String) = camelToLower(s, "_") def camelToHyphen(s: String) = camelToLower(s, "-") + def camelToSpaced(s: String) = camelToLower(s, " ") + def spacedToHyphen(s: String) = s replaceAll (" ", "-") + // TODO: this isn't really rot encoding def rot(n: Int, s: String) = s map (_ + n toChar) mkString @@ -693,4 +698,79 @@ case Seq(a, b, rest @ _*) => Stream cons ((a,b), pairs(rest)) } } + + /** + * Maps unique IDs to distinct objects. + */ + class IdMapper[a] extends mut.HashMap[a,Int] { + val i = Iterator from 0 + override def default(k: a) = { + val n = i.next + this(k) = n + n + } + } + + /** + * Serialize an Int to a String of four bytes (little-endian). + */ + def serializeInt(i: Int) = + Array( + (i >>> 0) & 0xff, + (i >>> 8) & 0xff, + (i >>> 16) & 0xff, + (i >>> 24) & 0xff + ).map(_.toChar).mkString + + /** + * For each x in xs, if p(x), then x is a header, and it owns all the xs + * after it until the next x for which p(x) holds. The return value is the + * header and its body. + * <p> + * <code> + * groupByHeaders([1,2,3,4,5,6,7,8,9])(_%3==0) == [[3,4,5],[6,7,8],[9]] + * </code> + */ + def groupByHeaders[a](xs: Seq[a])(p: a => Boolean) = { + val ys = new mut.ArrayBuffer[mut.ArrayBuffer[a]] + for (x <- xs) { + if (p(x)) ys += new mut.ArrayBuffer[a] + if (!ys.isEmpty) ys.last += x + } + ys + } +// def groupByHeaders[a](xs: Seq[a])(p: a => Boolean) = { +// def r(seq: Seq[a]): Stream[List[a]] = seq match { +// case Seq() => Stream.empty +// case Seq(y,ys@_*) => { +// val (as,bs) = spanBy(ys)(not(p)) +// Stream.cons(y :: as.toList, r(bs)) +// } +// } +// r(xs dropWhile (not(p))) +// } + + /** + * <code> + * separateHeads([[a,b,c],[d,e,f,g],[],[h]]) + * == [(a,[b,c]),(d,[e,f,g]),(h,[])] + * </code> + */ + def separateHeads[a](xss: Seq[Seq[a]]) = + for (xs <- xss; (Seq(a),b) = span(1)(xs)) yield (a,b) + + + /** + * Construct a multimap out of the given key-value pairs; the result maps + * keys to sets of values. + */ + def multimap[a,b](xs: Iterable[(a,b)]) = { + val h = new mut.HashMap[a,mut.Set[b]] with mut.MultiMap[a,b] { + override def makeSet = new mut.HashSet[b] + } + for ((k,v) <- xs) { + h add (k,v) + } + h + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-16 21:39:41
|
Revision: 458 http://assorted.svn.sourceforge.net/assorted/?rev=458&view=rev Author: yangzhang Date: 2008-02-16 13:39:45 -0800 (Sat, 16 Feb 2008) Log Message: ----------- renamed one of the multimaps to orderedMultimap Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-02-16 21:38:54 UTC (rev 457) +++ scala-commons/trunk/src/commons/Collections.scala 2008-02-16 21:39:45 UTC (rev 458) @@ -633,7 +633,7 @@ dst } - def multimap[a,b](xs: List[(a,b)]) = { + def orderedMultimap[a,b](xs: List[(a,b)]) = { val h = new mut.HashMap[a, mut.ArrayBuffer[b]] { override def default(k: a) = { this(k) = new mut.ArrayBuffer[b] This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-05-08 19:34:53
|
Revision: 749 http://assorted.svn.sourceforge.net/assorted/?rev=749&view=rev Author: yangzhang Date: 2008-05-08 12:34:26 -0700 (Thu, 08 May 2008) Log Message: ----------- clarified some documentation; fixed splitBy() eagerness bug Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-05-08 19:32:41 UTC (rev 748) +++ scala-commons/trunk/src/commons/Collections.scala 2008-05-08 19:34:26 UTC (rev 749) @@ -317,6 +317,7 @@ /** * Given a list of sublists, return pairs of the first element of each * sublist along with the remaining elements. Empty sublists are ignored. + * Useful in composition with groupByHeaders. * <p> * <code> * separateHeads([[a,b,c],[d,e,f,g],[],[h]]) @@ -399,10 +400,10 @@ * </code> */ def splitBy[a](xs: Seq[a])(pred: a => Boolean): Stream[Seq[a]] = { - val consider = xs dropWhile pred - if (consider.length == 0) { + if (xs.length == 0) { Stream.empty } else { + val consider = xs dropWhile pred val (take, rest) = spanBy(consider)(not(pred)) Stream.cons(take, splitBy(rest)(pred)) } @@ -500,6 +501,8 @@ * predicate. A reverse dropWhile. * <p> * <code>truncateWhile([1,2,3,4,5], (_ != 3)) == [1,2,3]</code> + * <p> + * <code>truncateWhile("!!hello!!", (=='!')) == "!!hello"</code> */ def truncateWhile[a](seq: Seq[a])(pred: a => Boolean) = (seq.reverse dropWhile pred).reverse This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |