From: <sa...@us...> - 2009-09-11 20:58:34
|
Revision: 7746
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=7746&view=rev
Author:   sameerd
Date:     2009-09-11 20:58:27 +0000 (Fri, 11 Sep 2009)

Log Message:
-----------
Added a recs_join function to join a single column of multiple record arrays

Modified Paths:
--------------
    trunk/matplotlib/lib/matplotlib/cbook.py
    trunk/matplotlib/lib/matplotlib/mlab.py

Modified: trunk/matplotlib/lib/matplotlib/cbook.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/cbook.py 2009-09-11 20:48:10 UTC (rev 7745)
+++ trunk/matplotlib/lib/matplotlib/cbook.py 2009-09-11 20:58:27 UTC (rev 7746)
@@ -1626,7 +1626,48 @@
     import matplotlib.mlab as mlab
     return mlab.quad2cubic(q0x, q0y, q1x, q1y, q2x, q2y)
 
+def align_iterators(func, *iterables):
+    """
+    This generator takes a bunch of iterables that are ordered by func
+    It sends out ordered tuples (func(row), [rows from all iterators matching func(row)])
+
+    It is used by mlab.recs_join to join record arrays
+    """
+    class myiter:
+        def __init__(self, it):
+            self.it = it
+            self.key = self.value = None
+            self.iternext()
+        def iternext(self):
+            try:
+                self.value = self.it.next()
+                self.key = func(self.value)
+            except StopIteration:
+                self.value = self.key = None
+
+        def __call__(self, key):
+            retval = None
+            if key == self.key:
+                retval = self.value
+                self.iternext()
+            elif self.key and key > self.key:
+                raise ValueError, "Iterator has been left behind"
+            return retval
+
+    # This can be made more efficient by not computing the minimum key for each iteration
+    iters = [myiter(it) for it in iterables]
+    minvals = minkey = True
+    while 1:
+        minvals = (filter(None, [it.key for it in iters]))
+        if minvals:
+            minkey = min(minvals)
+            yield (minkey, [it(minkey) for it in iters])
+        else:
+            break
+
+
+
 if __name__=='__main__':
     assert( allequal([1,1,1]) )
     assert(not allequal([1,1,0]) )

Modified: trunk/matplotlib/lib/matplotlib/mlab.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/mlab.py 2009-09-11 20:48:10 UTC (rev 7745)
+++ trunk/matplotlib/lib/matplotlib/mlab.py 2009-09-11 20:58:27 UTC (rev 7746)
@@ -91,6 +91,9 @@
 :meth:`rec_join`
     join two record arrays on sequence of fields
 
+:meth:`recs_join`
+    a simple join of multiple recarrays using a single column as a key
+
 :meth:`rec_groupby`
     summarize data by groups (similar to SQL GROUP BY)
 
@@ -139,7 +142,7 @@
 """
 
 from __future__ import division
-import csv, warnings, copy, os
+import csv, warnings, copy, os, operator
 
 import numpy as np
 ma = np.ma
@@ -1880,7 +1883,29 @@
     return newrec
 
+def recs_join(key, name, recs,missing=0.):
+    """
+    *key* is the column name that acts as a key
+    *name* is the name that we want to join
+    *missing" is what the missing fields are replaced by
+    *recarrays* is a list of record arrays to join
+    returns a record array with columns [rowkey, name1, name2, ... namen]
+
+    >>> r = recs_join("date", "close", recs=[r0, r1], missing=0.)
+
+    """
+    results = []
+    def extract(r):
+        if r is None: return missing
+        else: return r[name]
+
+    for rowkey, row in cbook.align_iterators(operator.attrgetter(key), *[iter(r) for r in recs]):
+        results.append([rowkey] + map(extract, row))
+    names = ",".join([key] + ["%s%d" % (name, d) for d in range(len(recs))])
+    return np.rec.fromrecords(results, names=names)
+
+
 def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
             converterd=None, names=None, missing='', missingd=None,
             use_mrecords=False):

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |