From: <jd...@us...> - 2008-04-23 17:45:51
|
Revision: 5066 http://matplotlib.svn.sourceforge.net/matplotlib/?rev=5066&view=rev Author: jdh2358 Date: 2008-04-23 10:44:15 -0700 (Wed, 23 Apr 2008) Log Message: ----------- added manuels scatter pie example Modified Paths: -------------- trunk/matplotlib/lib/matplotlib/mlab.py trunk/matplotlib/unit/mlab_unit.py Added Paths: ----------- trunk/matplotlib/examples/scatter_piecharts.py Added: trunk/matplotlib/examples/scatter_piecharts.py =================================================================== --- trunk/matplotlib/examples/scatter_piecharts.py (rev 0) +++ trunk/matplotlib/examples/scatter_piecharts.py 2008-04-23 17:44:15 UTC (rev 5066) @@ -0,0 +1,40 @@ +""" +This example makes custom 'pie charts' as the markers for a scatter plot + +Thanks to Manuel Metz for the example +""" +import math +import numpy as np +import matplotlib.pyplot as plt + +# first define the ratios +r1 = 0.2 # 20% +r2 = r1 + 0.4 # 40% + +# define some sizes of the scatter marker +sizes = [60,80,120] + +# calculate the points of the first pie marker +# +# these are just the origin (0,0) + +# some points on a circle cos,sin +x = [0] + np.cos(np.linspace(0, 2*math.pi*r1, 10)).tolist() +y = [0] + np.sin(np.linspace(0, 2*math.pi*r1, 10)).tolist() +xy1 = zip(x,y) + +# ... 
+x = [0] + np.cos(np.linspace(2*math.pi*r1, 2*math.pi*r2, 10)).tolist() +y = [0] + np.sin(np.linspace(2*math.pi*r1, 2*math.pi*r2, 10)).tolist() +xy2 = zip(x,y) + +x = [0] + np.cos(np.linspace(2*math.pi*r2, 2*math.pi, 10)).tolist() +y = [0] + np.sin(np.linspace(2*math.pi*r2, 2*math.pi, 10)).tolist() +xy3 = zip(x,y) + + +fig = plt.figure() +ax = fig.add_subplot(111) +ax.scatter( np.arange(3), np.arange(3), marker=(xy1,0), s=sizes, facecolor='blue' ) +ax.scatter( np.arange(3), np.arange(3), marker=(xy2,0), s=sizes, facecolor='green' ) +ax.scatter( np.arange(3), np.arange(3), marker=(xy3,0), s=sizes, facecolor='red' ) +plt.show() Modified: trunk/matplotlib/lib/matplotlib/mlab.py =================================================================== --- trunk/matplotlib/lib/matplotlib/mlab.py 2008-04-23 16:54:21 UTC (rev 5065) +++ trunk/matplotlib/lib/matplotlib/mlab.py 2008-04-23 17:44:15 UTC (rev 5066) @@ -87,6 +87,7 @@ import numpy as npy + from matplotlib import nxutils from matplotlib import cbook @@ -2143,10 +2144,10 @@ def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', - converterd=None, names=None, missing=None): + converterd=None, names=None, missing='', missingd=None): """ Load data from comma/space/tab delimited file in fname into a - numpy record array and return the record array. + numpy (m)record array and return the record array. If names is None, a header row is required to automatically assign the recarray names. The headers will be lower cased, spaces will @@ -2172,13 +2173,24 @@ names, if not None, is a list of header names. In this case, no header will be read from the file + missingd - is a dictionary mapping munged column names to field values + which signify that the field does not contain actual data and should + be masked, e.g. '0000-00-00' or 'unused' + + missing - a string whose value signals a missing field regardless of + the column it appears in, e.g. 
'unused' + if no rows are found, None is returned -- see examples/loadrec.py """ if converterd is None: converterd = dict() + if missingd is None: + missingd = {} + import dateutil.parser + import datetime parsedate = dateutil.parser.parse @@ -2226,14 +2238,28 @@ process_skiprows(reader) - dateparser = dateutil.parser.parse + def ismissing(name, val): + "Should the value val in column name be masked?" - def myfloat(x): - if x==missing: - return npy.nan + if val == missing or val == missingd.get(name) or val == '': + return True else: - return float(x) + return False + def with_default_value(func, default): + def newfunc(name, val): + if ismissing(name, val): + return default + else: + return func(val) + return newfunc + + dateparser = dateutil.parser.parse + mydateparser = with_default_value(dateparser, datetime.date(1,1,1)) + myfloat = with_default_value(float, npy.nan) + myint = with_default_value(int, -1) + mystr = with_default_value(str, '') + def mydate(x): # try and return a date object d = dateparser(x) @@ -2241,16 +2267,16 @@ if d.hour>0 or d.minute>0 or d.second>0: raise ValueError('not a date') return d.date() + mydate = with_default_value(mydate, datetime.date(1,1,1)) - - def get_func(item, func): + def get_func(name, item, func): # promote functions in this order - funcmap = {int:myfloat, myfloat:mydate, mydate:dateparser, dateparser:str} - try: func(item) + funcmap = {myint:myfloat, myfloat:mydate, mydate:mydateparser, mydateparser:mystr} + try: func(name, item) except: - if func==str: + if func==mystr: raise ValueError('Could not find a working conversion function') - else: return get_func(item, funcmap[func]) # recurse + else: return get_func(name, item, funcmap[func]) # recurse else: return func @@ -2266,7 +2292,7 @@ converters = None for i, row in enumerate(reader): if i==0: - converters = [int]*len(row) + converters = [myint]*len(row) if checkrows and i>checkrows: break #print i, len(names), len(row) @@ -2276,10 +2302,10 @@ if func is None: func = 
converterd.get(name) if func is None: - if not item.strip(): continue + #if not item.strip(): continue func = converters[j] if len(item.strip()): - func = get_func(item, func) + func = get_func(name, item, func) converters[j] = func return converters @@ -2307,7 +2333,7 @@ item = itemd.get(item, item) cnt = seen.get(item, 0) if cnt>0: - names.append(item + '%d'%cnt) + names.append(item + '_%d'%cnt) else: names.append(item) seen[item] = cnt+1 @@ -2327,15 +2353,24 @@ # iterate over the remaining rows and convert the data to date # objects, ints, or floats as approriate rows = [] + rowmasks = [] for i, row in enumerate(reader): if not len(row): continue if row[0].startswith(comments): continue - rows.append([func(val) for func, val in zip(converters, row)]) + rows.append([func(name, val) for func, name, val in zip(converters, names, row)]) + rowmasks.append([ismissing(name, val) for name, val in zip(names, row)]) fh.close() if not len(rows): return None - r = npy.rec.fromrecords(rows, names=names) + if npy.any(rowmasks): + try: from numpy.ma import mrecords + except ImportError: + raise RuntimeError('numpy 1.05 or later is required for masked array support') + else: + r = mrecords.fromrecords(rows, names=names, mask=rowmasks) + else: + r = npy.rec.fromrecords(rows, names=names) return r @@ -2529,26 +2564,59 @@ -def rec2csv(r, fname, delimiter=',', formatd=None): +def rec2csv(r, fname, delimiter=',', formatd=None, missing='', + missingd=None): """ - Save the data from numpy record array r into a comma/space/tab + Save the data from numpy (m)recarray r into a comma/space/tab delimited file. The record array dtype names will be used for column headers. fname - can be a filename or a file handle. Support for gzipped files is automatic, if the filename ends in .gz + + See csv2rec and rec2csv for information about missing and + missingd, which can be used to fill in masked values into your CSV + file. 
""" + + if missingd is None: + missingd = dict() + + def with_mask(func): + def newfunc(val, mask, mval): + if mask: + return mval + else: + return func(val) + return newfunc + formatd = get_formatd(r, formatd) funcs = [] for i, name in enumerate(r.dtype.names): - funcs.append(csvformat_factory(formatd[name]).tostr) + funcs.append(with_mask(csvformat_factory(formatd[name]).tostr)) fh, opened = cbook.to_filehandle(fname, 'w', return_opened=True) writer = csv.writer(fh, delimiter=delimiter) header = r.dtype.names writer.writerow(header) + + # Our list of specials for missing values + mvals = [] + for name in header: + mvals.append(missingd.get(name, missing)) + + ismasked = False + if len(r): + row = r[0] + ismasked = hasattr(row, '_fieldmask') + for row in r: - writer.writerow([func(val) for func, val in zip(funcs, row)]) + if ismasked: + row, rowmask = row.item(), row._fieldmask.item() + else: + rowmask = [False] * len(row) + writer.writerow([func(val, mask, mval) for func, val, mask, mval + in zip(funcs, row, rowmask, mvals)]) if opened: fh.close() Modified: trunk/matplotlib/unit/mlab_unit.py =================================================================== --- trunk/matplotlib/unit/mlab_unit.py 2008-04-23 16:54:21 UTC (rev 5065) +++ trunk/matplotlib/unit/mlab_unit.py 2008-04-23 17:44:15 UTC (rev 5066) @@ -55,5 +55,27 @@ print 'repr(dt.type)',repr(dt.type) self.failUnless( numpy.all(ra[name] == ra2[name]) ) # should not fail with numpy 1.0.5 + def test_csv2rec_masks(self): + # Make sure masked entries survive roundtrip + + csv = """date,age,weight,name +2007-01-01,12,32.2,"jdh1" +0000-00-00,0,23,"jdh2" +2007-01-03,,32.5,"jdh3" +2007-01-04,12,NaN,"jdh4" +2007-01-05,-1,NULL,""" + missingd = dict(date='0000-00-00', age='-1', weight='NULL') + fh = StringIO.StringIO(csv) + r1 = mlab.csv2rec(fh, missingd=missingd) + fh = StringIO.StringIO() + mlab.rec2csv(r1, fh, missingd=missingd) + fh.seek(0) + r2 = mlab.csv2rec(fh, missingd=missingd) + + self.failUnless( numpy.all( 
r2['date'].mask == [0,1,0,0,0] )) + self.failUnless( numpy.all( r2['age'].mask == [0,0,1,0,1] )) + self.failUnless( numpy.all( r2['weight'].mask == [0,0,0,0,1] )) + self.failUnless( numpy.all( r2['name'].mask == [0,0,0,0,1] )) + if __name__=='__main__': unittest.main() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |