From: <jd...@us...> - 2007-11-08 23:29:49
Revision: 4168
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=4168&view=rev
Author:   jdh2358
Date:     2007-11-08 15:29:46 -0800 (Thu, 08 Nov 2007)

Log Message:
-----------
added recarray utils module

Modified Paths:
--------------
    trunk/matplotlib/API_CHANGES
    trunk/matplotlib/CHANGELOG
    trunk/matplotlib/examples/date_index_formatter.py
    trunk/matplotlib/examples/loadrec.py
    trunk/matplotlib/examples/mathtext_examples.py
    trunk/matplotlib/lib/matplotlib/axes.py
    trunk/matplotlib/lib/matplotlib/cbook.py
    trunk/matplotlib/lib/matplotlib/mlab.py

Modified: trunk/matplotlib/API_CHANGES
===================================================================
--- trunk/matplotlib/API_CHANGES	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/API_CHANGES	2007-11-08 23:29:46 UTC (rev 4168)
@@ -1,3 +1,5 @@
+    Moved mlab.csv2rec -> recutils.csv2rec
+
     Added ax kwarg to pyplot.colorbar and Figure.colorbar so that one
     can specify the axes object from which space for the colorbar is
     to be taken, if one does not want to make the colorbar axes

Modified: trunk/matplotlib/CHANGELOG
===================================================================
--- trunk/matplotlib/CHANGELOG	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/CHANGELOG	2007-11-08 23:29:46 UTC (rev 4168)
@@ -1,3 +1,6 @@
+2007-11-08 Moved csv2rec to recutils and added other record array
+           utilities - JDH
+
 2007-11-08 If available, use existing pyparsing installation - DSD
 
 2007-11-07 Removed old enthought.traits from lib/matplotlib, added

Modified: trunk/matplotlib/examples/date_index_formatter.py
===================================================================
--- trunk/matplotlib/examples/date_index_formatter.py	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/examples/date_index_formatter.py	2007-11-08 23:29:46 UTC (rev 4168)
@@ -9,7 +9,7 @@
 """
 
 import numpy
-from matplotlib.mlab import csv2rec
+from matplotlib.recutils import csv2rec
 from pylab import figure, show
 from matplotlib.ticker import Formatter
 

Modified: trunk/matplotlib/examples/loadrec.py
===================================================================
--- trunk/matplotlib/examples/loadrec.py	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/examples/loadrec.py	2007-11-08 23:29:46 UTC (rev 4168)
@@ -1,4 +1,4 @@
-from matplotlib.mlab import csv2rec
+from matplotlib.recutils import csv2rec
 from pylab import figure, show
 
 a = csv2rec('data/msft.csv')
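For existing scripts the two example diffs above are only an import path move; a minimal
sketch of the updated usage, assuming the new matplotlib.recutils module exports csv2rec
with the same signature (the data file is the one already used by examples/loadrec.py):

    # old location (r4167 and earlier):
    #   from matplotlib.mlab import csv2rec
    from matplotlib.recutils import csv2rec

    # load a CSV file into a numpy record array; column names come from the
    # header row, lower cased with spaces converted to underscores
    r = csv2rec('data/msft.csv')
    print r.dtype.names
    print r[0]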
Modified: trunk/matplotlib/examples/mathtext_examples.py
===================================================================
--- trunk/matplotlib/examples/mathtext_examples.py	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/examples/mathtext_examples.py	2007-11-08 23:29:46 UTC (rev 4168)
@@ -49,7 +49,7 @@
     r'$\widehat{abc}\widetilde{def}$',
     r'$\Gamma \Delta \Theta \Lambda \Xi \Pi \Sigma \Upsilon \Phi \Psi \Omega$',
     r'$\alpha \beta \gamma \delta \epsilon \zeta \eta \theta \iota \lambda \mu \nu \xi \pi \kappa \rho \sigma \tau \upsilon \phi \chi \psi$',
-    ur'Generic symbol: $\u23ce \mathrm{\ue0f2 \U0001D538}$'
+    #ur'Generic symbol: $\u23ce \mathrm{\ue0f2 \U0001D538}$'
     ]
 
 from pylab import *
@@ -63,12 +63,13 @@
 
 axis([0, 3, -len(tests), 0])
 yticks(arange(len(tests)) * -1)
 for i, s in enumerate(tests):
-    print "%02d: %s" % (i, s)
+    print (i, s)
     text(0.1, -i, s, fontsize=20)
-savefig('mathtext_example')
-close('all')
-
+#savefig('mathtext_example')
+#close('all')
+show()
+
 if '--latex' in sys.argv:
     fd = open("mathtext_examples.ltx", "w")
     fd.write("\\documentclass{article}\n")

Modified: trunk/matplotlib/lib/matplotlib/axes.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/axes.py	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/lib/matplotlib/axes.py	2007-11-08 23:29:46 UTC (rev 4168)
@@ -4068,6 +4068,8 @@
 
         Optional kwargs control the PatchCollection properties:
         %(PatchCollection)s
+
+        A Collection instance is returned
         """
         if not self._hold: self.cla()
 

Modified: trunk/matplotlib/lib/matplotlib/cbook.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/cbook.py	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/lib/matplotlib/cbook.py	2007-11-08 23:29:46 UTC (rev 4168)
@@ -853,6 +853,7 @@
         raise ValueError(_safezip_msg % (Nx, i+1, len(arg)))
     return zip(*args)
 
+
 class MemoryMonitor:
     def __init__(self, nmax=20000):
         self._nmax = nmax
@@ -895,12 +896,14 @@
             x = npy.arange(i0, self._n, isub)
         return x, self._mem[i0:self._n:isub]
 
-    def plot(self, i0=0, isub=1):
-        from pylab import figure, show
-        fig = figure()
+    def plot(self, i0=0, isub=1, fig=None):
+        if fig is None:
+            from pylab import figure, show
+            fig = figure()
+
         ax = fig.add_subplot(111)
         ax.plot(*self.xy(i0, isub))
-        show()
+        fig.canvas.draw()
 
 
 def print_cycles(objects, outstream=sys.stdout, show_progress=False):
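The MemoryMonitor.plot change above lets a caller hand in an existing figure and leaves
show() to the caller; a rough usage sketch, assuming the monitor is sampled through its
__call__ method as elsewhere in cbook (the work loop below is made up for illustration):

    from matplotlib.cbook import MemoryMonitor
    from pylab import figure, show

    mm = MemoryMonitor()
    for i in range(1000):
        mm()                      # record the current memory usage of this process
        # ... do some work here ...

    fig = figure()
    mm.plot(fig=fig)              # draw into the caller-supplied figure
    show()                        # showing the figure is now the caller's job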
Modified: trunk/matplotlib/lib/matplotlib/mlab.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/mlab.py	2007-11-08 23:25:44 UTC (rev 4167)
+++ trunk/matplotlib/lib/matplotlib/mlab.py	2007-11-08 23:29:46 UTC (rev 4168)
@@ -1257,415 +1257,7 @@
     if unpack: return X.transpose()
     else: return X
 
-def csv2rec(fname, comments='#', skiprows=0, checkrows=5, delimiter=',',
-            converterd=None, names=None, missing=None):
-    """
-    Load data from comma/space/tab delimited file in fname into a
-    numpy record array and return the record array.
-
-    If names is None, a header row is required to automatically assign
-    the recarray names.  The headers will be lower cased, spaces will
-    be converted to underscores, and illegal attribute name characters
-    removed.  If names is not None, it is a sequence of names to use
-    for the column names.  In this case, it is assumed there is no header row.
-
-
-    fname - can be a filename or a file handle.  Support for gzipped
-    files is automatic, if the filename ends in .gz
-
-    comments - the character used to indicate the start of a comment
-    in the file
-
-    skiprows - is the number of rows from the top to skip
-
-    checkrows - is the number of rows to check to validate the column
-    data type.  When set to zero all rows are validated.
-
-    converterd, if not None, is a dictionary mapping column number or
-    munged column name to a converter function
-
-    names, if not None, is a list of header names.  In this case, no
-    header will be read from the file
-
-    if no rows are found, None is returned See examples/loadrec.py
-    """
-
-    if converterd is None:
-        converterd = dict()
-
-    import dateutil.parser
-    parsedate = dateutil.parser.parse
-
-
-    fh = cbook.to_filehandle(fname)
-
-
-    class FH:
-        """
-        for space delimited files, we want different behavior than
-        comma or tab.  Generally, we want multiple spaces to be
-        treated as a single separator, whereas with comma and tab we
-        want multiple commas to return multiple (empty) fields.  The
-        join/strip trick below effects this
-        """
-        def __init__(self, fh):
-            self.fh = fh
-
-        def close(self):
-            self.fh.close()
-
-        def seek(self, arg):
-            self.fh.seek(arg)
-
-        def fix(self, s):
-            return ' '.join(s.split())
-
-
-        def next(self):
-            return self.fix(self.fh.next())
-
-        def __iter__(self):
-            for line in self.fh:
-                yield self.fix(line)
-
-    if delimiter==' ':
-        fh = FH(fh)
-
-    reader = csv.reader(fh, delimiter=delimiter)
-    def process_skiprows(reader):
-        if skiprows:
-            for i, row in enumerate(reader):
-                if i>=(skiprows-1): break
-
-        return fh, reader
-
-    process_skiprows(reader)
-
-
-    def myfloat(x):
-        if x==missing:
-            return npy.nan
-        else:
-            return float(x)
-
-    def get_func(item, func):
-        # promote functions in this order
-        funcmap = {int:myfloat, myfloat:dateutil.parser.parse, dateutil.parser.parse:str}
-        try: func(item)
-        except:
-            if func==str:
-                raise ValueError('Could not find a working conversion function')
-            else: return get_func(item, funcmap[func])    # recurse
-        else: return func
-
-
-    # map column names that clash with builtins -- TODO - extend this list
-    itemd = {
-        'return' : 'return_',
-        'file'   : 'file_',
-        'print'  : 'print_',
-        }
-
-    def get_converters(reader):
-
-        converters = None
-        for i, row in enumerate(reader):
-            if i==0:
-                converters = [int]*len(row)
-            if checkrows and i>checkrows:
-                break
-            #print i, len(names), len(row)
-            #print 'converters', zip(converters, row)
-            for j, (name, item) in enumerate(zip(names, row)):
-                func = converterd.get(j)
-                if func is None:
-                    func = converterd.get(name)
-                if func is None:
-                    if not item.strip(): continue
-                    func = converters[j]
-                    if len(item.strip()):
-                        func = get_func(item, func)
-                converters[j] = func
-        return converters
-
-    # Get header and remove invalid characters
-    needheader = names is None
-    if needheader:
-        headers = reader.next()
-        # remove these chars
-        delete = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
-        delete.add('"')
-
-        names = []
-        seen = dict()
-        for i, item in enumerate(headers):
-            item = item.strip().lower().replace(' ', '_')
-            item = ''.join([c for c in item if c not in delete])
-            if not len(item):
-                item = 'column%d'%i
-
-            item = itemd.get(item, item)
-            cnt = seen.get(item, 0)
-            if cnt>0:
-                names.append(item + '%d'%cnt)
-            else:
-                names.append(item)
-            seen[item] = cnt+1
-
-    # get the converter functions by inspecting checkrows
-    converters = get_converters(reader)
-    if converters is None:
-        raise ValueError('Could not find any valid data in CSV file')
-
-    # reset the reader and start over
-    fh.seek(0)
-    process_skiprows(reader)
-    if needheader:
-        skipheader = reader.next()
-
-    # iterate over the remaining rows and convert the data to date
-    # objects, ints, or floats as approriate
-    rows = []
-    for i, row in enumerate(reader):
-        if not len(row): continue
-        if row[0].startswith(comments): continue
-        rows.append([func(val) for func, val in zip(converters, row)])
-    fh.close()
-
-    if not len(rows):
-        return None
-    r = npy.rec.fromrecords(rows, names=names)
-    return r
-
-
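The docstring above spells out the keyword arguments; a small sketch of the less obvious
ones, with the file and column names invented purely for illustration:

    from matplotlib.recutils import csv2rec

    # skip two leading junk rows, sniff column types from 10 rows, map the
    # string 'NA' to nan in float columns, and force the 'volume' column to
    # int via converterd
    r = csv2rec('prices.csv', skiprows=2, checkrows=10, missing='NA',
                converterd={'volume': int})

    # names=... supplies the column names and suppresses header parsing
    r2 = csv2rec('noheader.csv', names=['date', 'open', 'close'])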
Support for gzipped - files is automatic, if the filename ends in .gz - """ - fh = cbook.to_filehandle(fname, 'w') - writer = csv.writer(fh, delimiter=delimiter, quoting=csv.QUOTE_NONNUMERIC) - header = r.dtype.names - writer.writerow(header) - for row in r: - writer.writerow(map(str, row)) - fh.close() - -try: - import pyExcelerator as excel -except ImportError: - pass -else: - - class Format: - xlstyle = None - def convert(self, x): - return x - - class FormatFloat(Format): - def __init__(self, precision=4): - self.xlstyle = excel.XFStyle() - zeros = ''.join(['0']*precision) - self.xlstyle.num_format_str = '#,##0.%s;[RED]-#,##0.%s'%(zeros, zeros) - - class FormatInt(Format): - convert = int - def __init__(self): - - self.xlstyle = excel.XFStyle() - self.xlstyle.num_format_str = '#,##;[RED]-#,##' - - class FormatPercent(Format): - def __init__(self, precision=4): - self.xlstyle = excel.XFStyle() - zeros = ''.join(['0']*precision) - self.xlstyle.num_format_str = '0.%s%;[RED]-0.%s%'%(zeros, zeros) - - class FormatThousands(FormatFloat): - def __init__(self, precision=1): - FormatFloat.__init__(self, precision) - - def convert(self, x): - return x/1e3 - - class FormatMillions(FormatFloat): - def __init__(self, precision=1): - FormatFloat.__init__(self, precision) - - def convert(self, x): - return x/1e6 - - class FormatDate(Format): - def __init__(self, fmt='%Y-%m-%d'): - self.fmt = fmt - - def convert(self, val): - return val.strftime(self.fmt) - - class FormatDatetime(Format): - def __init__(self, fmt='%Y-%m-%d %H:%M:%S'): - self.fmt = fmt - - def convert(self, val): - return val.strftime(self.fmt) - - class FormatObject(Format): - - def convert(self, x): - return str(x) - - def rec2excel(ws, r, formatd=None, rownum=0): - """ - save record array r to excel pyExcelerator worksheet ws - starting at rownum - - formatd is a dictionary mapping dtype name -> Format instances - """ - - if formatd is None: - formatd = dict() - - formats = [] - for i, name in enumerate(r.dtype.names): - dt = r.dtype[name] - format = formatd.get(name) - if format is None: - format = rec2excel.formatd.get(dt.type, FormatObject()) - - ws.write(rownum, i, name) - formats.append(format) - - rownum+=1 - - ind = npy.arange(len(r.dtype.names)) - for row in r: - for i in ind: - val = row[i] - format = formats[i] - val = format.convert(val) - if format.xlstyle is None: - ws.write(rownum, i, val) - else: - ws.write(rownum, i, val, format.xlstyle) - rownum += 1 - rec2excel.formatd = { - npy.int16 : FormatInt(), - npy.int32 : FormatInt(), - npy.int64 : FormatInt(), - npy.float32 : FormatFloat(), - npy.float64 : FormatFloat(), - npy.object_ : FormatObject(), - npy.string_ : Format(), - } - - - -# some record array helpers -def rec_append_field(rec, name, arr, dtype=None): - 'return a new record array with field name populated with data from array arr' - arr = npy.asarray(arr) - if dtype is None: - dtype = arr.dtype - newdtype = npy.dtype(rec.dtype.descr + [(name, dtype)]) - newrec = npy.empty(rec.shape, dtype=newdtype) - for field in rec.dtype.fields: - newrec[field] = rec[field] - newrec[name] = arr - return newrec.view(npy.recarray) - - -def rec_drop_fields(rec, names): - 'return a new numpy record array with fields in names dropped' - - names = set(names) - Nr = len(rec) - - newdtype = npy.dtype([(name, rec.dtype[name]) for name in rec.dtype.names - if name not in names]) - - newrec = npy.empty(Nr, dtype=newdtype) - for field in newdtype.names: - newrec[field] = rec[field] - - return newrec.view(npy.recarray) - - -def 
-def rec_join(key, r1, r2):
-    """
-    join record arrays r1 and r2 on key; key is a tuple of field
-    names.  if r1 and r2 have equal values on all the keys in the key
-    tuple, then their fields will be merged into a new record array
-    containing the union of the fields of r1 and r2
-    """
-
-    for name in key:
-        if name not in r1.dtype.names:
-            raise ValueError('r1 does not have key field %s'%name)
-        if name not in r2.dtype.names:
-            raise ValueError('r2 does not have key field %s'%name)
-
-    def makekey(row):
-        return tuple([row[name] for name in key])
-
-
-    names = list(r1.dtype.names) + [name for name in r2.dtype.names if name not in set(r1.dtype.names)]
-
-
-
-    r1d = dict([(makekey(row),i) for i,row in enumerate(r1)])
-    r2d = dict([(makekey(row),i) for i,row in enumerate(r2)])
-
-    r1keys = set(r1d.keys())
-    r2keys = set(r2d.keys())
-
-    keys = r1keys & r2keys
-
-    r1ind = [r1d[k] for k in keys]
-    r2ind = [r2d[k] for k in keys]
-
-
-    r1 = r1[r1ind]
-    r2 = r2[r2ind]
-
-    r2 = rec_drop_fields(r2, r1.dtype.names)
-
-
-    def key_desc(name):
-        'if name is a string key, use the larger size of r1 or r2 before merging'
-        dt1 = r1.dtype[name]
-        if dt1.type != npy.string_:
-            return (name, dt1.descr[0][1])
-
-        dt2 = r1.dtype[name]
-        assert dt2==dt1
-        if dt1.num>dt2.num:
-            return (name, dt1.descr[0][1])
-        else:
-            return (name, dt2.descr[0][1])
-
-
-
-    keydesc = [key_desc(name) for name in key]
-
-    newdtype = npy.dtype(keydesc +
-                         [desc for desc in r1.dtype.descr if desc[0] not in key ] +
-                         [desc for desc in r2.dtype.descr if desc[0] not in key ] )
-
-
-    newrec = npy.empty(len(r1), dtype=newdtype)
-    for field in r1.dtype.names:
-        newrec[field] = r1[field]
-
-    for field in r2.dtype.names:
-        newrec[field] = r2[field]
-
-    return newrec.view(npy.recarray)
-
-
 def slopes(x,y):
     """
     SLOPES calculate the slope y'(x) Given data vectors X and Y SLOPES

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.