Re: [Pytables-users] Re: nctoh5 script

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Hi,

I've been working on a 'beautified' version of nctoh5 to include it as an
utility of PyTables. I am attaching it at the bottom of this message.

The new version supports specifying filter parameters on the command
line and optimizing the I/O speed (through the use of the expectedrows
parameter of createEArray). Also, I've added a small patch that should deal
with Numeric single-character string arrays (typecode 'c'), although I have
not tested it.

Jeffrey: I don't understand why you create an Array (and not an EArray) for
unidimensional NetCDF variables. I've changed the new code to always create
an EArray (just to take advantage of filters). If you have some reason
against doing this, please tell me.

A Dilluns 01 Novembre 2004 17:09, Jeffrey S Whitaker va escriure:
> fri...@am... wrote:
>
> > Hi Jeffrey,
> >
> > thanks for your nctoh5-script in the pytables list. Should it work with
> > CharType-Variables? IsDescription.py complains about »Illegal type: 'c'«
> > if only one CharType Variable is used in the netCDF-File-Arrays.
> >
> > I use Enthought Python 2.3 along with pytables-0.81, ScientificIO-2.4.6,
> > numarray-1.1, numeric-23.6
> >
> > Thanks in advance,
> > Friedemann
>
>
>
> Friedemann:  I can't think of any reason why it wouldn't work.  I'm
> cc'ing the pytables list in case anyone there has an idea.  It would
> help if you could post your netcdf file somewhere so I could try it out.
>
> -Jeff
>

-- 
Francesc Alted

----------------------------------------------------------------------
#!/usr/bin/env python

"""
convert netCDF file to HDF5 using Scientific.IO.NetCDF and PyTables.
Jeff Whitaker <jef...@no...>

Added some flags to select filters, as well as some small improvements.
Francesc Altet <fa...@ca...>

This requires Scientific from
http://starship.python.net/~hinsen/ScientificPython

"""
import Scientific.IO.NetCDF as NetCDF
import tables, sys, os.path, getopt, time

def nctoh5(ncfilename, h5filename, filters, overwritefile):
    """Copy the variables and attributes of a netCDF file into an HDF5 file.

    Every netCDF variable is stored as an EArray hanging from the root group
    of the HDF5 file.  The attributes of each variable are copied to the
    EArray, together with an extra 'dimensions' attribute holding the names
    of the netCDF dimensions.  The global netCDF attributes are copied to the
    root group.

    Parameters:
    ncfilename -- path of the netCDF file to read.
    h5filename -- path of the HDF5 file to write.
    filters -- value passed as the 'filters' argument of createEArray
        (a tables.Filters instance, or None).
    overwritefile -- if true the HDF5 file is opened in "w" mode, otherwise
        in "a" (append) mode.

    Returns a (nobjects, nbytes) tuple: the number of variables copied and
    the number of bytes they take (number of elements times item size).

    Both files are closed on exit, even when the copy fails.
    """
    ncfile = NetCDF.NetCDFFile(ncfilename, mode="r")
    try:
        if overwritefile:
            h5mode = "w"
        else:
            h5mode = "a"
        h5file = tables.openFile(h5filename, mode=h5mode)
        try:
            # Counters for the returned statistics
            nobjects = 0
            nbytes = 0
            # loop over variables in netCDF file.
            for varname in ncfile.variables.keys():
                var = ncfile.variables[varname]
                vardims = list(var.dimensions)
                vardimsizes = [ncfile.dimensions[vardim]
                               for vardim in vardims]
                # The enlargeable dimension is the first one whose size is
                # None; when there is none, dimension 0 is made enlargeable.
                edim = 0
                for ndim, dimsize in enumerate(vardimsizes):
                    if dimsize is None:
                        edim = ndim
                        break
                # use long_name for title.
                if hasattr(var, 'long_name'):
                    title = var.long_name
                else:  # or, just use some bogus title.
                    title = varname + ' array'
                # Number of rows that will be appended.  It has to be read
                # before the enlargeable slot is zeroed below; it is only a
                # sizing hint, so keep it at least 1.
                nrows = max(var.shape[edim], 1)
                # NOTE(review): a variable without dimensions leaves
                # vardimsizes empty, so the next line raises IndexError.
                vardimsizes[edim] = 0
                dtype = var.typecode()
                if dtype == 'c':
                    # Special case for Numeric character objects
                    # (on which base Scientific Python works)
                    atom = tables.StringAtom(shape=tuple(vardimsizes),
                                             length=1)
                else:
                    atom = tables.Atom(dtype=dtype,
                                       shape=tuple(vardimsizes))
                # Create an EArray to keep the NetCDF variable
                vardata = h5file.createEArray(h5file.root, varname,
                                              atom, title, filters=filters,
                                              expectedrows=nrows)
                # write data to enlargeable array one record at a time.
                # (so the whole array doesn't have to be kept in memory).
                # NOTE(review): records are always sliced along dimension 0,
                # which presumes the enlargeable dimension is the first one.
                for n in range(var.shape[0]):
                    vardata.append(var[n:n+1])
                # Increment the counters
                nobjects += 1
                nelements = 1
                for dimlen in vardata.shape:
                    nelements *= dimlen
                nbytes += nelements * vardata.itemsize
                # set variable attributes.
                for key, val in var.__dict__.items():
                    setattr(vardata.attrs, key, val)
                setattr(vardata.attrs, 'dimensions', tuple(vardims))
            # set global (file) attributes.
            for key, val in ncfile.__dict__.items():
                setattr(h5file.root._v_attrs, key, val)
        finally:
            h5file.close()
    finally:
        ncfile.close()
    return (nobjects, nbytes)

# Help text written to stderr for -h and after any command-line error.
# The %s placeholder is filled with the name the script was invoked as.
usage = """usage: %s [-h] [-v] [-o] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] netcdffilename hdf5filename
 -h -- Print usage message.
 -v -- Show more information.
 -o -- Overwrite destination file.
 --complevel=(0-9) -- Set a compression level (0 for no compression, which
     is the default).
 --complib=lib -- Set the compression library to be used during the copy.
     lib can be set to "zlib", "lzo" or "ucl". Defaults to "zlib".
 --shuffle=(0|1) -- Activate or not the shuffling filter (default is active
     if complevel>0).
 --fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter (not
     active by default).
\n""" % os.path.basename(sys.argv[0])

# Parse the command line.  Any problem with it is reported on stderr,
# followed by the usage text, and the script exits with a non-zero status.
try:
    opts, pargs = getopt.getopt(sys.argv[1:], 'hvo',
                                ['complevel=',
                                 'complib=',
                                 'shuffle=',
                                 'fletcher32=',
                                 ])
except getopt.GetoptError:
    sys.stderr.write("Error parsing the options. The error was: %s\n"
                     % sys.exc_info()[1])
    sys.stderr.write(usage)
    sys.exit(2)

# default options
verbose = 0
overwritefile = 0
# None means "not given on the command line"
complevel = None
complib = None
shuffle = None
fletcher32 = None

# Get the options
try:
    for option in opts:
        if option[0] == '-h':
            sys.stderr.write(usage)
            sys.exit(0)
        elif option[0] == '-v':
            verbose = 1
        elif option[0] == '-o':
            overwritefile = 1
        elif option[0] == '--complevel':
            complevel = int(option[1])
        elif option[0] == '--complib':
            complib = option[1]
        elif option[0] == '--shuffle':
            shuffle = int(option[1])
        elif option[0] == '--fletcher32':
            fletcher32 = int(option[1])
        else:
            sys.stderr.write("%s : Unrecognized option\n" % option[0])
            sys.stderr.write(usage)
            sys.exit(2)
except ValueError:
    # int() rejected the value given to a numeric option
    sys.stderr.write("Invalid value for option %s: %s\n"
                     % (option[0], sys.exc_info()[1]))
    sys.stderr.write(usage)
    sys.exit(2)

# if we pass a number of files different from 2, abort
if len(pargs) != 2:
    sys.stderr.write("You need to pass both source and destination!.\n")
    sys.stderr.write(usage)
    sys.exit(2)

# Catch the files passed as the last arguments
ncfilename = pargs[0]
h5filename = pargs[1]

# Build the Filters instance.  When no filter option was given at all,
# filters stays None and is passed as such to nctoh5.
if (complevel, complib, shuffle, fletcher32) == (None,)*4:
    filters = None
else:
    if complevel is None:
        complevel = 0
    # Only pick a default when --shuffle was not given, so that an explicit
    # --shuffle value is honoured: shuffling defaults to active when
    # compression is active.
    if shuffle is None:
        if complevel > 0:
            shuffle = 1
        else:
            shuffle = 0
    if complib is None:
        complib = "zlib"
    if fletcher32 is None:
        fletcher32 = 0
    filters = tables.Filters(complevel=complevel, complib=complib,
                             shuffle=shuffle, fletcher32=fletcher32)

# Some timing: wall-clock time and CPU time taken before the copy
t1 = time.time()
cpu1 = time.clock()
# Copy the file
if verbose:
    print("+=+"*20)
    print("Starting conversion from %s to %s" % (ncfilename, h5filename))
    print("Applying filters: %s" % (filters,))
    print("+=+"*20)

# Do the conversion
(nobjects, nbytes) = nctoh5(ncfilename, h5filename, filters, overwritefile)

# Gather some statistics
t2 = time.time()
cpu2 = time.clock()
tcopy = round(t2-t1, 3)
cpucopy = round(cpu2-cpu1, 3)
# tcopy is rounded to milliseconds, so a very fast copy can give 0.0;
# the ratios below are only computed when they are defined.
if tcopy > 0:
    tpercent = int(round(cpucopy/tcopy, 2)*100)
else:
    tpercent = 0
if verbose:
    print("Number of variables copied: %s" % nobjects)
    print("KBytes copied: %s" % round(nbytes/1024., 3))
    print("Time copying: %s s (real) %s s (cpu)  %s%%" %
          (tcopy, cpucopy, tpercent))
    if tcopy > 0:
        print("Copied variable/sec:  %s" % round(nobjects / float(tcopy), 1))
        print("Copied KB/s : %s" % int(nbytes / (tcopy * 1024)))