From: Nils G. <gos...@gm...> - 2009-07-09 08:12:53
"""datamanager.py - input output for AnuGA This module takes care of reading and writing datafiles such as topograhies, model output, etc Formats used within AnuGA: .sww: Netcdf format for storing model output f(t,x,y) .tms: Netcdf format for storing time series f(t) .csv: ASCII format for storing arbitrary points and associated attributes .pts: NetCDF format for storing arbitrary points and associated attributes .asc: ASCII format of regular DEMs as output from ArcView .prj: Associated ArcView file giving more meta data for asc format .ers: ERMapper header format of regular DEMs for ArcView .dem: NetCDF representation of regular DEM data .tsh: ASCII format for storing meshes and associated boundary and region info .msh: NetCDF format for storing meshes and associated boundary and region info .nc: Native ferret NetCDF format .geo: Houdinis ascii geometry format (?) A typical dataflow can be described as follows Manually created files: ASC, PRJ: Digital elevation models (gridded) TSH: Triangular meshes (e.g. created from anuga.pmesh) NC Model outputs for use as boundary conditions (e.g from MOST) AUTOMATICALLY CREATED FILES: ASC, PRJ -> DEM -> PTS: Conversion of DEM's to native pts file NC -> SWW: Conversion of MOST bundary files to boundary sww PTS + TSH -> TSH with elevation: Least squares fit TSH -> SWW: Conversion of TSH to sww viewable using Swollen TSH + Boundary SWW -> SWW: Simluation using abstract_2d_finite_volumes """ # This file was reverted from changeset:5484 to changeset:5470 on 10th July # by Ole. import os, sys import csv import exceptions import string import shutil from struct import unpack import array as p_array from os import sep, path, remove, mkdir, access, F_OK, W_OK, getcwd import Numeric as num from Scientific.IO.NetCDF import NetCDFFile from os.path import exists, basename, join from os import getcwd from anuga.coordinate_transforms.redfearn import redfearn, \ convert_from_latlon_to_utm from anuga.coordinate_transforms.geo_reference import Geo_reference, \ write_NetCDF_georeference, ensure_geo_reference from anuga.geospatial_data.geospatial_data import Geospatial_data,\ ensure_absolute from anuga.config import minimum_storable_height as \ default_minimum_storable_height from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a from anuga.config import max_float from anuga.utilities.numerical_tools import ensure_numeric, mean from anuga.caching.caching import myhash from anuga.utilities.anuga_exceptions import ANUGAError from anuga.shallow_water import Domain from anuga.abstract_2d_finite_volumes.pmesh2domain import \ pmesh_to_domain_instance from anuga.abstract_2d_finite_volumes.util import get_revision_number, \ remove_lone_verts, sww2timeseries, get_centroid_values from anuga.abstract_2d_finite_volumes.neighbour_mesh import segment_midpoints from anuga.load_mesh.loadASCII import export_mesh_file from anuga.utilities.polygon import intersection from anuga.utilities.system_tools import get_vars_in_expression # Default block size for sww2dem() DEFAULT_BLOCK_SIZE = 10000 ###### # Exception classes ###### class TitleValueError(exceptions.Exception): pass class DataMissingValuesError(exceptions.Exception): pass class DataFileNotOpenError(exceptions.Exception): pass class DataTimeError(exceptions.Exception): pass class DataDomainError(exceptions.Exception): pass class NewQuantity(exceptions.Exception): pass ###### # formula mappings ###### quantity_formula = {'momentum':'(xmomentum**2 + ymomentum**2)**0.5', 'depth':'stage-elevation', 'speed': \ 
##
# @brief Convert a possible filename into a standard form.
# @param s Filename to process.
# @return The new filename string.
def make_filename(s):
    """Transform argument string into a suitable filename
    """

    s = s.strip()
    s = s.replace(' ', '_')
    s = s.replace('(', '')
    s = s.replace(')', '')
    s = s.replace('__', '_')

    return s


##
# @brief Check that a specified filesystem directory path exists.
# @param path The directory path to check.
# @param verbose True if this function is to be verbose.
# @note If directory path doesn't exist, it will be created.
def check_dir(path, verbose=None):
    """Check that specified path exists.
    If path does not exist it will be created if possible

    USAGE:
       checkdir(path, verbose)

    ARGUMENTS:
        path -- Directory
        verbose -- Flag verbose output (default: None)

    RETURN VALUE:
        Verified path including trailing separator
    """

    import os.path

    if sys.platform in ['nt', 'dos', 'win32', 'what else?']:
        unix = 0
    else:
        unix = 1

    # Add terminal separator, if it's not already there
    if path[-1] != os.sep:
        path = path + os.sep

    # Expand ~ or ~username in path
    path = os.path.expanduser(path)

    # Create directory if required
    # (note: os.R_OK | os.W_OK; the original 'os.R_OK and os.W_OK'
    # evaluated to os.W_OK only and never tested readability)
    if not (os.access(path, os.R_OK | os.W_OK) or path == ''):
        try:
            os.mkdir(path)

            # Change access rights if possible
            if unix:
                os.system('chmod 775 ' + path)
            else:
                pass  # FIXME: What about access rights under Windows?

            if verbose:
                print 'MESSAGE: Directory', path, 'created.'
        except:
            print 'WARNING: Directory', path, 'could not be created.'
            if unix:
                path = '/tmp/'
            else:
                path = 'C:' + os.sep

            print "Using directory '%s' instead" % path

    return path


##
# @brief Delete directory and all sub-directories.
# @param path Path to the directory to delete.
def del_dir(path):
    """Recursively delete directory path and all its contents
    """

    if os.path.isdir(path):
        for file in os.listdir(path):
            X = os.path.join(path, file)

            if os.path.isdir(X) and not os.path.islink(X):
                del_dir(X)
            else:
                try:
                    os.remove(X)
                except:
                    print "Could not remove file %s" % X

        os.rmdir(path)


##
# @brief Remove a file or directory, reporting any error.
# @param path Path to remove.
# @param func Function to do the removal (e.g. os.remove or os.rmdir).
# @param verbose True if this function is to be verbose.
# @note ANOTHER OPTION, IF NEED IN THE FUTURE, Nick B 7/2007
def rmgeneric(path, func, verbose=False):
    ERROR_STR = """Error removing %(path)s, %(error)s"""

    try:
        func(path)
        if verbose:
            print 'Removed ', path
    except OSError, (errno, strerror):
        print ERROR_STR % {'path': path, 'error': strerror}


##
# @brief Remove directory and all sub-directories.
# @param path Filesystem path to directory to remove.
# @param verbose True if this function is to be verbose.
def removeall(path, verbose=False):
    if not os.path.isdir(path):
        return

    for x in os.listdir(path):
        fullpath = os.path.join(path, x)
        if os.path.isfile(fullpath):
            f = os.remove
            rmgeneric(fullpath, f)
        elif os.path.isdir(fullpath):
            removeall(fullpath, verbose)
            f = os.rmdir
            rmgeneric(fullpath, f, verbose)


##
# @brief Create a standard filename.
# @param datadir Directory where file is to be created.
# @param filename Filename 'stem'.
# @param format Format of the file, becomes filename extension.
# @param size Size of file, becomes part of filename.
# @param time Time (float), becomes part of filename.
# @return The complete filename path, including directory.
# @note The containing directory is created, if necessary.
def create_filename(datadir, filename, format, size=None, time=None):
    FN = check_dir(datadir) + filename

    if size is not None:
        FN += '_size%d' % size

    if time is not None:
        FN += '_time%.2f' % time

    FN += '.' + format

    return FN


##
# @brief Get all files with a standard name and a given set of attributes.
# @param datadir Directory files must be in.
# @param filename Filename stem.
# @param format Filename extension.
# @param size Filename size.
# @return A list of filenames (including directory) that match the attributes.
def get_files(datadir, filename, format, size):
    """Get all file (names) with given name, size and format
    """

    import glob

    dir = check_dir(datadir)  # Already includes the trailing separator

    # Note: create_filename() above writes '_size%d', so match that here;
    # the original pattern '_size=%d' (with an extra os.sep prepended)
    # could never match.
    pattern = dir + filename + '_size%d*.%s' % (size, format)

    return glob.glob(pattern)


##
# @brief Generic class for storing output to e.g. visualisation
#        or checkpointing
class Data_format:
    """Generic interface to data formats
    """

    ##
    # @brief Instantiate this instance.
    # @param domain
    # @param extension
    # @param mode The mode of the underlying file.
    def __init__(self, domain, extension, mode=netcdf_mode_w):
        assert mode[0] in ['r', 'w', 'a'], \
               "Mode %s must be either:\n" % mode + \
               "   'w' (write)\n" + \
               "   'r' (read)\n" + \
               "   'a' (append)"

        # Create filename
        self.filename = create_filename(domain.get_datadir(),
                                        domain.get_name(), extension)

        self.timestep = 0
        self.domain = domain

        # Exclude ghosts in case this is a parallel domain
        self.number_of_nodes = domain.number_of_full_nodes
        self.number_of_volumes = domain.number_of_full_triangles
        #self.number_of_volumes = len(domain)

        # FIXME: Should we have a general set_precision function?
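
# A minimal sketch of the naming scheme implemented above; the directory
# and stem are hypothetical. Note that create_filename() creates the
# directory as a side effect, via check_dir().
def _example_filenames():
    fn = create_filename('output', 'myrun', 'sww', size=4, time=3.5)
    # fn == 'output/myrun_size4_time3.50.sww' (on unix)

    # Later, glob for all files with that stem, size and format
    return get_files('output', 'myrun', 'sww', 4)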
##
# @brief Class for storing output to e.g. visualisation
class Data_format_sww(Data_format):
    """Interface to native NetCDF format (.sww) for storing model output

    There are two kinds of data

    1: Constant data: Vertex coordinates and field values. Stored once
    2: Variable data: Conserved quantities. Stored once per timestep.

    All data is assumed to reside at vertex locations.
    """

    ##
    # @brief Instantiate this instance.
    # @param domain ??
    # @param mode Mode of the underlying data file.
    # @param max_size ??
    # @param recursion ??
    # @note Prepare the underlying data file if mode starts with 'w'.
    def __init__(self, domain, mode=netcdf_mode_w, max_size=2000000000,
                 recursion=False):
        from Scientific.IO.NetCDF import NetCDFFile

        self.precision = num.Float32  # Use single precision for quantities

        self.recursion = recursion
        self.mode = mode

        if hasattr(domain, 'max_size'):
            self.max_size = domain.max_size  # File size max is 2Gig
        else:
            self.max_size = max_size

        if hasattr(domain, 'minimum_storable_height'):
            self.minimum_storable_height = domain.minimum_storable_height
        else:
            self.minimum_storable_height = default_minimum_storable_height

        # Call parent constructor
        Data_format.__init__(self, domain, 'sww', mode)

        # NetCDF file definition
        fid = NetCDFFile(self.filename, mode)
        if mode[0] == 'w':
            description = 'Output from anuga.abstract_2d_finite_volumes ' \
                          'suitable for plotting'

            self.writer = Write_sww()
            self.writer.store_header(fid,
                                     domain.starttime,
                                     self.number_of_volumes,
                                     self.domain.number_of_full_nodes,
                                     description=description,
                                     smoothing=domain.smooth,
                                     order=domain.default_order,
                                     sww_precision=self.precision)

            # Extra optional information
            if hasattr(domain, 'texture'):
                fid.texture = domain.texture

            if domain.quantities_to_be_monitored is not None:
                fid.createDimension('singleton', 1)
                fid.createDimension('two', 2)

                poly = domain.monitor_polygon
                if poly is not None:
                    N = len(poly)
                    fid.createDimension('polygon_length', N)
                    fid.createVariable('extrema.polygon',
                                       self.precision,
                                       ('polygon_length', 'two'))
                    fid.variables['extrema.polygon'][:] = poly

                interval = domain.monitor_time_interval
                if interval is not None:
                    fid.createVariable('extrema.time_interval',
                                       self.precision,
                                       ('two',))
                    fid.variables['extrema.time_interval'][:] = interval

                for q in domain.quantities_to_be_monitored:
                    fid.createVariable(q + '.extrema', self.precision,
                                       ('numbers_in_range',))
                    fid.createVariable(q + '.min_location', self.precision,
                                       ('numbers_in_range',))
                    fid.createVariable(q + '.max_location', self.precision,
                                       ('numbers_in_range',))
                    fid.createVariable(q + '.min_time', self.precision,
                                       ('singleton',))
                    fid.createVariable(q + '.max_time', self.precision,
                                       ('singleton',))

        fid.close()

    ##
    # @brief Store connectivity data into the underlying data file.
    def store_connectivity(self):
        """Specialisation of store_connectivity for NetCDF format

        Writes x,y,z coordinates of triangles constituting the bed elevation.
        """

        from Scientific.IO.NetCDF import NetCDFFile

        domain = self.domain

        # Append to the NetCDF file
        fid = NetCDFFile(self.filename, netcdf_mode_a)

        # Get the variables
        #x = fid.variables['x']
        #y = fid.variables['y']
        #z = fid.variables['elevation']
        #volumes = fid.variables['volumes']

        # Get X, Y and bed elevation Z
        Q = domain.quantities['elevation']
        X, Y, Z, V = Q.get_vertex_values(xy=True, precision=self.precision)

        # Store the connectivity data
        points = num.concatenate((X[:, num.NewAxis], Y[:, num.NewAxis]),
                                 axis=1)
        self.writer.store_triangulation(fid,
                                        points,
                                        #V.astype(volumes.typecode()),
                                        V.astype(num.Float32),
                                        Z,
                                        points_georeference=\
                                            domain.geo_reference)

        fid.close()

    ##
    # @brief Store time and named quantities to the underlying data file.
    # @param names The names of the quantities to store.
    # @note If 'names' not supplied, store a standard set of quantities.
    def store_timestep(self, names=None):
        """Store time and named quantities to file
        """

        from Scientific.IO.NetCDF import NetCDFFile
        import types
        from time import sleep
        from os import stat

        if names is None:
            # Standard shallow water wave equation quantities in ANUGA
            names = ['stage', 'xmomentum', 'ymomentum']

        # Get NetCDF
        retries = 0
        file_open = False
        while not file_open and retries < 10:
            try:
                # Open existing file
                fid = NetCDFFile(self.filename, netcdf_mode_a)
            except IOError:
                # This could happen if someone was reading the file.
                # In that case, wait a while and try again
                msg = 'Warning (store_timestep): File %s could not be ' \
                      'opened' % self.filename
                msg += ' - trying step %s again' % self.domain.time
                print msg
                retries += 1
                sleep(1)
            else:
                file_open = True

        if not file_open:
            msg = 'File %s could not be opened for append' % self.filename
            raise DataFileNotOpenError(msg)

        # Check to see if the file is already too big:
        time = fid.variables['time']
        i = len(time) + 1
        file_size = stat(self.filename)[6]
        file_size_increase = file_size / i
        if file_size + file_size_increase > self.max_size * 2**self.recursion:
            # In order to get the file name and start time correct,
            # I change the domain.filename and domain.starttime.
            # This is the only way to do this without changing
            # other modules (I think).

            # Write a filename addon that won't break Swollen's reader
            # (10.sww is bad)
            filename_ext = '_time_%s' % self.domain.time
            filename_ext = filename_ext.replace('.', '_')

            # Remember the old filename, then give domain a
            # name with the extension
            old_domain_filename = self.domain.get_name()
            if not self.recursion:
                self.domain.set_name(old_domain_filename + filename_ext)

            # Change the domain starttime to the current time
            old_domain_starttime = self.domain.starttime
            self.domain.starttime = self.domain.time

            # Build a new data_structure.
            next_data_structure = Data_format_sww(self.domain,
                                                  mode=self.mode,
                                                  max_size=self.max_size,
                                                  recursion=self.recursion+1)
            if not self.recursion:
                print '    file_size = %s' % file_size
                print '    saving file to %s' % next_data_structure.filename

            # Set up the new data_structure
            self.domain.writer = next_data_structure

            # FIXME - could be cleaner to use domain.store_timestep etc.
            next_data_structure.store_connectivity()
            next_data_structure.store_timestep(names)
            fid.sync()
            fid.close()

            # Restore the old starttime and filename
            self.domain.starttime = old_domain_starttime
            self.domain.set_name(old_domain_filename)
        else:
            self.recursion = False
            domain = self.domain

            # Get the variables
            time = fid.variables['time']
            stage = fid.variables['stage']
            xmomentum = fid.variables['xmomentum']
            ymomentum = fid.variables['ymomentum']
            i = len(time)
            if type(names) not in [types.ListType, types.TupleType]:
                names = [names]

            if 'stage' in names \
               and 'xmomentum' in names \
               and 'ymomentum' in names:
                # Get stage, elevation, depth and select only those
                # values where minimum_storable_height is exceeded
                Q = domain.quantities['stage']
                A, _ = Q.get_vertex_values(xy=False,
                                           precision=self.precision)
                z = fid.variables['elevation']

                storable_indices = (A-z[:] >= self.minimum_storable_height)
                stage = num.choose(storable_indices, (z[:], A))

                # Define a zero vector of same size and type as A
                # for use with momenta
                null = num.zeros(num.size(A), A.typecode())

                # Get xmomentum where depth exceeds minimum_storable_height
                Q = domain.quantities['xmomentum']
                xmom, _ = Q.get_vertex_values(xy=False,
                                              precision=self.precision)
                xmomentum = num.choose(storable_indices, (null, xmom))

                # Get ymomentum where depth exceeds minimum_storable_height
                Q = domain.quantities['ymomentum']
                ymom, _ = Q.get_vertex_values(xy=False,
                                              precision=self.precision)
                ymomentum = num.choose(storable_indices, (null, ymom))

                # Write quantities to underlying data file
                self.writer.store_quantities(fid,
                                             time=self.domain.time,
                                             sww_precision=self.precision,
                                             stage=stage,
                                             xmomentum=xmomentum,
                                             ymomentum=ymomentum)
            else:
                msg = 'Quantities stored must be: stage, xmomentum, ' \
                      'ymomentum, but I got: ' + str(names)
                raise Exception(msg)

            # Update extrema if requested
            domain = self.domain
            if domain.quantities_to_be_monitored is not None:
                for q, info in domain.quantities_to_be_monitored.items():
                    if info['min'] is not None:
                        fid.variables[q + '.extrema'][0] = info['min']
                        fid.variables[q + '.min_location'][:] = \
                            info['min_location']
                        fid.variables[q + '.min_time'][0] = info['min_time']

                    if info['max'] is not None:
                        fid.variables[q + '.extrema'][1] = info['max']
                        fid.variables[q + '.max_location'][:] = \
                            info['max_location']
                        fid.variables[q + '.max_time'][0] = info['max_time']

            # Flush and close
            fid.sync()
            fid.close()
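
# A minimal sketch of inspecting a file written by Data_format_sww, using
# the same Scientific.IO.NetCDF bindings the module already imports;
# 'myrun.sww' is a made-up filename and the variable names listed are
# the usual ones stored by Write_sww.
def _example_inspect_sww():
    from Scientific.IO.NetCDF import NetCDFFile

    fid = NetCDFFile('myrun.sww', netcdf_mode_r)
    print fid.variables.keys()        # e.g. x, y, volumes, time, stage, ...
    print len(fid.variables['time'])  # Number of stored timesteps
    fid.close()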
##
# @brief Class for handling checkpoint data
class Data_format_cpt(Data_format):
    """Interface to native NetCDF format (.cpt)
    """

    ##
    # @brief Initialize this instantiation.
    # @param domain ??
    # @param mode Mode of underlying data file (default WRITE).
    def __init__(self, domain, mode=netcdf_mode_w):
        from Scientific.IO.NetCDF import NetCDFFile

        self.precision = num.Float  # Use full precision

        # FIXME: extension is 'sww' although this class documents .cpt
        Data_format.__init__(self, domain, 'sww', mode)

        # NetCDF file definition
        fid = NetCDFFile(self.filename, mode)
        if mode[0] == 'w':
            # Create new file
            fid.institution = 'Geoscience Australia'
            fid.description = 'Checkpoint data'
            #fid.smooth = domain.smooth
            fid.order = domain.default_order

            # Dimension definitions
            fid.createDimension('number_of_volumes', self.number_of_volumes)
            fid.createDimension('number_of_vertices', 3)

            # Store info at all vertices (no smoothing)
            fid.createDimension('number_of_points', 3*self.number_of_volumes)
            fid.createDimension('number_of_timesteps', None)  # Extensible

            # Variable definitions

            # Mesh
            fid.createVariable('x', self.precision, ('number_of_points',))
            fid.createVariable('y', self.precision, ('number_of_points',))

            fid.createVariable('volumes', num.Int, ('number_of_volumes',
                                                    'number_of_vertices'))

            fid.createVariable('time', self.precision,
                               ('number_of_timesteps',))

            # Allocate space for all quantities
            for name in domain.quantities.keys():
                fid.createVariable(name, self.precision,
                                   ('number_of_timesteps',
                                    'number_of_points'))

        fid.close()

    ##
    # @brief Store connectivity data to underlying data file.
    def store_checkpoint(self):
        """Write x,y coordinates of triangles and their connectivity,
        constituting the bed elevation.
        """

        from Scientific.IO.NetCDF import NetCDFFile

        domain = self.domain

        # Get NetCDF
        fid = NetCDFFile(self.filename, netcdf_mode_a)

        # Get the variables
        x = fid.variables['x']
        y = fid.variables['y']
        # FIXME: the original never fetched z, so the assignment below
        # raised NameError; note also that 'elevation' is created
        # per-timestep above, so its shape may not suit a one-off write.
        z = fid.variables['elevation']
        volumes = fid.variables['volumes']

        # Get X, Y and bed elevation Z
        Q = domain.quantities['elevation']
        X, Y, Z, V = Q.get_vertex_values(xy=True, precision=self.precision)

        x[:] = X.astype(self.precision)
        y[:] = Y.astype(self.precision)
        z[:] = Z.astype(self.precision)

        volumes[:] = V

        fid.close()

    ##
    # @brief Store time and named quantities to underlying data file.
    # @param name
    def store_timestep(self, name):
        """Store time and named quantity to file
        """

        from Scientific.IO.NetCDF import NetCDFFile
        from time import sleep

        # Get NetCDF
        retries = 0
        file_open = False
        while not file_open and retries < 10:
            try:
                fid = NetCDFFile(self.filename, netcdf_mode_a)
            except IOError:
                # This could happen if someone was reading the file.
                # In that case, wait a while and try again
                msg = 'Warning (store_timestep): File %s could not be ' \
                      'opened - trying again' % self.filename
                print msg
                retries += 1
                sleep(1)
            else:
                file_open = True

        if not file_open:
            msg = 'File %s could not be opened for append' % self.filename
            raise DataFileNotOpenError(msg)

        domain = self.domain

        # Get the variables
        time = fid.variables['time']
        stage = fid.variables['stage']
        i = len(time)

        # Store stage
        time[i] = self.domain.time

        # Get quantity
        Q = domain.quantities[name]
        A, V = Q.get_vertex_values(xy=False, precision=self.precision)

        stage[i, :] = A.astype(self.precision)

        # Flush and close
        fid.sync()
        fid.close()
##
# @brief Class for National Exposure Database storage (NEXIS).

LAT_TITLE = 'LATITUDE'
LONG_TITLE = 'LONGITUDE'
X_TITLE = 'x'
Y_TITLE = 'y'

class Exposure_csv:
    ##
    # @brief Instantiate this instance.
    # @param file_name Name of underlying data file.
    # @param latitude_title ??
    # @param longitude_title ??
    # @param is_x_y_locations ??
    # @param x_title ??
    # @param y_title ??
    # @param refine_polygon ??
    # @param title_check_list ??
    def __init__(self, file_name, latitude_title=LAT_TITLE,
                 longitude_title=LONG_TITLE, is_x_y_locations=None,
                 x_title=X_TITLE, y_title=Y_TITLE,
                 refine_polygon=None, title_check_list=None):
        """
        This class is for handling the exposure csv file.
        It reads the file in and converts the lats and longs to a
        geospatial data object.
        Use the methods to read and write columns.

        The format of the csv files it reads is:
            The first row is a title row.
            Commas are the delimiters.
            Each column is a 'set' of data.

        Feel free to use/expand it to read other csv files.

        It is not for adding and deleting rows.

        Can geospatial handle string attributes?  It's not made for them.
        Currently it can't load and save string att's.

        So just use geospatial to hold the x, y and georef?  Bad, since
        different att's are in different structures.  Not so bad, the info
        to write if the .csv file is saved is in attribute_dic.

        The location info is in the geospatial attribute.
        """

        self._file_name = file_name
        self._geospatial = None

        # self._attribute_dic is a dictionary.
        # The keys are the column titles.
        # The values are lists of column data.

        # self._title_index_dic is a dictionary.
        # The keys are the column titles.
        # The values are the index positions of file columns.
        self._attribute_dic, self._title_index_dic = \
            csv2dict(self._file_name, title_check_list=title_check_list)

        try:
            # Have code here that handles caps or lower
            lats = self._attribute_dic[latitude_title]
            longs = self._attribute_dic[longitude_title]
        except KeyError:
            # Maybe a warning..
            # Let's see if this works..
            if is_x_y_locations is not False:
                is_x_y_locations = True
        else:
            self._geospatial = Geospatial_data(latitudes=lats,
                                               longitudes=longs)

        if is_x_y_locations is True:
            if self._geospatial is not None:
                pass  # FIXME: throw an error

            try:
                xs = self._attribute_dic[x_title]
                ys = self._attribute_dic[y_title]
                points = [[float(i), float(j)] for i, j in map(None, xs, ys)]
            except KeyError:
                # Maybe a warning..
                msg = "Could not find location information."
                raise TitleValueError(msg)
            else:
                self._geospatial = Geospatial_data(data_points=points)

        # Create a list of points that are in the refining_polygon,
        # described by a list of indexes representing the points.

    ##
    # @brief Create a comparison method.
    # @param self This object.
    # @param other The other object.
    # @return True if objects are 'same'.
    def __cmp__(self, other):
        # Check that 'other' is an instance of this class
        if isinstance(self, type(other)):
            result = cmp(self._attribute_dic, other._attribute_dic)
            if result != 0:
                return result

            # The order of the columns is important. Therefore..
            result = cmp(self._title_index_dic, other._title_index_dic)
            if result != 0:
                return result

            for self_ls, other_ls in map(None, self._attribute_dic,
                                         other._attribute_dic):
                result = cmp(self._attribute_dic[self_ls],
                             other._attribute_dic[other_ls])
                if result != 0:
                    return result

            return 0
        else:
            return 1

    ##
    # @brief Get a list of column values given a column name.
    # @param column_name The name of the column to get values from.
    # @param use_refind_polygon Unused??
    def get_column(self, column_name, use_refind_polygon=False):
        """
        Given a column name, return a list of the column values.

        Note: the type of the values will be String!
        Do this to change a list of strings to a list of floats:
            time = [float(x) for x in time]

        Not implemented: if use_refind_polygon is True, only return
        values in the refined polygon.
        """

        if not self._attribute_dic.has_key(column_name):
            msg = 'There is no column called %s!' % column_name
            raise TitleValueError(msg)

        return self._attribute_dic[column_name]
    ##
    # @brief Interpolate a value column against a known column (stub).
    # @param value_column_name ??
    # @param known_column_name ??
    # @param known_values ??
    # @param use_refind_polygon ??
    def get_value(self, value_column_name, known_column_name,
                  known_values, use_refind_polygon=False):
        """
        Do linear interpolation on the known_column, using the known_values,
        to return a value of the value_column_name.
        """

        pass

    ##
    # @brief Get a geospatial object that describes the locations.
    # @param use_refind_polygon Unused??
    def get_location(self, use_refind_polygon=False):
        """
        Return a geospatial object which describes the
        locations of the location file.

        Note: if there is no location info, this returns None.

        Not implemented: if use_refind_polygon is True, only return
        values in the refined polygon.
        """

        return self._geospatial

    ##
    # @brief Add column to 'end' of CSV data.
    # @param column_name The new column name.
    # @param column_values The new column values.
    # @param overwrite If True, overwrites last column, doesn't add at end.
    def set_column(self, column_name, column_values, overwrite=False):
        """
        Add a column to the 'end' (with the right-most column being the end)
        of the csv file.

        Set overwrite to True if you want to overwrite a column.

        Note: in column_name, white space is removed and case is not checked.

        Precondition: The column_name and column_values cannot have
        commas in them.
        """

        # Sanity checks
        value_row_count = \
            len(self._attribute_dic[self._title_index_dic.keys()[0]])
        if len(column_values) != value_row_count:
            msg = 'The number of column values must equal the number of rows.'
            raise DataMissingValuesError(msg)

        # Check new column name isn't already used, and we aren't overwriting
        if self._attribute_dic.has_key(column_name):
            if not overwrite:
                msg = 'Column name %s already in use!' % column_name
                raise TitleValueError(msg)
        else:
            # New title.  Add it to the title index.
            self._title_index_dic[column_name] = len(self._title_index_dic)

        self._attribute_dic[column_name] = column_values

    ##
    # @brief Save the exposure CSV file.
    # @param file_name If supplied, use this filename, not original.
    def save(self, file_name=None):
        """
        Save the exposure csv file.
        """

        if file_name is None:
            file_name = self._file_name

        fd = open(file_name, 'wb')
        writer = csv.writer(fd)

        # Write the titles to the csv file
        line = [None] * len(self._title_index_dic)
        for title in self._title_index_dic.iterkeys():
            line[self._title_index_dic[title]] = title
        writer.writerow(line)

        # Write the values to the csv file
        value_row_count = \
            len(self._attribute_dic[self._title_index_dic.keys()[0]])
        for row_i in range(value_row_count):
            line = [None] * len(self._title_index_dic)
            for title in self._title_index_dic.iterkeys():
                line[self._title_index_dic[title]] = \
                    self._attribute_dic[title][row_i]
            writer.writerow(line)
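
# A minimal usage sketch for Exposure_csv; 'exposure.csv' and the 'DEPTH'
# column are made-up names. Values come back as strings, so convert them
# as the get_column docstring suggests.
def _example_exposure_roundtrip():
    exposure = Exposure_csv('exposure.csv')
    depths = [float(x) for x in exposure.get_column('DEPTH')]
    locations = exposure.get_location()  # Geospatial_data object or None

    # Add a derived column (same row count) and save under a new name
    exposure.set_column('DEPTH_M', [str(d) for d in depths])
    exposure.save(file_name='exposure_processed.csv')
    return locations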
def csv2building_polygons(file_name,
                          floor_height=3,
                          clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0

    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors are converted to m above MSL and
    returned as a separate dictionary also keyed by id.

    Optional parameter floor_height is the height of each building story.
    Optional parameter clipping_polygons is a list of polygons selecting
    buildings. Any building not in these polygons will be omitted.

    See csv2polygons for more details.
    """

    # Forward the caller's clipping polygons
    # (the original hardwired clipping_polygons=None here)
    polygons, values = csv2polygons(file_name,
                                    value_name='floors',
                                    clipping_polygons=clipping_polygons)

    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights


##
# @brief Convert CSV file into a dictionary of polygons and associated values.
# @param file_name The path to the file to read.
# @param value_name Name for the 4th column.
def csv2polygons(file_name,
                 value_name='value',
                 clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0

    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary
    also keyed by id.

    easting: x coordinate relative to zone implied by the model
    northing: y coordinate relative to zone implied by the model
    id: tag for polygon comprising points with this tag
    value: numeral associated with each polygon. These must be the same
           for all points in each polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc - or it can be omitted, in which case the returned values
    will be None.

    Eastings and Northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument: clipping_polygons will select only those polygons
    that are fully within one or more of the clipping_polygons. In other
    words, any polygon from the csv file which has at least one point not
    inside one of the clipping polygons will be excluded.

    See underlying function csv2dict for more details.
    """

    X, _ = csv2dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X.keys(), msg

    msg = 'Did not find expected column header: northing'
    assert 'northing' in X.keys(), msg  # Was ', northing' - a NameError

    msg = 'Did not find expected column header: id'
    assert 'id' in X.keys(), msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X.keys(), msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}
    last_id = None
    for i, id in enumerate(X['id']):
        # Check for duplicate polygons
        if id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (id, i)
            raise Exception(msg)

        if id not in polygons:
            # Start new polygon
            polygons[id] = []
            if values is not None:
                values[id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[id] = True

        polygons[id].append(point)

        # Check that value is the same across each polygon
        # (guarded: the original built this message even for 3-column
        # files, where values is None)
        if values is not None:
            msg = 'Values must be the same across each polygon. '
            msg += 'I got %s in line %d but it should have been %s' \
                   % (X[value_name][i], i, values[id])
            assert values[id] == X[value_name][i], msg

        last_id = id

    # Weed out polygons that were not wholly inside clipping polygons
    for id in excluded_polygons:
        del polygons[id]

    return polygons, values
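
# A minimal usage sketch for the two converters above; 'buildings.csv' is
# a made-up filename assumed to follow the documented four-column format.
def _example_read_building_polygons():
    polygons, values = csv2polygons('buildings.csv', value_name='floors')
    for id in polygons:
        print id, len(polygons[id]), values[id]

    # Or directly as heights above MSL (floors * 3 m by default):
    polygons, heights = csv2building_polygons('buildings.csv')
    return polygons, heights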
##
# @brief Convert CSV file to a dictionary of arrays.
# @param file_name The path to the file to read.
def csv2array(file_name):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a dictionary of numeric arrays.

    See underlying function csv2dict for more details.
    """

    X, _ = csv2dict(file_name)

    Y = {}
    for key in X.keys():
        Y[key] = num.array([float(x) for x in X[key]])

    return Y


##
# @brief Read a CSV file and convert to a dictionary of {key: column}.
# @param file_name The path to the file to read.
# @param title_check_list List of titles that *must* be columns in the file.
# @return Two dicts: ({key: column}, {title: index}).
# @note WARNING: Values are returned as strings.
def csv2dict(file_name, title_check_list=None):
    """
    Load in the csv as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    Two dictionaries are returned.

    WARNING: Values are returned as strings.
             Do this to change a list of strings to a list of floats:
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers
    # FIXME(Ole): Consider a wrapper automatically converting text fields
    #             to the right type by trying for: int, float, string

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = []  # List of titles

    reader = csv.reader(file(file_name))

    # Read in and manipulate the title info
    titles = reader.next()
    for i, title in enumerate(titles):
        header = title.strip()
        titles_stripped.append(header)
        title_index_dic[header] = i
    title_count = len(titles_stripped)

    # Check required columns
    if title_check_list is not None:
        for title_check in title_check_list:
            if not title_index_dic.has_key(title_check):
                msg = 'Reading error. This column is not present: %s' \
                      % title_check
                raise IOError(msg)

    # Create a dictionary of column values, indexed by column title
    for line in reader:
        n = len(line)  # Number of entries
        if n != title_count:
            msg = 'Entry in file %s had %d columns ' % (file_name, n)
            msg += 'although there were %d headers' % title_count
            raise IOError(msg)
        for i, value in enumerate(line):
            attribute_dic.setdefault(titles_stripped[i], []).append(value)

    return attribute_dic, title_index_dic


##
# @brief Store x,y,z vectors as a Wavefront .obj file.
# @param filename Path of the obj file to write.
# @param x
# @param y
# @param z
def write_obj(filename, x, y, z):
    """Store x,y,z vectors into filename (obj format).

    Vectors are assumed to have dimension (M,3) where
    M corresponds to the number of elements.
    Triangles are assumed to be disconnected.

    The three numbers in each vector correspond to three vertices,

    e.g. the x coordinate of vertex 1 of element i is in x[i,1]
    """

    import os.path

    root, ext = os.path.splitext(filename)
    if ext == '.obj':
        FN = filename
    else:
        FN = filename + '.obj'

    outfile = open(FN, 'wb')
    outfile.write("# Triangulation as an obj file\n")

    M, N = x.shape
    assert N == 3  # Assuming three vertices per element

    for i in range(M):
        for j in range(N):
            outfile.write("v %f %f %f\n" % (x[i,j], y[i,j], z[i,j]))

    for i in range(M):
        base = i * N
        outfile.write("f %d %d %d\n" % (base+1, base+2, base+3))

    outfile.close()
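
# A minimal sketch of write_obj with a single, made-up triangle. Each of
# x, y, z has shape (M,3): one row per triangle, one column per vertex.
def _example_write_obj():
    x = num.array([[0.0, 1.0, 0.0]])
    y = num.array([[0.0, 0.0, 1.0]])
    z = num.array([[0.0, 0.0, 0.0]])
    write_obj('tri', x, y, z)  # '.obj' extension is appended automatically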
#########################################################
# Conversion routines
#########################################################

##
# @brief Convert SWW data to OBJ data.
# @param basefilename Stem of filename, needs size and extension added.
# @param size The number of lines to write.
def sww2obj(basefilename, size):
    """Convert netcdf based data output to obj
    """

    from Scientific.IO.NetCDF import NetCDFFile

    # Get NetCDF
    FN = create_filename('.', basefilename, 'sww', size)
    print 'Reading from ', FN
    fid = NetCDFFile(FN, netcdf_mode_r)  # Open existing file for read

    # Get the variables
    x = fid.variables['x']
    y = fid.variables['y']
    z = fid.variables['elevation']
    time = fid.variables['time']
    stage = fid.variables['stage']

    M = size  # Number of lines
    xx = num.zeros((M, 3), num.Float)
    yy = num.zeros((M, 3), num.Float)
    zz = num.zeros((M, 3), num.Float)

    for i in range(M):
        for j in range(3):
            xx[i,j] = x[i+j*M]
            yy[i,j] = y[i+j*M]
            zz[i,j] = z[i+j*M]

    # Write obj for bathymetry
    FN = create_filename('.', basefilename, 'obj', size)
    write_obj(FN, xx, yy, zz)

    # Now read all the data with variable information, combine with
    # x,y info and store as obj
    for k in range(len(time)):
        t = time[k]
        print 'Processing timestep %f' % t

        for i in range(M):
            for j in range(3):
                zz[i,j] = stage[k, i+j*M]

        # Write obj for variable data
        #FN = create_filename(basefilename, 'obj', size, time=t)
        FN = create_filename('.', basefilename[:5], 'obj', size, time=t)
        write_obj(FN, xx, yy, zz)


##
# @brief Convert line based DAT data output to OBJ data.
# @param basefilename Stem of filename, needs size and extension added.
def dat2obj(basefilename):
    """Convert line based data output to obj

    FIXME: Obsolete?
    """

    import glob, os
    from anuga.config import data_dir

    # Get bathymetry and x,y's
    lines = open(data_dir + os.sep + basefilename + '_geometry.dat',
                 'r').readlines()

    M = len(lines)  # Number of lines
    x = num.zeros((M, 3), num.Float)
    y = num.zeros((M, 3), num.Float)
    z = num.zeros((M, 3), num.Float)

    for i, line in enumerate(lines):
        tokens = line.split()
        values = map(float, tokens)

        for j in range(3):
            x[i,j] = values[j*3]
            y[i,j] = values[j*3+1]
            z[i,j] = values[j*3+2]

    # Write obj for bathymetry
    write_obj(data_dir + os.sep + basefilename + '_geometry', x, y, z)

    # Now read all the data files with variable information, combine with
    # x,y info and store as obj.
    files = glob.glob(data_dir + os.sep + basefilename + '*.dat')
    for filename in files:
        print 'Processing %s' % filename

        # Note: glob already returns paths including data_dir,
        # so don't prepend it again (the original did, giving bad paths)
        lines = open(filename, 'r').readlines()
        assert len(lines) == M
        root, ext = os.path.splitext(filename)

        # Get time from filename
        i0 = filename.find('_time=')
        if i0 == -1:
            # Skip bathymetry file
            continue

        i0 += 6  # Position where time starts
        i1 = filename.find('.dat')

        if i1 > i0:
            t = float(filename[i0:i1])
        else:
            raise DataTimeError('Hmmmm')

        for i, line in enumerate(lines):
            tokens = line.split()
            values = map(float, tokens)

            for j in range(3):
                z[i,j] = values[j]

        # Write obj for variable data
        write_obj(data_dir + os.sep + basefilename + '_time=%.4f' % t,
                  x, y, z)


##
# @brief Filter data file, selecting timesteps first:step:last.
# @param filename1 Data file to filter.
# @param filename2 File to write filtered timesteps to.
# @param first First timestep.
# @param last Last timestep.
# @param step Timestep stride.
def filter_netcdf(filename1, filename2, first=0, last=None, step=1):
    """Read netcdf filename1, pick timesteps first:step:last and save to
    netcdf file filename2
    """

    from Scientific.IO.NetCDF import NetCDFFile

    # Get NetCDF
    infile = NetCDFFile(filename1, netcdf_mode_r)   # Open existing file for read
    outfile = NetCDFFile(filename2, netcdf_mode_w)  # Open new file

    # Copy dimensions
    for d in infile.dimensions:
        outfile.createDimension(d, infile.dimensions[d])

    # Copy variable definitions
    for name in infile.variables:
        var = infile.variables[name]
        outfile.createVariable(name, var.typecode(), var.dimensions)

    # Copy the static variables
    for name in infile.variables:
        if name == 'time' or name == 'stage':
            pass
        else:
            outfile.variables[name][:] = infile.variables[name][:]

    # Copy selected timesteps
    time = infile.variables['time']
    stage = infile.variables['stage']

    newtime = outfile.variables['time']
    newstage = outfile.variables['stage']

    if last is None:
        last = len(time)

    selection = range(first, last, step)
    for i, j in enumerate(selection):
        print 'Copying timestep %d of %d (%f)' % (j, last-first, time[j])
        newtime[i] = time[j]
        newstage[i,:] = stage[j,:]

    # Close
    infile.close()
    outfile.close()


##
# @brief Return instance of class of given format using filename.
# @param domain Data domain (eg, 'sww', etc).
# @param mode The mode to open domain in.
# @return A class instance of required domain and mode.
# Get data objects
def get_dataobject(domain, mode=netcdf_mode_w):
    """Return instance of class of given format using filename
    """

    cls = eval('Data_format_%s' % domain.format)
    return cls(domain, mode)


##
# @brief Convert DEM data to PTS data.
# @param basename_in Stem of input filename.
# @param basename_out Stem of output filename.
# @param easting_min
# @param easting_max
# @param northing_min
# @param northing_max
# @param use_cache
# @param verbose
# @return
def dem2pts(basename_in, basename_out=None,
            easting_min=None, easting_max=None,
            northing_min=None, northing_max=None,
            use_cache=False, verbose=False):
    """Read Digital Elevation model from the following NetCDF format (.dem)

    Example:

    ncols         3121
    nrows         1800
    xllcorner     722000
    yllcorner     5893000
    cellsize      25
    NODATA_value  -9999
    138.3698 137.4194 136.5062 135.5558 ..........

    Convert to NetCDF pts format which is

    points:  (Nx2) Float array
    elevation:  N Float array
    """

    kwargs = {'basename_out': basename_out,
              'easting_min': easting_min,
              'easting_max': easting_max,
              'northing_min': northing_min,
              'northing_max': northing_max,
              'verbose': verbose}

    if use_cache is True:
        from caching import cache
        result = cache(_dem2pts, basename_in, kwargs,
                       dependencies=[basename_in + '.dem'],
                       verbose=verbose)
    else:
        # apply() is deprecated; keyword expansion does the same thing
        result = _dem2pts(basename_in, **kwargs)

    return result
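
# A hypothetical dem2pts call: read 'perth.dem', clip to a bounding box
# (coordinates loosely based on the docstring example above) and cache
# the result; this writes 'perth.pts'.
def _example_dem2pts():
    dem2pts('perth',
            easting_min=722000, easting_max=730000,
            northing_min=5893000, northing_max=5900000,
            use_cache=True, verbose=True)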
##
# @brief Internal helper for dem2pts.
# @param basename_in
# @param basename_out
# @param verbose
# @param easting_min
# @param easting_max
# @param northing_min
# @param northing_max
def _dem2pts(basename_in, basename_out=None, verbose=False,
             easting_min=None, easting_max=None,
             northing_min=None, northing_max=None):
    """Read Digital Elevation model from the following NetCDF format (.dem)

    Internal function. See public function dem2pts for details.
    """

    # FIXME: Can this be written feasibly using write_pts?

    import os
    from Scientific.IO.NetCDF import NetCDFFile

    root = basename_in

    # Get NetCDF
    infile = NetCDFFile(root + '.dem', netcdf_mode_r)

    if verbose: print 'Reading DEM from %s' % (root + '.dem')

    ncols = infile.ncols[0]
    nrows = infile.nrows[0]
    xllcorner = infile.xllcorner[0]  # Easting of lower left corner
    yllcorner = infile.yllcorner[0]  # Northing of lower left corner
    cellsize = infile.cellsize[0]
    NODATA_value = infile.NODATA_value[0]
    dem_elevation = infile.variables['elevation']

    zone = infile.zone[0]
    false_easting = infile.false_easting[0]
    false_northing = infile.false_northing[0]

    # Text strings
    projection = infile.projection
    datum = infile.datum
    units = infile.units

    # Get output file
    if basename_out is None:
        ptsname = root + '.pts'
    else:
        ptsname = basename_out + '.pts'

    if verbose: print 'Store to NetCDF file %s' % ptsname

    # NetCDF file definition
    outfile = NetCDFFile(ptsname, netcdf_mode_w)  # Create new file

    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF pts format for compact and portable ' \
                          'storage of spatial point data'

    # Assign default values
    if easting_min is None: easting_min = xllcorner
    if easting_max is None: easting_max = xllcorner + ncols*cellsize
    if northing_min is None: northing_min = yllcorner
    if northing_max is None: northing_max = yllcorner + nrows*cellsize

    # Compute offsets to update georeferencing
    easting_offset = xllcorner - easting_min
    northing_offset = yllcorner - northing_min

    # Georeferencing
    outfile.zone = zone
    outfile.xllcorner = easting_min   # Easting of lower left corner
    outfile.yllcorner = northing_min  # Northing of lower left corner
    outfile.false_easting = false_easting
    outfile.false_northing = false_northing

    outfile.projection = projection
    outfile.datum = datum
    outfile.units = units

    # Grid info (FIXME: probably not going to be used, but heck)
    outfile.ncols = ncols
    outfile.nrows = nrows

    dem_elevation_r = num.reshape(dem_elevation, (nrows, ncols))
    totalnopoints = nrows*ncols

    # Calculating number of NODATA_values for each row in clipped region
    # FIXME: use array operations to do faster
    nn = 0
    k = 0
    i1_0 = 0
    j1_0 = 0
    thisj = 0
    thisi = 0
    for i in range(nrows):
        y = (nrows-i-1)*cellsize + yllcorner
        for j in range(ncols):
            x = j*cellsize + xllcorner
            if easting_min <= x <= easting_max \
               and northing_min <= y <= northing_max:
                thisj = j
                thisi = i
                if dem_elevation_r[i,j] == NODATA_value:
                    nn += 1

                if k == 0:
                    i1_0 = i
                    j1_0 = j

                k += 1

    index1 = j1_0
    index2 = thisj

    # Dimension definitions
    nrows_in_bounding_box = int(round((northing_max-northing_min)/cellsize))
    ncols_in_bounding_box = int(round((easting_max-easting_min)/cellsize))

    clippednopoints = (thisi+1-i1_0)*(thisj+1-j1_0)
    nopoints = clippednopoints-nn

    clipped_dem_elev = dem_elevation_r[i1_0:thisi+1, j1_0:thisj+1]

    if verbose:
        print 'There are %d values in the elevation' % totalnopoints
        print 'There are %d values in the clipped elevation' \
              % clippednopoints
        print 'There are %d NODATA_values in the clipped elevation' % nn

    outfile.createDimension('number_of_points', nopoints)
    outfile.createDimension('number_of_dimensions', 2)  # This is 2d data

    # Variable definitions
    outfile.createVariable('points', num.Float, ('number_of_points',
                                                 'number_of_dimensions'))
    outfile.createVariable('elevation', num.Float, ('number_of_points',))

    # Get handles to the variables
    points = outfile.variables['points']
    elevation = outfile.variables['elevation']

    lenv = index2-index1+1

    # Store data
    global_index = 0
    # for i in range(nrows):
    for i in range(i1_0, thisi+1, 1):
        if verbose and i % ((nrows+10)/10) == 0:
            print 'Processing row %d of %d' % (i, nrows)
        lower_index = global_index

        v = dem_elevation_r[i, index1:index2+1]
        no_NODATA = num.sum(v == NODATA_value)
        if no_NODATA > 0:
            newcols = lenv - no_NODATA  # ncols_in_bounding_box - no_NODATA
        else:
            newcols = lenv              # ncols_in_bounding_box

        telev = num.zeros(newcols, num.Float)
        tpoints = num.zeros((newcols, 2), num.Float)

        local_index = 0

        y = (nrows-i-1)*cellsize + yllcorner
        #for j in range(ncols):
        for j in range(j1_0, index2+1, 1):
            x = j*cellsize + xllcorner
            if easting_min <= x <= easting_max \
               and northing_min <= y <= northing_max \
               and dem_elevation_r[i,j] != NODATA_value:
                tpoints[local_index, :] = [x-easting_min, y-northing_min]
                telev[local_index] = dem_elevation_r[i, j]
                global_index += 1
                local_index += 1

        upper_index = global_index

        if upper_index == lower_index + newcols:
            points[lower_index:upper_index, :] = tpoints
            elevation[lower_index:upper_index] = telev

    assert global_index == nopoints, 'index not equal to number of points'

    infile.close()
    outfile.close()


##
# @brief Return block of surface lines for each cross section
# @param lines Iterable of text lines to process.
# @note BROKEN? UNUSED?
def _read_hecras_cross_sections(lines):
    """Return block of surface lines for each cross section

    Starts with SURFACE LINE,
    Ends with END CROSS-SECTION
    """

    points = []

    reading_surface = False
    for i, line in enumerate(lines):
        if len(line.strip()) == 0:  # Ignore blanks
            continue

        if lines[i].strip().startswith('SURFACE LINE'):
            reading_surface = True
            continue

        if lines[i].strip().startswith('END') and reading_surface:
            yield points
            reading_surface = False
            points = []

        if reading_surface:
            fields = line.strip().split(',')
            easting = float(fields[0])
            northing = float(fields[1])
            elevation = float(fields[2])
            points.append([easting, northing, elevation])
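
# A small sketch of the generator above, fed an in-memory fragment in the
# SURFACE LINE format (made-up coordinates).
def _example_read_cross_sections():
    sdf_lines = ['SURFACE LINE:',
                 ' 405548.67, 6438142.76, 35.37',
                 ' 405552.24, 6438146.28, 35.41',
                 'END CROSS-SECTION']
    for section in _read_hecras_cross_sections(sdf_lines):
        print len(section), 'points, first:', section[0]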
##
# @brief Convert HECRAS (.sdf) file to PTS file.
# @param basename_in Stem of input filename.
# @param basename_out Stem of output filename.
# @param verbose True if this function is to be verbose.
def hecras_cross_sections2pts(basename_in, basename_out=None, verbose=False):
    """Read HEC-RAS Elevation data from the following ASCII format (.sdf)

    Example:

    # RAS export file created on Mon 15Aug2005 11:42
    # by HEC-RAS Version 3.1.1

    BEGIN HEADER:
      UNITS: METRIC
      DTM TYPE: TIN
      DTM: v:\1\cit\perth_topo\river_tin
      STREAM LAYER: c:\local\hecras\21_02_03\up_canning_cent3d.shp
      CROSS-SECTION LAYER: c:\local\hecras\21_02_03\up_can_xs3d.shp
      MAP PROJECTION: UTM
      PROJECTION ZONE: 50
      DATUM: AGD66
      VERTICAL DATUM:
      NUMBER OF REACHES: 19
      NUMBER OF CROSS-SECTIONS: 14206
    END HEADER:

    Only the SURFACE LINE data of the following form will be utilised

    CROSS-SECTION:
      STREAM ID:Southern-Wungong
      REACH ID:Southern-Wungong
      STATION:19040.*
      CUT LINE:
        405548.671603161 , 6438142.7594925
        405734.536092045 , 6438326.10404912
        405745.130459356 , 6438331.48627354
        405813.89633823 , 6438368.6272789
      SURFACE LINE:
         405548.67,   6438142.76,       35.37
         405552.24,   6438146.28,       35.41
         405554.78,   6438148.78,       35.44
         405555.80,   6438149.79,       35.44
         405559.37,   6438153.31,       35.45
         405560.88,   6438154.81,       35.44
         405562.93,   6438156.83,       35.42
         405566.50,   6438160.35,       35.38
         405566.99,   6438160.83,       35.37
         ...
       END CROSS-SECTION

    Convert to NetCDF pts format which is

    points:  (Nx2) Float array
    elevation:  N Float array
    """

    import os
    from Scientific.IO.NetCDF import NetCDFFile

    root = basename_in

    # Get ASCII file
    infile = open(root + '.sdf', 'r')

    if verbose: print 'Reading DEM from %s' % (root + '.sdf')

    lines = infile.readlines()
    infile.close()

    if verbose: print 'Converting to pts format'

    # Scan through the header, picking up stuff we need.
    i = 0
    while lines[i].strip() == '' or lines[i].strip().startswith('#'):
        i += 1

    assert lines[i].strip().upper() == 'BEGIN HEADER:'
    i += 1

    assert lines[i].strip().upper().startswith('UNITS:')
    units = lines[i].strip().split()[1]
    i += 1

    assert lines[i].strip().upper().startswith('DTM TYPE:')
    i += 1

    assert lines[i].strip().upper().startswith('DTM:')
    i += 1

    assert lines[i].strip().upper().startswith('STREAM')
    i += 1

    assert lines[i].strip().upper().startswith('CROSS')
    i += 1

    assert lines[i].strip().upper().startswith('MAP PROJECTION:')
    projection = lines[i].strip().split(':')[1]
    i += 1

    assert lines[i].strip().upper().startswith('PROJECTION ZONE:')
    zone = int(lines[i].strip().split(':')[1])
    i += 1

    assert lines[i].strip().upper().startswith('DATUM:')
    datum = lines[i].strip().split(':')[1]
    i += 1

    assert ...

[truncated message content]