From: <zep...@us...> - 2006-12-15 02:07:56
Revision: 215
          http://svn.sourceforge.net/pzfilereader/?rev=215&view=rev
Author:   zepernick
Date:     2006-12-14 18:07:56 -0800 (Thu, 14 Dec 2006)

Log Message:
-----------
First attempt at reading files with the BufferedReader left open while the file is being looped through. This is incomplete, Benoit, but maybe you can take a peek and see what you think about the direction this is heading.

Added Paths:
-----------
    trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/
    trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZDataSet.java
    trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZParser.java
    trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderPZParseFactory.java

Added: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZDataSet.java
===================================================================
--- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZDataSet.java	(rev 0)
+++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZDataSet.java	2006-12-15 02:07:56 UTC (rev 215)
@@ -0,0 +1,51 @@
+package net.sf.pzfilereader.brparse;
+
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import net.sf.pzfilereader.DefaultDataSet;
+import net.sf.pzfilereader.structure.Row;
+
+public class BuffReaderDelimPZDataSet extends DefaultDataSet {
+    private BuffReaderDelimPZParser brpzparser;
+
+    public BuffReaderDelimPZDataSet(final Map columnMD2, BuffReaderDelimPZParser brpzparser) {
+        super(columnMD2);
+        //register the parser with the dataset so we can fetch rows from
+        //the bufferedreader as needed
+        this.brpzparser = brpzparser;
+    }
+
+    public boolean next() {
+        try {
+            final Row r = brpzparser.buildRow(this);
+
+            if (r == null) {
+                setPointer(-1);
+                return false;
+            }
+
+            //make sure we have some MD
+            if (getColumnMD() == null) {
+                //create a new map so the user cannot change the internal
+                //DataSet representation of the MD through the parser
+                setColumnMD(new LinkedHashMap(brpzparser.getColumnMD()));
+            }
+
+            getRows().clear();
+            addRow(r);
+
+            setPointer(0);
+
+            return true;
+
+        } catch (IOException ex) {
+            //TODO real logging here
+            ex.printStackTrace();
+        }
+
+        return false;
+    }
+}
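Just to make the intent of next() a little clearer: the DataSet only ever holds one Row at a time, and each call pulls the next record straight off the parser's BufferedReader. A rough usage sketch of how I picture the caller driving it (the pzmap/data file names, the "FIRSTNAME" column, and the getString() accessor are just illustrative and assume the normal DataSet API):

    // sketch only -- file names and column name are made up for illustration
    final File pzmap = new File("people.pzmap.xml");
    final File dataFile = new File("people.txt");

    final BuffReaderDelimPZParser parser = new BuffReaderDelimPZParser(pzmap, dataFile, ',', '"', true);
    final DataSet ds = parser.doParse();   // hands back the BuffReaderDelimPZDataSet wired to this parser

    while (ds.next()) {                    // each next() reads ONE record from the BufferedReader
        // only the current row is held in memory at this point
        final String name = ds.getString("FIRSTNAME");   // assumes the existing DataSet accessors
    }
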
Added: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZParser.java
===================================================================
--- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZParser.java	(rev 0)
+++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderDelimPZParser.java	2006-12-15 02:07:56 UTC (rev 215)
@@ -0,0 +1,158 @@
+package net.sf.pzfilereader.brparse;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import net.sf.pzfilereader.DataSet;
+import net.sf.pzfilereader.DefaultDataSet;
+import net.sf.pzfilereader.DelimiterPZParser;
+import net.sf.pzfilereader.structure.Row;
+import net.sf.pzfilereader.util.PZConstants;
+import net.sf.pzfilereader.util.ParserUtils;
+
+public class BuffReaderDelimPZParser extends DelimiterPZParser {
+    private BufferedReader br;
+    private InputStreamReader isr;
+    private boolean processedFirst = false;
+
+    public BuffReaderDelimPZParser(final File pzmapXML, final File dataSource, final char delimiter, final char qualifier,
+            final boolean ignoreFirstRecord) {
+        super(dataSource, delimiter, qualifier, ignoreFirstRecord);
+    }
+
+    public BuffReaderDelimPZParser(final InputStream pzmapXMLStream, final InputStream dataSourceStream, final char delimiter,
+            final char qualifier, final boolean ignoreFirstRecord) {
+        super(dataSourceStream, delimiter, qualifier, ignoreFirstRecord);
+    }
+
+    public BuffReaderDelimPZParser(final File dataSource, final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
+        super(dataSource, delimiter, qualifier, ignoreFirstRecord);
+    }
+
+    public BuffReaderDelimPZParser(final InputStream dataSourceStream, final char delimiter, final char qualifier,
+            final boolean ignoreFirstRecord) {
+        super(dataSourceStream, delimiter, qualifier, ignoreFirstRecord);
+    }
+
+    public DataSet doParse() {
+        final DataSet ds = new BuffReaderDelimPZDataSet(getColumnMD(), this);
+        try {
+            //gather the conversion properties
+            ds.setPZConvertProps(ParserUtils.loadConvertProperties());
+
+            if (getDataSourceStream() == null) {
+                setDataSourceStream(ParserUtils.createInputStream(getDataSource()));
+            }
+
+            isr = new InputStreamReader(getDataSourceStream());
+            br = new BufferedReader(isr);
+
+            return ds;
+
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+
+        return null;
+    }
+
+    /**
+     * Reads in the next record from the file and returns a Row
+     *
+     * @param ds
+     * @return Row
+     * @throws IOException
+     */
+    public Row buildRow(final DefaultDataSet ds) throws IOException {
+        /** loop through each line in the file */
+        while (true) {
+            String line = fetchNextRecord(br, getQualifier(), getDelimiter());
+
+            if (line == null) {
+                return null;
+            }
+
+            // check to see if the user has elected to skip the first record
+            if (!processedFirst && isIgnoreFirstRecord()) {
+                processedFirst = true;
+                continue;
+            } else if (!processedFirst && shouldCreateMDFromFile()) {
+                processedFirst = true;
+                setColumnMD(ParserUtils.getColumnMDFromFile(line, getDelimiter(), getQualifier()));
+                continue;
+            }
+
+            //TODO
+            //seems like we may want to try doing something like this.  I have my reservations because
+            //it is possible that we don't get a "detail" id and this might generate NPE
+            //is it going to create too much overhead to do a null check here as well???
+            //final int intialSize = ParserUtils.getColumnMetaData(PZConstants.DETAIL_ID, getColumnMD()).size();
+
+            // column values
+            final List columns = ParserUtils.splitLine(line, getDelimiter(), getQualifier(), PZConstants.SPLITLINE_SIZE_INIT);
+            final String mdkey = ParserUtils.getCMDKeyForDelimitedFile(getColumnMD(), columns);
+            final List cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD());
+            final int columnCount = cmds.size();
+            // DEBUG
+
+            // Incorrect record length on line, log the error.  Line
+            // will not be included in the dataset
+            if (columns.size() > columnCount) {
+                // log the error
+                addError(ds, "TOO MANY COLUMNS WANTED: " + columnCount + " GOT: " + columns.size(), getLineCount(), 2);
+                continue;
+            } else if (columns.size() < columnCount) {
+                if (isHandlingShortLines()) {
+                    // We can pad this line out
+                    while (columns.size() < columnCount) {
+                        columns.add("");
+                    }
+
+                    // log a warning
+                    addError(ds, "PADDED LINE TO CORRECT NUMBER OF COLUMNS", getLineCount(), 1);
+
+                } else {
+                    addError(ds, "TOO FEW COLUMNS WANTED: " + columnCount + " GOT: " + columns.size(), getLineCount(), 2);
+                    continue;
+                }
+            }
+
+            final Row row = new Row();
+            row.setMdkey(mdkey.equals(PZConstants.DETAIL_ID) ? null : mdkey); // try to limit the memory use
+            row.setCols(columns);
+            row.setRowNumber(getLineCount());
+
+            return row;
+        }
+    }
+
+    /**
+     * Closes out the file readers
+     *
+     * @throws IOException
+     */
+    public void close() throws IOException {
+        if (br != null) {
+            br.close();
+        }
+        if (isr != null) {
+            isr.close();
+        }
+    }
+
+    /**
+     * Returns the meta data describing the columns
+     */
+    public Map getColumnMD() {
+        return super.getColumnMD();
+    }
+}
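One thing to keep in mind with this version: doParse() opens the InputStreamReader/BufferedReader and hands the DataSet back while both are still open, so whoever drives the loop has to call close() when they are done -- nothing here does it automatically yet. Something along these lines, give or take where the close eventually ends up living (the dataStream variable and the tab delimiter are just placeholders):

    // sketch only -- closing the readers is the caller's job for now
    final BuffReaderDelimPZParser parser = new BuffReaderDelimPZParser(dataStream, '\t', '"', false);
    final DataSet ds = parser.doParse();
    try {
        while (ds.next()) {
            // process the current row...
        }
    } finally {
        parser.close();   // releases the BufferedReader / InputStreamReader opened in doParse()
    }
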
Added: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderPZParseFactory.java
===================================================================
--- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderPZParseFactory.java	(rev 0)
+++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/brparse/BuffReaderPZParseFactory.java	2006-12-15 02:07:56 UTC (rev 215)
@@ -0,0 +1,121 @@
+package net.sf.pzfilereader.brparse;
+
+import java.io.File;
+import java.io.InputStream;
+import java.sql.Connection;
+
+import net.sf.pzfilereader.PZParser;
+import net.sf.pzfilereader.PZParserFactory;
+
+/**
+ * Provides a PZParser which obtains records directly from
+ * a BufferedReader as an alternative to reading the
+ * entire file into memory.
+ *
+ * @author Paul Zepernick
+ */
+public class BuffReaderPZParseFactory implements PZParserFactory {
+    private static final BuffReaderPZParseFactory INSTANCE = new BuffReaderPZParseFactory();
+
+    public static PZParserFactory getInstance() {
+        return INSTANCE;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newFixedWidthParser(java.sql.Connection,
+     *      java.io.File, java.lang.String)
+     */
+    public PZParser newFixedLengthParser(final Connection con, final File dataSource, final String dataDefinition) {
+        //return new DBFixedLengthPZParser(con, dataSource, dataDefinition);
+        return null;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newFixedWidthParser(java.sql.Connection,
+     *      java.io.InputStream, java.lang.String)
+     */
+    public PZParser newFixedLengthParser(final Connection con, final InputStream dataSourceStream, final String dataDefinition) {
+        //return new DBFixedLengthPZParser(con, dataSourceStream, dataDefinition);
+        return null;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.io.File,
+     *      java.io.File)
+     */
+    public PZParser newFixedLengthParser(final File pzmapXML, final File dataSource) {
+        // return new FixedLengthPZParser(pzmapXML, dataSource);
+        return null;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.io.InputStream,
+     *      java.io.InputStream)
+     */
+    public PZParser newFixedLengthParser(final InputStream pzmapXMLStream, final InputStream dataSourceStream) {
+        // return new FixedLengthPZParser(pzmapXMLStream, dataSourceStream);
+        return null;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.sql.Connection,
+     *      java.io.InputStream, java.lang.String, char, char, boolean)
+     */
+    public PZParser newDelimitedParser(final Connection con, final InputStream dataSourceStream, final String dataDefinition,
+            final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
+        //return new BuffReaderDelimPZParser(con, dataSourceStream, dataDefinition, delimiter, qualifier, ignoreFirstRecord);
+        return null;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.io.File,
+     *      java.io.File, char, char, boolean)
+     */
+    public PZParser newDelimitedParser(final File pzmapXML, final File dataSource, final char delimiter, final char qualifier,
+            final boolean ignoreFirstRecord) {
+        return new BuffReaderDelimPZParser(pzmapXML, dataSource, delimiter, qualifier, ignoreFirstRecord);
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.io.InputStream,
+     *      java.io.InputStream, char, char, boolean)
+     */
+    public PZParser newDelimitedParser(final InputStream pzmapXMLStream, final InputStream dataSourceStream,
+            final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
+        return new BuffReaderDelimPZParser(pzmapXMLStream, dataSourceStream, delimiter, qualifier, ignoreFirstRecord);
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.io.File, char,
+     *      char)
+     */
+    public PZParser newDelimitedParser(final File dataSource, final char delimiter, final char qualifier) {
+        return new BuffReaderDelimPZParser(dataSource, delimiter, qualifier, false);
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see net.sf.pzfilereader.PZParserFactory#newParser(java.io.InputStream,
+     *      char, char)
+     */
+    public PZParser newDelimitedParser(final InputStream dataSourceStream, final char delimiter, final char qualifier) {
+        return new BuffReaderDelimPZParser(dataSourceStream, delimiter, qualifier, false);
+    }
+}
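For completeness, only the delimited entry points on the factory are wired up so far (the fixed-length and DB flavors still just return null), so getting hold of one of these parsers should look roughly like this (pzmapXML and dataSource here are assumed File objects, and the comma/quote characters are just examples):

    // sketch -- only the delimited methods return a working parser at this point
    final PZParser p = BuffReaderPZParseFactory.getInstance()
            .newDelimitedParser(pzmapXML, dataSource, ',', '"', true);
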
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.