From: <zep...@us...> - 2006-12-15 01:59:02
|
Revision: 213 http://svn.sourceforge.net/pzfilereader/?rev=213&view=rev Author: zepernick Date: 2006-12-14 17:59:01 -0800 (Thu, 14 Dec 2006) Log Message: ----------- added a method to fetch the next record from a given BufferedReader. Utilized this in the new BufferedReader parsers, and could be useful for future parser implementations. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java 2006-12-15 00:26:53 UTC (rev 212) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java 2006-12-15 01:59:01 UTC (rev 213) @@ -56,6 +56,8 @@ private char qualifier = 0; private boolean ignoreFirstRecord = false; + + private int lineCount = 0; public AbstractDelimiterPZParser(final InputStream dataSourceStream, final String dataDefinition, final char delimiter, final char qualifier, final boolean ignoreFirstRecord) { @@ -83,6 +85,7 @@ public DataSet doParse() { try { + lineCount = 0; if (getDataSourceStream() != null) { return doDelimitedFile(getDataSourceStream(), getDelimiter(), getQualifier(), isIgnoreFirstRecord(), shouldCreateMDFromFile()); @@ -137,6 +140,10 @@ this.qualifier = qualifier; } + protected int getLineCount() { + return lineCount; + } + /* * This is the new version of doDelimitedFile using InputStrem instead of * File. This is more flexible especially it is working with WebStart. 
@@ -145,7 +152,7 @@ * mappings, and SQL table mappings */ private DataSet doDelimitedFile(final InputStream dataSource, final char delimiter, final char qualifier, - final boolean ignoreFirstRecord, final boolean createMDFromFile) throws IOException, Exception { + final boolean ignoreFirstRecord, final boolean createMDFromFile) throws IOException { if (dataSource == null) { throw new NullPointerException("dataSource is null"); } @@ -160,25 +167,13 @@ // get the total column count // columnCount = columnMD.size(); - /** Read in the flat file */ - // fr = new FileReader(dataSource.getAbsolutePath()); isr = new InputStreamReader(dataSource); br = new BufferedReader(isr); boolean processedFirst = false; - boolean processingMultiLine = false; - int lineCount = 0; - String lineData = ""; /** loop through each line in the file */ String line = null; - while ((line = br.readLine()) != null) { - lineCount++; - /** empty line skip past it */ - final String trimmed = line.trim(); - if (!processingMultiLine && trimmed.length() == 0) { - continue; - } - + while ((line = fetchNextRecord(br, qualifier, delimiter)) != null) { // check to see if the user has elected to skip the first record if (!processedFirst && ignoreFirstRecord) { processedFirst = true; @@ -190,96 +185,13 @@ continue; } - // ******************************************************** - // new functionality as of 2.1.0 check to see if we have - // any line breaks in the middle of the record, this will only - // be checked if we have specified a delimiter - // ******************************************************** - final char[] chrArry = trimmed.toCharArray(); - if (!processingMultiLine && delimiter > 0) { - processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier); - } - - // check to see if we have reached the end of the linebreak in - // the record - - final String trimmedLineData = lineData.trim(); - if (processingMultiLine && trimmedLineData.length() > 0) { - // need to do one last check 
here. it is possible that the " - // could be part of the data - // excel will escape these with another quote; here is some - // data "" This would indicate - // there is more to the multiline - if (trimmed.charAt(trimmed.length() - 1) == qualifier && !trimmed.endsWith("" + qualifier + qualifier)) { - // it is safe to assume we have reached the end of the - // line break - processingMultiLine = false; - if (trimmedLineData.length() > 0) { // + would always be - // true surely.... - lineData += "\r\n"; - } - lineData += line; - } else { - // check to see if this is the last line of the record - // looking for a qualifier followed by a delimiter - if (trimmedLineData.length() > 0) { // + here again, - // this should - // always be true... - lineData += "\r\n"; - } - lineData += line; - boolean qualiFound = false; - for (int i = 0; i < chrArry.length; i++) { - if (qualiFound) { - if (chrArry[i] == ' ') { - continue; - } else { - // not a space, if this char is the - // delimiter, then we have reached the end - // of - // the record - if (chrArry[i] == delimiter) { - // processingMultiLine = false; - // fix put in, setting to false caused - // bug when processing multiple - // multi-line - // columns on the same record - processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier); - break; - } - qualiFound = false; - continue; - } - } else if (chrArry[i] == qualifier) { - qualiFound = true; - } - } - // check to see if we are still in multi line mode, if - // so grab the next line - if (processingMultiLine) { - continue; - } - } - } else { - // throw the line into lineData var. 
- lineData += line; - if (processingMultiLine) { - continue; // if we are working on a multiline rec, get - // the data on the next line - } - } - // ******************************************************************** - // end record line break logic - // ******************************************************************** - - //TODO + //TODO //seems like we may want to try doing something like this. I have my reservations because //it is possible that we don't get a "detail" id and this might generate NPE //is it going to create too much overhead to do a null check here as well??? //final int intialSize = ParserUtils.getColumnMetaData(PZConstants.DETAIL_ID, getColumnMD()).size(); // column values - final List columns = ParserUtils.splitLine(lineData, delimiter, qualifier, PZConstants.SPLITLINE_SIZE_INIT); - lineData = ""; + final List columns = ParserUtils.splitLine(line, delimiter, qualifier, PZConstants.SPLITLINE_SIZE_INIT); final String mdkey = ParserUtils.getCMDKeyForDelimitedFile(getColumnMD(), columns); final List cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD()); final int columnCount = cmds.size(); @@ -325,4 +237,119 @@ } return ds; } + + /** + * Reads a record from a delimited file. This will account for records which + * could span multiple lines. 
+ * NULL will be returned when the end of the file is reached + * + * @param br + * Open reader being used to read through the file + * @return String + * Record from delimited file + * + */ + protected String fetchNextRecord(final BufferedReader br, final char qualifier, + final char delimiter) throws IOException{ + String line = null; + String lineData = ""; + boolean processingMultiLine = false; + + while ((line = br.readLine()) != null) { + lineCount++; + /** empty line skip past it */ + final String trimmed = line.trim(); + if (!processingMultiLine && trimmed.length() == 0) { + continue; + } + + // ******************************************************** + // new functionality as of 2.1.0 check to see if we have + // any line breaks in the middle of the record, this will only + // be checked if we have specified a delimiter + // ******************************************************** + final char[] chrArry = trimmed.toCharArray(); + if (!processingMultiLine && delimiter > 0) { + processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier); + } + + // check to see if we have reached the end of the linebreak in + // the record + + final String trimmedLineData = lineData.trim(); + if (processingMultiLine && trimmedLineData.length() > 0) { + // need to do one last check here. it is possible that the " + // could be part of the data + // excel will escape these with another quote; here is some + // data "" This would indicate + // there is more to the multiline + if (trimmed.charAt(trimmed.length() - 1) == qualifier && !trimmed.endsWith("" + qualifier + qualifier)) { + // it is safe to assume we have reached the end of the + // line break + processingMultiLine = false; + if (trimmedLineData.length() > 0) { // + would always be + // true surely.... 
+ lineData += "\r\n"; + } + lineData += line; + } else { + // check to see if this is the last line of the record + // looking for a qualifier followed by a delimiter + if (trimmedLineData.length() > 0) { // + here again, + // this should + // always be true... + lineData += "\r\n"; + } + lineData += line; + boolean qualiFound = false; + for (int i = 0; i < chrArry.length; i++) { + if (qualiFound) { + if (chrArry[i] == ' ') { + continue; + } else { + // not a space, if this char is the + // delimiter, then we have reached the end + // of + // the record + if (chrArry[i] == delimiter) { + // processingMultiLine = false; + // fix put in, setting to false caused + // bug when processing multiple + // multi-line + // columns on the same record + processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier); + break; + } + qualiFound = false; + continue; + } + } else if (chrArry[i] == qualifier) { + qualiFound = true; + } + } + // check to see if we are still in multi line mode, if + // so grab the next line + if (processingMultiLine) { + continue; + } + } + } else { + // throw the line into lineData var. + lineData += line; + if (processingMultiLine) { + continue; // if we are working on a multiline rec, get + // the data on the next line + } + } + + break; + } + + if (line == null && lineData.length() == 0) { + //eof + return null; + } + + return lineData; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-12-16 14:41:32
|
Revision: 231 http://svn.sourceforge.net/pzfilereader/?rev=231&view=rev Author: zepernick Date: 2006-12-16 06:41:28 -0800 (Sat, 16 Dec 2006) Log Message: ----------- checkstyle fixes Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java 2006-12-16 14:10:23 UTC (rev 230) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java 2006-12-16 14:41:28 UTC (rev 231) @@ -38,9 +38,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.net.URL; import java.util.List; -import java.util.Properties; import net.sf.pzfilereader.structure.Row; import net.sf.pzfilereader.util.PZConstants; @@ -87,14 +85,12 @@ try { lineCount = 0; if (getDataSourceStream() != null) { - return doDelimitedFile(getDataSourceStream(), getDelimiter(), getQualifier(), isIgnoreFirstRecord(), - shouldCreateMDFromFile()); + return doDelimitedFile(getDataSourceStream(), shouldCreateMDFromFile()); } else { InputStream stream = null; try { stream = ParserUtils.createInputStream(getDataSource()); - return doDelimitedFile(stream, getDelimiter(), getQualifier(), isIgnoreFirstRecord(), - shouldCreateMDFromFile()); + return doDelimitedFile(stream, shouldCreateMDFromFile()); } catch (final Exception e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -151,8 +147,7 @@ * puts together the dataset for a DELIMITED file. 
This is used for PZ XML * mappings, and SQL table mappings */ - private DataSet doDelimitedFile(final InputStream dataSource, final char delimiter, final char qualifier, - final boolean ignoreFirstRecord, final boolean createMDFromFile) throws IOException { + private DataSet doDelimitedFile(final InputStream dataSource, final boolean createMDFromFile) throws IOException { if (dataSource == null) { throw new NullPointerException("dataSource is null"); } @@ -173,9 +168,9 @@ boolean processedFirst = false; /** loop through each line in the file */ String line = null; - while ((line = fetchNextRecord(br, qualifier, delimiter)) != null) { + while ((line = fetchNextRecord(br, getQualifier(), getDelimiter())) != null) { // check to see if the user has elected to skip the first record - if (!processedFirst && ignoreFirstRecord) { + if (!processedFirst && isIgnoreFirstRecord()) { processedFirst = true; continue; } else if (!processedFirst && createMDFromFile) { @@ -191,7 +186,7 @@ //is it going to create too much overhead to do a null check here as well??? 
//final int intialSize = ParserUtils.getColumnMetaData(PZConstants.DETAIL_ID, getColumnMD()).size(); // column values - final List columns = ParserUtils.splitLine(line, delimiter, qualifier, PZConstants.SPLITLINE_SIZE_INIT); + final List columns = ParserUtils.splitLine(line, getDelimiter(), getQualifier(), PZConstants.SPLITLINE_SIZE_INIT); final String mdkey = ParserUtils.getCMDKeyForDelimitedFile(getColumnMD(), columns); final List cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD()); final int columnCount = cmds.size(); @@ -245,21 +240,26 @@ * * @param br * Open reader being used to read through the file + * @param qual + * Qualifier being used for parse + * @parma delim + * Delimiter being used for parse * @return String * Record from delimited file * */ - protected String fetchNextRecord(final BufferedReader br, final char qualifier, - final char delimiter) throws IOException{ + protected String fetchNextRecord(final BufferedReader br, final char qual, + final char delim) throws IOException{ String line = null; String lineData = ""; boolean processingMultiLine = false; while ((line = br.readLine()) != null) { lineCount++; - /** empty line skip past it */ final String trimmed = line.trim(); if (!processingMultiLine && trimmed.length() == 0) { + //empty line skip past it, as long as it + //is not part of the multiline continue; } @@ -269,8 +269,8 @@ // be checked if we have specified a delimiter // ******************************************************** final char[] chrArry = trimmed.toCharArray(); - if (!processingMultiLine && delimiter > 0) { - processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier); + if (!processingMultiLine && delim > 0) { + processingMultiLine = ParserUtils.isMultiLine(chrArry, delim, qual); } // check to see if we have reached the end of the linebreak in @@ -283,24 +283,15 @@ // excel will escape these with another quote; here is some // data "" This would indicate // there is more to the multiline - if 
(trimmed.charAt(trimmed.length() - 1) == qualifier && !trimmed.endsWith("" + qualifier + qualifier)) { + if (trimmed.charAt(trimmed.length() - 1) == qual && !trimmed.endsWith("" + qual + qual)) { // it is safe to assume we have reached the end of the // line break processingMultiLine = false; - if (trimmedLineData.length() > 0) { // + would always be - // true surely.... - lineData += "\r\n"; - } - lineData += line; + lineData += "\r\n" + line; } else { // check to see if this is the last line of the record // looking for a qualifier followed by a delimiter - if (trimmedLineData.length() > 0) { // + here again, - // this should - // always be true... - lineData += "\r\n"; - } - lineData += line; + lineData += "\r\n" + line; boolean qualiFound = false; for (int i = 0; i < chrArry.length; i++) { if (qualiFound) { @@ -311,19 +302,19 @@ // delimiter, then we have reached the end // of // the record - if (chrArry[i] == delimiter) { + if (chrArry[i] == delim) { // processingMultiLine = false; // fix put in, setting to false caused // bug when processing multiple // multi-line // columns on the same record - processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier); + processingMultiLine = ParserUtils.isMultiLine(chrArry, delim, qual); break; } qualiFound = false; continue; } - } else if (chrArry[i] == qualifier) { + } else if (chrArry[i] == qual) { qualiFound = true; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2007-01-18 16:35:26
|
Revision: 266 http://svn.sourceforge.net/pzfilereader/?rev=266&view=rev Author: zepernick Date: 2007-01-18 08:34:24 -0800 (Thu, 18 Jan 2007) Log Message: ----------- correct findbug string concat, changed to StringBuffer Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java 2007-01-18 14:31:48 UTC (rev 265) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/AbstractDelimiterPZParser.java 2007-01-18 16:34:24 UTC (rev 266) @@ -248,7 +248,7 @@ protected String fetchNextRecord(final BufferedReader br, final char qual, final char delim) throws IOException{ String line = null; - String lineData = ""; + StringBuffer lineData = new StringBuffer(); boolean processingMultiLine = false; while ((line = br.readLine()) != null) { @@ -273,7 +273,7 @@ // check to see if we have reached the end of the linebreak in // the record - final String trimmedLineData = lineData.trim(); + final String trimmedLineData = lineData.toString().trim(); if (processingMultiLine && trimmedLineData.length() > 0) { // need to do one last check here. it is possible that the " // could be part of the data @@ -284,11 +284,11 @@ // it is safe to assume we have reached the end of the // line break processingMultiLine = false; - lineData += "\r\n" + line; + lineData.append("\r\n").append(line); } else { // check to see if this is the last line of the record // looking for a qualifier followed by a delimiter - lineData += "\r\n" + line; + lineData.append("\r\n").append(line); boolean qualiFound = false; for (int i = 0; i < chrArry.length; i++) { if (qualiFound) { @@ -323,7 +323,7 @@ } } else { // throw the line into lineData var. 
- lineData += line; + lineData.append(line); if (processingMultiLine) { continue; // if we are working on a multiline rec, get // the data on the next line @@ -338,6 +338,6 @@ return null; } - return lineData; + return lineData.toString(); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |