From: <zep...@us...> - 2006-10-19 06:03:34
|
Revision: 101 http://svn.sourceforge.net/pzfilereader/?rev=101&view=rev Author: zepernick Date: 2006-10-18 23:03:30 -0700 (Wed, 18 Oct 2006) Log Message: ----------- backed out a 1.5 only method Integer.valueOf Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-18 20:51:29 UTC (rev 100) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-19 06:03:30 UTC (rev 101) @@ -805,7 +805,8 @@ int idx = 0; for (final Iterator it = columns.iterator(); it.hasNext(); idx++) { final ColumnMetaData meta = (ColumnMetaData) it.next(); - map.put(meta.getColName(), Integer.valueOf(idx)); + //map.put(meta.getColName(), Integer.valueOf(idx)); breaks 1.4 compile + map.put(meta.getColName(), new Integer(idx)); } } return map; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <be...@us...> - 2006-10-26 15:51:42
|
Revision: 127 http://svn.sourceforge.net/pzfilereader/?rev=127&view=rev Author: benoitx Date: 2006-10-26 08:51:35 -0700 (Thu, 26 Oct 2006) Log Message: ----------- try to reduce memory requirements by trimming to size the list. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-26 15:50:17 UTC (rev 126) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-26 15:51:35 UTC (rev 127) @@ -84,7 +84,7 @@ // line which has not yet been read // check to see if there is a text qualifier final char currentChar = line.charAt(i); - final String currentString = String.valueOf(currentChar); +// final String currentString = String.valueOf(currentChar); if (qualifier > 0) { if (currentChar == qualifier && !beginQualifier && !beginNoQualifier) { // begining of a set of data @@ -152,7 +152,7 @@ } // + this needs to be revisited... - String trimmed = sb.toString().trim(); + final String trimmed = sb.toString().trim(); // remove the ending text qualifier if needed if (qualifier > 0 && trimmed.length() > 0) { if (trimmed.charAt(trimmed.length() - 1) == qualifier) { @@ -162,7 +162,7 @@ } } - String trimmed2 = line.trim(); + final String trimmed2 = line.trim(); int lengthLeft = trimmed2.length(); if (qualifier <= 0 || beginQualifier || beginNoQualifier || lengthLeft > 0 && trimmed2.charAt(lengthLeft - 1) == delimiter) { @@ -176,6 +176,8 @@ sb = null; + list.trimToSize(); + return list; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-10-27 00:20:37
|
Revision: 132 http://svn.sourceforge.net/pzfilereader/?rev=132&view=rev Author: zepernick Date: 2006-10-26 17:20:31 -0700 (Thu, 26 Oct 2006) Log Message: ----------- moved fixed width method to FixedWidthParserUtils deprecated method Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 00:07:34 UTC (rev 131) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 00:20:31 UTC (rev 132) @@ -615,6 +615,8 @@ * @param columnMD * @param line * @return List - ColumMetaData + * @deprecated Moved to FixedWidthParserUtils.getCMDKey() + * */ public static String getCMDKeyForFixedLengthFile(final Map columnMD, final String line) { if (columnMD.size() == 1) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-10-27 01:05:00
|
Revision: 137 http://svn.sourceforge.net/pzfilereader/?rev=137&view=rev Author: zepernick Date: 2006-10-26 18:04:55 -0700 (Thu, 26 Oct 2006) Log Message: ----------- fixed bug, should not trim off qualifier unless the element began with a qulifier Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 00:25:58 UTC (rev 136) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 01:04:55 UTC (rev 137) @@ -154,8 +154,10 @@ // + this needs to be revisited... final String trimmed = sb.toString().trim(); // remove the ending text qualifier if needed - if (qualifier > 0 && trimmed.length() > 0) { + //only if the last element was truly qualified + if (beginQualifier && qualifier > 0 && trimmed.length() > 0) { if (trimmed.charAt(trimmed.length() - 1) == qualifier) { + System.out.println(">>>>>>>Triming Off Qualifier"); final String s = trimmed.substring(0, trimmed.length() - 1); sb.delete(0, sb.length()); sb.append(s); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-10-27 01:05:24
|
Revision: 138 http://svn.sourceforge.net/pzfilereader/?rev=138&view=rev Author: zepernick Date: 2006-10-26 18:05:18 -0700 (Thu, 26 Oct 2006) Log Message: ----------- removed system out Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 01:04:55 UTC (rev 137) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 01:05:18 UTC (rev 138) @@ -157,7 +157,7 @@ //only if the last element was truly qualified if (beginQualifier && qualifier > 0 && trimmed.length() > 0) { if (trimmed.charAt(trimmed.length() - 1) == qualifier) { - System.out.println(">>>>>>>Triming Off Qualifier"); + // System.out.println(">>>>>>>Triming Off Qualifier"); final String s = trimmed.substring(0, trimmed.length() - 1); sb.delete(0, sb.length()); sb.append(s); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-10-30 11:51:29
|
Revision: 144 http://svn.sourceforge.net/pzfilereader/?rev=144&view=rev Author: zepernick Date: 2006-10-30 03:51:24 -0800 (Mon, 30 Oct 2006) Log Message: ----------- Trim left and right space for unqualified elements. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-27 14:14:36 UTC (rev 143) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-30 11:51:24 UTC (rev 144) @@ -131,7 +131,9 @@ } else if (beginNoQualifier && currentChar == delimiter) { // check to see if we are done with an element that was not // being qulified - list.add(sb.toString()); + //remove the space from the front and back of unqualified + //elements + list.add(lTrim(sb.toString().trim())); sb.delete(0, sb.length()); beginNoQualifier = false; } else if (beginNoQualifier || beginQualifier) { @@ -143,7 +145,9 @@ } else { // not using a qualifier. Using a delimiter only if (currentChar == delimiter) { - list.add(sb.toString()); + //remove the space from the front and back of unqualified + //elements + list.add(lTrim(sb.toString().trim())); sb.delete(0, sb.length()); } else { sb.append(currentChar); @@ -173,7 +177,7 @@ // check to see if we need to add the last column in..this will // happen on empty columns // add the last column - list.add(sb.toString()); + list.add(beginNoQualifier ? lTrim(sb.toString().trim()) : sb.toString()); } sb = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-10-30 11:55:32
|
Revision: 145 http://svn.sourceforge.net/pzfilereader/?rev=145&view=rev Author: zepernick Date: 2006-10-30 03:55:25 -0800 (Mon, 30 Oct 2006) Log Message: ----------- added to javadoc description for splitLine Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-30 11:51:24 UTC (rev 144) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-30 11:55:25 UTC (rev 145) @@ -59,6 +59,9 @@ * qualifier around the text, the qualifier parameter can be left null, or * empty. There should not be any line breaks in the string. Each line of * the file should be passed in individually. + * Elements which are not qualified will have leading and trailing white + * space removed. This includes unqualified elements, which may be + * contained in an unqualified parse: "data", data ,"data" * * @param line - * String of data to be parsed This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <be...@us...> - 2006-10-30 12:06:22
|
Revision: 148 http://svn.sourceforge.net/pzfilereader/?rev=148&view=rev Author: benoitx Date: 2006-10-30 04:05:55 -0800 (Mon, 30 Oct 2006) Log Message: ----------- removed code that was commented out since the new code is now the accepted version. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-30 11:57:30 UTC (rev 147) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-30 12:05:55 UTC (rev 148) @@ -1,16 +1,16 @@ /* Copyright 2006 Paul Zepernick - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed - under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - CONDITIONS OF ANY KIND, either express or implied. See the License for - the specific language governing permissions and limitations under the License. + Unless required by applicable law or agreed to in writing, software distributed + under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + CONDITIONS OF ANY KIND, either express or implied. See the License for + the specific language governing permissions and limitations under the License. */ package net.sf.pzfilereader.util; @@ -60,9 +60,9 @@ * empty. There should not be any line breaks in the string. Each line of * the file should be passed in individually. * Elements which are not qualified will have leading and trailing white - * space removed. This includes unqualified elements, which may be + * space removed. This includes unqualified elements, which may be * contained in an unqualified parse: "data", data ,"data" - * + * * @param line - * String of data to be parsed * @param delimiter - @@ -73,7 +73,6 @@ */ public static List splitLine(String line, final char delimiter, final char qualifier) { final ArrayList list = new ArrayList(); - // String temp = ""; boolean beginQualifier = false; // this will be used for delimted files that have some items qualified // and some items dont @@ -87,7 +86,6 @@ // line which has not yet been read // check to see if there is a text qualifier final char currentChar = line.charAt(i); - // final String currentString = String.valueOf(currentChar); if (qualifier > 0) { if (currentChar == qualifier && !beginQualifier && !beginNoQualifier) { // begining of a set of data @@ -96,12 +94,10 @@ && lTrim(remainderOfLine).charAt(0) != qualifier) { // try to account for empty space before qualifier starts // we have not yet begun a qualifier and the char we are on - // is NOT - // a qualifier. Start reading data + // is NOT a qualifier. Start reading data beginNoQualifier = true; // make sure that this is not just an empty column with no - // qualifiers. ie - // "data",,"data" + // qualifiers. ie "data",,"data" if (currentChar == delimiter) { list.add(sb.toString()); sb.delete(0, sb.length()); @@ -119,8 +115,7 @@ sb.delete(0, sb.length()); beginQualifier = false; // add to "i" so we can get past the qualifier, otherwise it - // is read into a set - // of data which + // is read into a set of data which // may not be qualified. Find out how many spaces to the // delimiter final int offset = getDelimiterOffset(line, i, delimiter) - 1; @@ -133,9 +128,9 @@ } } else if (beginNoQualifier && currentChar == delimiter) { // check to see if we are done with an element that was not - // being qulified - //remove the space from the front and back of unqualified - //elements + // being qualified + // remove the space from the front and back of unqualified + // elements list.add(lTrim(sb.toString().trim())); sb.delete(0, sb.length()); beginNoQualifier = false; @@ -148,7 +143,7 @@ } else { // not using a qualifier. Using a delimiter only if (currentChar == delimiter) { - //remove the space from the front and back of unqualified + //remove the space from the front and back of unqualified //elements list.add(lTrim(sb.toString().trim())); sb.delete(0, sb.length()); @@ -193,42 +188,23 @@ /** * reads from the specified point in the line and returns how many chars to * the specified delimter - * + * * @param line * @param start * @param delimiter * @return int */ - public static int getDelimiterOffset(final String line, final int start, final char delimiter) { int idx = line.indexOf(delimiter, start); if (idx >= 0) { - // idx++; - // idx-=start; idx -= start - 1; } return idx; - - // int offset = 0; - // for (int i = start; i < line.length(); i++) { - // offset++; - // if (line.substring(i, i + 1).equals(delimiter)) { - // if (offset != idx) { - // System.out.println("String [" + line + "] start:" + start + "(" + - // line.charAt(start) + ") delim [" - // + delimiter + "] length:" + delimiter.length() + " Old:" + offset + " - // new:" + idx); - // } - // - // return offset; - // } - // } - // return -1; } /** * Removes empty space from the begining of a string - * + * * @param value - * to be trimmed * @return String @@ -246,27 +222,11 @@ } return trimmed; - // - // - // final StringBuffer returnVal = new StringBuffer(); - // boolean gotAChar = false; - // - // for (int i = 0; i < value.length(); i++) { - // if (value.substring(i, i + 1).trim().length() == 0 && !gotAChar) { - // continue; - // } else { - // gotAChar = true; - // returnVal.append(value.substring(i, i + 1)); - // } - // } - // - // return returnVal.toString(); - // } /** * Removes empty space from the begining of a string, except for tabs - * + * * @param value - * to be trimmed * @return String @@ -284,26 +244,11 @@ } return trimmed; - // final StringBuffer returnVal = new StringBuffer(); - // boolean gotAChar = false; - // - // for (int i = 0; i < value.length(); i++) { - // if (!value.substring(i, i + 1).equals("\t") && value.substring(i, i + - // 1).trim().length() == 0 && !gotAChar) { - // continue; - // } else { - // gotAChar = true; - // returnVal.append(value.substring(i, i + 1)); - // } - // } - // - // return returnVal.toString(); - } /** * Removes a single string character from a given string - * + * * @param character - * string char * @param theString - @@ -320,23 +265,13 @@ } return s.toString(); - // final StringBuffer s = new StringBuffer(); - // for (int i = 0; i < theString.length(); i++) { - // if (theString.substring(i, i + 1).equalsIgnoreCase(character)) { - // continue; - // } - // s.append(theString.substring(i, i + 1)); - // } - // - // return s.toString(); - } /** * Returns a list of ColumnMetaData objects. This is for use with delimited * files. The first line of the file which contains data will be used as the * column names - * + * * @param theStream * @param delimiter * @param qualifier @@ -357,8 +292,6 @@ try { isr = new InputStreamReader(theStream); br = new BufferedReader(isr); - // fr = new FileReader(theFile); - // br = new BufferedReader(fr); while ((line = br.readLine()) != null) { if (line.trim().length() == 0) { @@ -395,7 +328,7 @@ * Returns a list of ColumnMetaData objects. This is for use with delimited * files. The first line of the file which contains data will be used as the * column names - * + * * @param line * @param delimiter * @param qualifier @@ -424,7 +357,7 @@ * Returns a list of ColumnMetaData objects. This is for use with delimited * files. The first line of the file which contains data will be used as the * column names - * + * * @param theFile * @param delimiter * @param qualifier @@ -490,7 +423,7 @@ /** * Determines if the given line is the first part of a multiline record - * + * * @param chrArry - * char data of the line * @param delimiter - @@ -506,7 +439,7 @@ /** * Determines if the given line is the first part of a multiline record - * + * * @param chrArry - * char data of the line * @param delimiter - @@ -531,8 +464,7 @@ continue; } else { // not a space, if this char is the delimiter, then we - // have a line break - // in the record + // have a line break in the record if (chrArry[i] == delimiter) { return true; } @@ -541,9 +473,8 @@ } } else if (chrArry[i] == delimiter) { // if we have a delimiter followed by a qualifier, then we - // have moved on - // to a new element and this could not be multiline. start a - // new loop here in case there is + // have moved on to a new element and this could not be multiline. + // start a new loop here in case there is // space between the delimiter and qualifier for (int j = i - 1; j >= 0; j--) { if (chrArry[j] == ' ') { @@ -585,7 +516,7 @@ /** * Returns a map with the MD id's and their record lengths. This is used for * fixed length parsing - * + * * @param columnMD * @return Map */ @@ -618,13 +549,13 @@ /** * Returns the key to the list of ColumnMetaData objects. Returns the * correct MetaData per the mapping file and the data contained on the line - * - * + * + * * @param columnMD * @param line * @return List - ColumMetaData * @deprecated Moved to FixedWidthParserUtils.getCMDKey() - * + * */ public static String getCMDKeyForFixedLengthFile(final Map columnMD, final String line) { if (columnMD.size() == 1) { @@ -666,8 +597,8 @@ /** * Returns the key to the list of ColumnMetaData objects. Returns the * correct MetaData per the mapping file and the data contained on the line - * - * + * + * * @param columnMD * @param lineElements * @return List - ColumMetaData @@ -707,7 +638,7 @@ /** * Returns a list of ColumMetaData objects for the given key - * + * * @param key * @param columnMD * @return List @@ -722,7 +653,7 @@ /** * Use this method to find the index of a column. - * + * * @author Benoit Xhenseval * @param key * @param columnMD @@ -753,7 +684,7 @@ /** * Create an InputStream based on a File. - * + * * @param file * The file. * @return the InputStream. @@ -763,22 +694,14 @@ if (file == null) { throw new IllegalArgumentException("null not allowed"); } - // if (!file.exists()) { - // throw new FileNotFoundException("file does not exist " + - // file.getAbsolutePath()); - // } - // if (!file.canRead()) { - // throw new FileNotFoundException("file cannot be read " + - // file.getAbsolutePath()); - // } return new FileInputStream(file); } /** * Closes the given reader - * + * * @param reader - * + * */ public static void closeReader(final Reader reader) { try { @@ -789,9 +712,9 @@ /** * Closes the given reader - * + * * @param reader - * + * */ public static void closeReader(final InputStream reader) { try { @@ -804,13 +727,13 @@ * <p> * Returns padding using the specified delimiter repeated to a given length. * </p> - * + * * <pre> * StringUtils.padding(0, 'e') = "" * StringUtils.padding(3, 'e') = "eee" * StringUtils.padding(-2, 'e') = IndexOutOfBoundsException * </pre> - * + * * <p> * Note: this method doesn't not support padding with <a * href="http://www.unicode.org/glossary/#supplementary_character">Unicode @@ -818,7 +741,7 @@ * to be represented. If you are needing to support full I18N of your * applications consider using {@link #repeat(String, int)} instead. * </p> - * + * * @param repeat * number of times to repeat delim * @param padChar @@ -841,7 +764,7 @@ /** * Build a map of name/position based on a list of ColumnMetaData. - * + * * @author Benoit Xhenseval * @param columns * @return a new Map @@ -853,8 +776,6 @@ int idx = 0; for (final Iterator it = columns.iterator(); it.hasNext(); idx++) { final ColumnMetaData meta = (ColumnMetaData) it.next(); - // map.put(meta.getColName(), Integer.valueOf(idx)); breaks 1.4 - // compile map.put(meta.getColName(), new Integer(idx)); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-10-31 20:00:21
|
Revision: 162 http://svn.sourceforge.net/pzfilereader/?rev=162&view=rev Author: zepernick Date: 2006-10-31 12:00:17 -0800 (Tue, 31 Oct 2006) Log Message: ----------- - handle null's for lTrim(), lTrimKeepTabs, and splitLine() - splitLine now returns nulls for elements which are empty and have not been qualified - Added a trimToNull method. - All current tests pass with these changes Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-31 17:33:49 UTC (rev 161) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-31 20:00:17 UTC (rev 162) @@ -73,6 +73,14 @@ */ public static List splitLine(String line, final char delimiter, final char qualifier) { final ArrayList list = new ArrayList(); + + if (line == null) { + return list; + } else if (line.trim().length() == 0){ + list.add(null); + return list; + } + boolean beginQualifier = false; // this will be used for delimted files that have some items qualified // and some items dont @@ -99,7 +107,8 @@ // make sure that this is not just an empty column with no // qualifiers. ie "data",,"data" if (currentChar == delimiter) { - list.add(sb.toString()); + //list.add(sb.toString()); + list.add(null); sb.delete(0, sb.length()); beginNoQualifier = false; continue;// grab the next char @@ -175,7 +184,8 @@ // check to see if we need to add the last column in..this will // happen on empty columns // add the last column - list.add(beginNoQualifier ? lTrim(sb.toString().trim()) : sb.toString()); + list.add(!beginQualifier ? lTrim(trimToNull(sb.toString())) : sb.toString()); + //list.add(null); } sb = null; @@ -210,6 +220,10 @@ * @return String */ public static String lTrim(final String value) { + if (value == null) { + return null; + } + String trimmed = value; int offset = 0; final int maxLength = value.length(); @@ -232,6 +246,10 @@ * @return String */ public static String lTrimKeepTabs(final String value) { + if (value == null) { + return null; + } + String trimmed = value; int offset = 0; final int maxLength = value.length(); @@ -245,6 +263,25 @@ return trimmed; } + + /** + * Will return a null if the String is empty returns the + * trimmed string otherwise. + * + * @param value + * to be trimmed + * @return String + */ + public static String trimToNull(final String value) { + if (value == null) { + return null; + } + + final String ret = value.trim(); + + return ret.length() == 0 ? null : ret; + + } /** * Removes a single string character from a given string This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-04 16:04:29
|
Revision: 168 http://svn.sourceforge.net/pzfilereader/?rev=168&view=rev Author: zepernick Date: 2006-11-04 08:04:24 -0800 (Sat, 04 Nov 2006) Log Message: ----------- added two new methods stripNonDoubleChars() and stripNonLongChars() removed logic from DefaultDataSet and called these methods instead. These methods will be usefull in other areas of the code, as we further develop the API Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-03 11:47:38 UTC (rev 167) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 16:04:24 UTC (rev 168) @@ -818,4 +818,54 @@ } return map; } + + /** + * Removes chars from the String that could not + * be parsed into a Long value + * + * @param value + * @return String + */ + public static String stripNonLongChars(String value){ + final char[] charString = value.toCharArray(); + final StringBuffer newString = new StringBuffer(); + + for (int i = 0; i < charString.length; i++) { + if (charString[i] >= '0' && charString[i] <= '9' || charString[i] == '-') { + newString.append(charString[i]); + } + } + // check to make sure we do not have a single length string with + // just a minus sign + if (newString.length() == 0 || (newString.length() == 1 && newString.toString().equals("-"))) { + newString.append("0"); + } + + return newString.toString(); + } + + /** + * Removes chars from the String that could not + * be parsed into a Double value + * + * @param value + * @return String + */ + public static String stripNonDoubleChars(String value){ + final char[] charString = value.toCharArray(); + final StringBuffer newString = new StringBuffer(); + + for (int i = 0; i < charString.length; i++) { + if (charString[i] >= '0' && charString[i] <= '9' || charString[i] == '-' + || charString[i] == '.') { + newString.append(charString[i]); + } + } + if (newString.length() == 0 || (newString.length() == 1 && newString.toString().equals(".")) + || (newString.length() == 1 && newString.toString().equals("-"))) { + newString.append("0"); + } + + return newString.toString(); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-04 16:40:04
|
Revision: 170 http://svn.sourceforge.net/pzfilereader/?rev=170&view=rev Author: zepernick Date: 2006-11-04 08:39:57 -0800 (Sat, 04 Nov 2006) Log Message: ----------- modified performance for strip methods per Benoits suggestions Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 16:05:34 UTC (rev 169) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 16:39:57 UTC (rev 170) @@ -826,19 +826,21 @@ * @param value * @return String */ - public static String stripNonLongChars(String value){ - final char[] charString = value.toCharArray(); + public static String stripNonLongChars(final String value) { final StringBuffer newString = new StringBuffer(); - for (int i = 0; i < charString.length; i++) { - if (charString[i] >= '0' && charString[i] <= '9' || charString[i] == '-') { - newString.append(charString[i]); + for (int i = 0; i < value.length(); i++) { + final char c = value.charAt(i); + if (c >= '0' && c <= '9' || c == '-') { + newString.append(c); } } // check to make sure we do not have a single length string with // just a minus sign - if (newString.length() == 0 || (newString.length() == 1 && newString.toString().equals("-"))) { - newString.append("0"); + final int sLen = newString.length(); + final String s = newString.toString(); + if (sLen == 0 || (sLen == 1 && s.equals("-"))) { + return "0"; } return newString.toString(); @@ -851,19 +853,20 @@ * @param value * @return String */ - public static String stripNonDoubleChars(String value){ - final char[] charString = value.toCharArray(); + public static String stripNonDoubleChars(final String value) { final StringBuffer newString = new StringBuffer(); - for (int i = 0; i < charString.length; i++) { - if (charString[i] >= '0' && charString[i] <= '9' || charString[i] == '-' - || charString[i] == '.') { - newString.append(charString[i]); + for (int i = 0; i < value.length(); i++) { + final char c = value.charAt(i); + if (c >= '0' && c <= '9' || c == '-' + || c == '.') { + newString.append(c); } } - if (newString.length() == 0 || (newString.length() == 1 && newString.toString().equals(".")) - || (newString.length() == 1 && newString.toString().equals("-"))) { - newString.append("0"); + final int sLen = newString.length(); + final String s = newString.toString(); + if (sLen == 0 || (sLen == 1 && s.equals(".")) || (sLen == 1 && s.equals("-"))) { + return "0"; } return newString.toString(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-04 17:03:45
|
Revision: 171 http://svn.sourceforge.net/pzfilereader/?rev=171&view=rev Author: zepernick Date: 2006-11-04 09:03:39 -0800 (Sat, 04 Nov 2006) Log Message: ----------- added a check for demcimal in stripNonLongChars Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 16:39:57 UTC (rev 170) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 17:03:39 UTC (rev 171) @@ -831,7 +831,13 @@ for (int i = 0; i < value.length(); i++) { final char c = value.charAt(i); - if (c >= '0' && c <= '9' || c == '-') { + //TODO may want to revist this logic and what exactly should + //happen in this method for the following value 1000.10 + //in the current version (2.2) it would become 100010 + if (c == '.') { + //stop if we hit a decimal point + break; + } else if (c >= '0' && c <= '9' || c == '-') { newString.append(c); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-04 17:11:13
|
Revision: 173 http://svn.sourceforge.net/pzfilereader/?rev=173&view=rev Author: zepernick Date: 2006-11-04 09:11:08 -0800 (Sat, 04 Nov 2006) Log Message: ----------- added better javadoc comment for stripNonLongChars Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 17:04:38 UTC (rev 172) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-04 17:11:08 UTC (rev 173) @@ -822,7 +822,12 @@ /** * Removes chars from the String that could not * be parsed into a Long value - * + * + * StringUtils.stripNonLongChars("1000.25") = "1000" + * + * Method will truncate everything to the right of the decimal + * place when encountered. + * * @param value * @return String */ @@ -831,9 +836,6 @@ for (int i = 0; i < value.length(); i++) { final char c = value.charAt(i); - //TODO may want to revist this logic and what exactly should - //happen in this method for the following value 1000.10 - //in the current version (2.2) it would become 100010 if (c == '.') { //stop if we hit a decimal point break; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-25 17:18:52
|
Revision: 196 http://svn.sourceforge.net/pzfilereader/?rev=196&view=rev Author: zepernick Date: 2006-11-25 09:18:43 -0800 (Sat, 25 Nov 2006) Log Message: ----------- added bx parser Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 17:18:09 UTC (rev 195) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 17:18:43 UTC (rev 196) @@ -1,16 +1,34 @@ /* - Copyright 2006 Paul Zepernick - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software distributed - under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - CONDITIONS OF ANY KIND, either express or implied. See the License for - the specific language governing permissions and limitations under the License. + * ObjectLab, http://www.objectlab.co.uk/open is supporting PZFileReader. + * + * Based in London, we are world leaders in the design and development + * of bespoke applications for the securities financing markets. + * + * <a href="http://www.objectlab.co.uk/open">Click here to learn more</a> + * ___ _ _ _ _ _ + * / _ \| |__ (_) ___ ___| |_| | __ _| |__ + * | | | | '_ \| |/ _ \/ __| __| | / _` | '_ \ + * | |_| | |_) | | __/ (__| |_| |__| (_| | |_) | + * \___/|_.__// |\___|\___|\__|_____\__,_|_.__/ + * |__/ + * + * www.ObjectLab.co.uk + * + * $Id: ColorProvider.java 74 2006-10-24 22:19:05Z benoitx $ + * + * Copyright 2006 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. */ package net.sf.pzfilereader.util; @@ -39,10 +57,12 @@ import net.sf.pzfilereader.xml.XMLRecordElement; /** - * @author zepernick Static utilities that are used to perform parsing in the + * Static utilities that are used to perform parsing in the * DataSet class These can also be used for low level parsing, if not * wishing to use the DataSet class. - * @version 2.0 + * + * @author Paul Zepernick + * @author Benoit Xhenseval */ public final class ParserUtils { private ParserUtils() { @@ -53,10 +73,11 @@ * @param line * @param delimiter * @param qualifier - * @return + * @return List */ public static List splitLine(final String line, final String delimiter, final String qualifier) { - return splitLine(line, delimiter != null ? delimiter.charAt(0) : 0, qualifier != null ? qualifier.charAt(0) : 0); + return splitLine(line, delimiter != null ? delimiter.charAt(0) : 0, qualifier != null ? qualifier.charAt(0) : 0, + PZConstants.SPLITLINE_SIZE_INIT); } /** @@ -67,136 +88,109 @@ * Elements which are not qualified will have leading and trailing white * space removed. This includes unqualified elements, which may be * contained in an unqualified parse: "data", data ,"data" + * + * Special thanks to Benoit for contributing this much improved speedy parser :0) * + * @author Benoit Xhenseval * @param line - * String of data to be parsed * @param delimiter - * Delimiter seperating each element * @param qualifier - * qualifier which is surrounding the text - * @return ArrayList + * @param initialSize - + * intial capacity of the List size + * @return List */ - public static List splitLine(String line, final char delimiter, final char qualifier) { - final ArrayList list = new ArrayList(); - - if (line == null) { + public static List splitLine(String line, final char delimiter, final char qualifier, int initialSize) { + List list = new ArrayList(initialSize); + + if (delimiter == 0) { + list.add(line); return list; - } else if (line.trim().length() == 0){ - list.add(null); + } else if (line == null) { return list; } - - boolean beginQualifier = false; - // this will be used for delimted files that have some items qualified - // and some items dont - boolean beginNoQualifier = false; - StringBuffer sb = new StringBuffer(); - // trim hard leading spaces at the begining of the line - line = lTrim(line); - for (int i = 0; i < line.length(); i++) { - final String remainderOfLine = line.substring(i); // data of the - // line which has not yet been read - // check to see if there is a text qualifier - final char currentChar = line.charAt(i); - if (qualifier > 0) { - if (currentChar == qualifier && !beginQualifier && !beginNoQualifier) { - // begining of a set of data - beginQualifier = true; - } else if (!beginQualifier && !beginNoQualifier && currentChar != qualifier - && lTrim(remainderOfLine).charAt(0) != qualifier) { - // try to account for empty space before qualifier starts - // we have not yet begun a qualifier and the char we are on - // is NOT a qualifier. Start reading data - beginNoQualifier = true; - // make sure that this is not just an empty column with no - // qualifiers. ie "data",,"data" - if (currentChar == delimiter) { - //list.add(sb.toString()); - list.add(null); - sb.delete(0, sb.length()); - beginNoQualifier = false; - continue;// grab the next char + final String trimmedLine = line.trim(); + int size = trimmedLine.length(); + + if (size == 0) { + list.add(""); + return list; + } + + boolean insideQualifier = false; + char previousChar = 0; + int startBlock = 0; + int endBlock = 0; + boolean blockWasInQualifier = false; + + final String doubleQualifier = String.valueOf(qualifier) + String.valueOf(qualifier); + for (int i = 0; i < size; i++) { + + final char currentChar = trimmedLine.charAt(i); + if (currentChar != delimiter && currentChar != qualifier) { + previousChar = currentChar; + endBlock = i + 1; + continue; + } + + if (currentChar == delimiter) { + // we've found the delimiter (eg ,) + if (!insideQualifier) { + String trimmed = trimmedLine.substring(startBlock, endBlock > startBlock ? endBlock : startBlock + 1); + if (!blockWasInQualifier) { + trimmed = trimmed.trim(); + trimmed = trimmed.replaceAll(doubleQualifier, String.valueOf(qualifier)); } - sb.append(currentChar); - } else if (!beginNoQualifier && currentChar == qualifier && beginQualifier - && (i == line.length() - 1 || lTrim(remainderOfLine.substring(1)).length() == 0 - // this will be true on empty undelmited columns at the - // end of theline - || lTrimKeepTabs(remainderOfLine).charAt(1) == delimiter)) { - // end of a set of data that was qualified - list.add(sb.toString()); - sb.delete(0, sb.length()); - beginQualifier = false; - // add to "i" so we can get past the qualifier, otherwise it - // is read into a set of data which - // may not be qualified. Find out how many spaces to the - // delimiter - final int offset = getDelimiterOffset(line, i, delimiter) - 1; - // subtract 1 since i is going to get incremented again at - // the top of the loop - if (offset < 1) { - i++; + + if (trimmed.length() == 1 && (trimmed.charAt(0) == delimiter || trimmed.charAt(0) == qualifier)) { + list.add(""); } else { - i += offset; + list.add(trimmed); } - } else if (beginNoQualifier && currentChar == delimiter) { - // check to see if we are done with an element that was not - // being qualified - // remove the space from the front and back of unqualified - // elements - list.add(lTrim(sb.toString().trim())); - sb.delete(0, sb.length()); - beginNoQualifier = false; - } else if (beginNoQualifier || beginQualifier) { - // getting data in a NO qualifier element or qualified - // element - sb.append(currentChar); + blockWasInQualifier = false; + startBlock = i + 1; } - - } else { - // not using a qualifier. Using a delimiter only - if (currentChar == delimiter) { - //remove the space from the front and back of unqualified - //elements - list.add(lTrim(sb.toString().trim())); - sb.delete(0, sb.length()); + } else if (currentChar == qualifier) { + if (!insideQualifier && previousChar != qualifier) { + if (previousChar == delimiter || previousChar == 0 || previousChar == ' ') { + insideQualifier = true; + startBlock = i + 1; + } else { + endBlock = i + 1; + } } else { - sb.append(currentChar); + insideQualifier = false; + blockWasInQualifier = true; + endBlock = i; + // last column (e.g. finishes with ") + if (i == size - 1) { + list.add(trimmedLine.substring(startBlock, size - 1)); + startBlock = i + 1; + } } } + previousChar = currentChar; } - // + this needs to be revisited... - final String trimmed = sb.toString().trim(); - // remove the ending text qualifier if needed - // only if the last element was truly qualified - if (beginQualifier && qualifier > 0 && trimmed.length() > 0) { - if (trimmed.charAt(trimmed.length() - 1) == qualifier) { - // System.out.println(">>>>>>>Triming Off Qualifier"); - final String s = trimmed.substring(0, trimmed.length() - 1); - sb.delete(0, sb.length()); - sb.append(s); + if (startBlock < size) { + String str = trimmedLine.substring(startBlock, size); + str = str.replaceAll(doubleQualifier, String.valueOf(qualifier)); + if (blockWasInQualifier) { + if (str.charAt(str.length() - 1) == qualifier) { + list.add(str.substring(0, str.length() - 1)); + } else { + list.add(str); + } + } else { + list.add(str.trim()); } + } else if (trimmedLine.charAt(size - 1) == delimiter) { + list.add(""); } - final String trimmed2 = line.trim(); - final int lengthLeft = trimmed2.length(); - if (qualifier <= 0 || beginQualifier || beginNoQualifier || lengthLeft > 0 - && trimmed2.charAt(lengthLeft - 1) == delimiter) { - // also account for a delimiter with an empty column at the end that - // was not qualified - // check to see if we need to add the last column in..this will - // happen on empty columns - // add the last column - list.add(!beginQualifier ? lTrim(trimToNull(sb.toString())) : sb.toString()); - //list.add(null); - } - - sb = null; - - list.trimToSize(); - return list; } @@ -340,7 +334,7 @@ continue; } - lineData = splitLine(line, delimiter.charAt(0), qualifier.charAt(0)); + lineData = splitLine(line, delimiter.charAt(0), qualifier.charAt(0), PZConstants.SPLITLINE_SIZE_INIT); for (int i = 0; i < lineData.size(); i++) { final ColumnMetaData cmd = new ColumnMetaData(); cmd.setColName((String) lineData.get(i)); @@ -382,7 +376,7 @@ final List results = new ArrayList(); final Map columnMD = new LinkedHashMap(); - lineData = splitLine(line, delimiter, qualifier); + lineData = splitLine(line, delimiter, qualifier, PZConstants.SPLITLINE_SIZE_INIT); for (int i = 0; i < lineData.size(); i++) { final ColumnMetaData cmd = new ColumnMetaData(); cmd.setColName((String) lineData.get(i)); @@ -422,7 +416,7 @@ continue; } - lineData = splitLine(line, delimiter.charAt(0), qualifier.charAt(0)); + lineData = splitLine(line, delimiter.charAt(0), qualifier.charAt(0), PZConstants.SPLITLINE_SIZE_INIT); for (int i = 0; i < lineData.size(); i++) { final ColumnMetaData cmd = new ColumnMetaData(); cmd.setColName((String) lineData.get(i)); @@ -663,7 +657,7 @@ final XMLRecordElement recordXMLElement = (XMLRecordElement) columnMD.get(key); if (recordXMLElement.getElementNumber() > lineElements.size()) { - // make sure our substring is not going to fail + // make sure the element referenced in the mapping exists continue; } final String lineElement = (String) lineElements.get(recordXMLElement.getElementNumber() - 1); @@ -930,4 +924,110 @@ throw new PZConvertException(ex); } } + + + //LEAVE AS A REFERENCE FOR POSSIBLE LATER USE + /* public static List splitLineWithBuf(String line, final char delimiter, char qualifier, int initialSize) { + List list = new ArrayList(initialSize); + + if (delimiter == 0) { + list.add(line); + return list; + } else if (line == null) { + return list; + } + + final String trimmedLine = line.trim(); + int size = trimmedLine.length(); + + if (size == 0) { + list.add(""); + return list; + } + + boolean insideQualifier = false; + char previousChar = 0; + boolean blockWasInQualifier = false; + StringBuffer buf = new StringBuffer(32); + + // final String doubleQualifier = String.valueOf(qualifier) + + // String.valueOf(qualifier); + for (int i = 0; i < size; i++) { + final char currentChar = trimmedLine.charAt(i); + if (currentChar != delimiter && currentChar != qualifier) { + previousChar = currentChar; + if (' ' != currentChar || insideQualifier || buf.length() > 0) { + buf.append(currentChar); + } + continue; + } + + if (currentChar == delimiter) { + // we've found the delimiter (eg ,) + if (!insideQualifier) { + // String trimmed = trimmedLine.substring(startBlock, + // endBlock > startBlock ? endBlock : startBlock + 1); + String trimmed = buf.toString(); + if (!blockWasInQualifier) { + trimmed = trimmed.trim(); + // trimmed = trimmed.replaceAll(doubleQualifier, + // String.valueOf(qualifier)); + } + + if (trimmed.length() == 1 && (trimmed.charAt(0) == delimiter || trimmed.charAt(0) == qualifier)) { + list.add(""); + } else { + list.add(trimmed); + } + blockWasInQualifier = false; + buf.delete(0, buf.length()); + } else if (buf.length() != 1 || buf.charAt(0) != qualifier) { + buf.append(currentChar); + } else { + buf.delete(0, buf.length()); + insideQualifier = false; + list.add(""); + } + } else if (currentChar == qualifier) { + if (!insideQualifier && previousChar != qualifier) { + if (previousChar == delimiter || previousChar == 0 || previousChar == ' ') { + insideQualifier = true; + int l = buf.length(); + if (l > 0) { + buf.delete(0, l); // just entered a + // qualifier, remove + // whatever was + } + } else { + buf.append(currentChar); + } + } else { + insideQualifier = false; + blockWasInQualifier = true; + if (previousChar == qualifier) { + buf.append(qualifier); + insideQualifier = true; + previousChar = 0; + continue; + } + // last column (e.g. finishes with ") + if (i == size - 1) { + // list.add(trimmedLine.substring(startBlock, size - + // 1)); + list.add(buf.toString()); + buf.delete(0, buf.length()); + } + } + } + previousChar = currentChar; + } + + if (buf.length() > 0) { + list.add(buf.toString().trim()); + } else if (trimmedLine.charAt(size - 1) == delimiter) { + list.add(""); + } + + return list; + }*/ } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-25 19:59:07
|
Revision: 201 http://svn.sourceforge.net/pzfilereader/?rev=201&view=rev Author: zepernick Date: 2006-11-25 11:59:07 -0800 (Sat, 25 Nov 2006) Log Message: ----------- corrected javadoc Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 17:23:29 UTC (rev 200) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 19:59:07 UTC (rev 201) @@ -285,7 +285,7 @@ /** * Removes a single string character from a given string * - * @param character - + * @param theChar - * string char * @param theString - * string to search This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-25 20:29:13
|
Revision: 202 http://svn.sourceforge.net/pzfilereader/?rev=202&view=rev Author: zepernick Date: 2006-11-25 12:29:14 -0800 (Sat, 25 Nov 2006) Log Message: ----------- fixed problem with qualifiers being allowed within the qualified element. Created a new issue. The check on line 134 of ParserUtilsSplitLineTest is failing. It seems to me like the check itself may be flawed on the end result. The parser looks like it is chopping off a " on the end of the element that should be there. Benoit, I left a note on what I updated. There is probably a better way that is going to make for a faster parse. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 19:59:07 UTC (rev 201) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 20:29:14 UTC (rev 202) @@ -130,6 +130,7 @@ for (int i = 0; i < size; i++) { final char currentChar = trimmedLine.charAt(i); + //System.out.println(currentChar); if (currentChar != delimiter && currentChar != qualifier) { previousChar = currentChar; endBlock = i + 1; @@ -161,6 +162,15 @@ } else { endBlock = i + 1; } + } + //TODO + //this is probably a pretty costly check, maybe Benoit will have a better idea of how + //to handle + else if (i + 1 < size && delimiter != ' ' && + lTrimKeepTabs(trimmedLine.substring(i + 1)).charAt(0) != delimiter) { + previousChar = currentChar; + endBlock = i + 1; + continue; } else { insideQualifier = false; blockWasInQualifier = true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-26 13:54:17
|
Revision: 203 http://svn.sourceforge.net/pzfilereader/?rev=203&view=rev Author: zepernick Date: 2006-11-26 05:54:16 -0800 (Sun, 26 Nov 2006) Log Message: ----------- corrected a couple bugs where the double qualifier replace was not getting run. Tried to make the look ahead check a little more efficient preventing the substring and left trim where possible. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-25 20:29:14 UTC (rev 202) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-26 13:54:16 UTC (rev 203) @@ -143,6 +143,8 @@ String trimmed = trimmedLine.substring(startBlock, endBlock > startBlock ? endBlock : startBlock + 1); if (!blockWasInQualifier) { trimmed = trimmed.trim(); + } else { + //need to run the qualifier replace when it was in qualifier trimmed = trimmed.replaceAll(doubleQualifier, String.valueOf(qualifier)); } @@ -163,11 +165,13 @@ endBlock = i + 1; } } - //TODO - //this is probably a pretty costly check, maybe Benoit will have a better idea of how - //to handle + //try to first look ahead 1 char. If we have a match on the delimiter it will drop to the else + //otherwise do one last check to make sure there is no space between the delimiter and + //the qualifer. This looks a little sloppy, but I am trying to avoid the left trim, and substring if + //possible. else if (i + 1 < size && delimiter != ' ' && - lTrimKeepTabs(trimmedLine.substring(i + 1)).charAt(0) != delimiter) { + ((trimmedLine.charAt(i + 1) != ' ' && trimmedLine.charAt(i + 1) != delimiter) || + lTrimKeepTabs(trimmedLine.substring(i + 1)).charAt(0) != delimiter)) { previousChar = currentChar; endBlock = i + 1; continue; @@ -177,7 +181,9 @@ endBlock = i; // last column (e.g. finishes with ") if (i == size - 1) { - list.add(trimmedLine.substring(startBlock, size - 1)); + String str = trimmedLine.substring(startBlock, size - 1); + str = str.replaceAll(doubleQualifier, String.valueOf(qualifier)); + list.add(str); startBlock = i + 1; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-27 13:47:06
|
Revision: 208 http://svn.sourceforge.net/pzfilereader/?rev=208&view=rev Author: zepernick Date: 2006-11-27 05:47:01 -0800 (Mon, 27 Nov 2006) Log Message: ----------- added to the comment about the lTrimKeepTabs Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-27 13:27:43 UTC (rev 207) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-27 13:47:01 UTC (rev 208) @@ -169,6 +169,8 @@ //otherwise do one last check to make sure there is no space between the delimiter and //the qualifer. This looks a little sloppy, but I am trying to avoid the left trim, and substring if //possible. + // "a","b","c" should not call the lTrimKeepTabs + // "a", "b", "c" will use the lTrimKeepTabs to remove the space between the delimiter and qualifer else if (i + 1 < size && delimiter != ' ' && ((trimmedLine.charAt(i + 1) != ' ' && trimmedLine.charAt(i + 1) != delimiter) || lTrimKeepTabs(trimmedLine.substring(i + 1)).charAt(0) != delimiter)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-11-27 16:06:57
|
Revision: 209 http://svn.sourceforge.net/pzfilereader/?rev=209&view=rev Author: zepernick Date: 2006-11-27 08:06:49 -0800 (Mon, 27 Nov 2006) Log Message: ----------- more efficient check for unescaped qualifier contained within a qualified element. This new change avoids substring and left trim per Benoit's suggestion. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-27 13:47:01 UTC (rev 208) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-11-27 16:06:49 UTC (rev 209) @@ -164,20 +164,29 @@ } else { endBlock = i + 1; } - } - //try to first look ahead 1 char. If we have a match on the delimiter it will drop to the else - //otherwise do one last check to make sure there is no space between the delimiter and - //the qualifer. This looks a little sloppy, but I am trying to avoid the left trim, and substring if - //possible. - // "a","b","c" should not call the lTrimKeepTabs - // "a", "b", "c" will use the lTrimKeepTabs to remove the space between the delimiter and qualifer - else if (i + 1 < size && delimiter != ' ' && - ((trimmedLine.charAt(i + 1) != ' ' && trimmedLine.charAt(i + 1) != delimiter) || - lTrimKeepTabs(trimmedLine.substring(i + 1)).charAt(0) != delimiter)) { - previousChar = currentChar; - endBlock = i + 1; - continue; } else { + if (i + 1 < size && delimiter != ' ') { + //this is used to allow unescaped qualifiers to be contained within the element + //do not run this check is a space is being used as a delimiter + //we don't want to trim the delimiter off + //loop until we find a char that is not a space, or we reach the end of the line. + int start = i + 1; + char charToCheck = trimmedLine.charAt(start); + while (charToCheck == ' ') { + start ++; + if (start == size) { + break; + } + charToCheck = trimmedLine.charAt(start); + } + + if (charToCheck != delimiter) { + previousChar = currentChar; + endBlock = i + 1; + continue; + } + + } insideQualifier = false; blockWasInQualifier = true; endBlock = i; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2006-12-15 00:23:46
|
Revision: 211 http://svn.sourceforge.net/pzfilereader/?rev=211&view=rev Author: zepernick Date: 2006-12-14 16:23:46 -0800 (Thu, 14 Dec 2006) Log Message: ----------- cleaned up exception handling. Got rid of throws Exception Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-12-12 12:55:58 UTC (rev 210) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-12-15 00:23:46 UTC (rev 211) @@ -398,7 +398,7 @@ * @exception Exception * @return ArrayList - ColumnMetaData */ - public static Map getColumnMDFromFile(final String line, final char delimiter, final char qualifier) throws Exception { + public static Map getColumnMDFromFile(final String line, final char delimiter, final char qualifier) { List lineData = null; final List results = new ArrayList(); final Map columnMD = new LinkedHashMap(); @@ -424,10 +424,12 @@ * @param theFile * @param delimiter * @param qualifier - * @exception Exception + * @exception FileNotFoundException + * @exception IOException * @return ArrayList - ColumnMetaData */ - public static List getColumnMDFromFile(final File theFile, final String delimiter, final String qualifier) throws Exception { + public static List getColumnMDFromFile(final File theFile, final String delimiter, + final String qualifier) throws IOException{ BufferedReader br = null; FileReader fr = null; String line = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <be...@us...> - 2006-12-16 21:01:40
|
Revision: 247 http://svn.sourceforge.net/pzfilereader/?rev=247&view=rev Author: benoitx Date: 2006-12-16 13:01:40 -0800 (Sat, 16 Dec 2006) Log Message: ----------- checkstyle Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-12-16 21:01:13 UTC (rev 246) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-12-16 21:01:40 UTC (rev 247) @@ -39,7 +39,6 @@ import java.io.FileReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.Reader; import java.net.URL; import java.sql.Connection; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2007-01-10 17:01:35
|
Revision: 261 http://svn.sourceforge.net/pzfilereader/?rev=261&view=rev Author: zepernick Date: 2007-01-10 09:01:29 -0800 (Wed, 10 Jan 2007) Log Message: ----------- fixed NPE when referencing a column that did not exist in the map for a fixed length file Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2007-01-02 23:20:18 UTC (rev 260) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2007-01-10 17:01:29 UTC (rev 261) @@ -645,7 +645,10 @@ idx = ((XMLRecordElement) columnMD.get(key)).getColumnIndex(colName); } else if (key == null || key.equals(PZConstants.DETAIL_ID)) { final Map map = (Map) columnMD.get(PZConstants.COL_IDX); - idx = ((Integer) map.get(colName)).intValue(); + final Integer i = (Integer) map.get(colName); + if (i != null) { //happens when the col name does not exist in the mapping + idx = i.intValue(); + } } if (idx < 0) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2007-02-06 14:45:50
|
Revision: 276 http://svn.sourceforge.net/pzfilereader/?rev=276&view=rev Author: zepernick Date: 2007-02-06 06:45:14 -0800 (Tue, 06 Feb 2007) Log Message: ----------- corrected issue with tab and space delimiter when there were just empty columns with no data. trim() was removing delimiters and breaking the parse Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2007-02-06 14:43:08 UTC (rev 275) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2007-02-06 14:45:14 UTC (rev 276) @@ -115,8 +115,16 @@ } else if (line == null) { return list; } - - final String trimmedLine = line.trim(); + + String trimmedLine; + if (delimiter == '\t' || delimiter == ' ') { + //skip the trim for these delimiters, doing the trim will mess up the parse + //on empty records which contain just the delimiter + trimmedLine = line; + } else { + trimmedLine = line.trim(); + } + int size = trimmedLine.length(); if (size == 0) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <zep...@us...> - 2007-03-20 15:36:38
|
Revision: 288 http://svn.sourceforge.net/pzfilereader/?rev=288&view=rev Author: zepernick Date: 2007-03-20 08:36:35 -0700 (Tue, 20 Mar 2007) Log Message: ----------- Stopped column lookup from being case sensitive Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2007-03-20 15:36:16 UTC (rev 287) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2007-03-20 15:36:35 UTC (rev 288) @@ -50,6 +50,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.NoSuchElementException; import java.util.Properties; @@ -650,10 +651,11 @@ public static int getColumnIndex(final String key, final Map columnMD, final String colName) { int idx = -1; if (key != null && !key.equals(PZConstants.DETAIL_ID) && !key.equals(PZConstants.COL_IDX)) { - idx = ((XMLRecordElement) columnMD.get(key)).getColumnIndex(colName); + idx = ((XMLRecordElement) columnMD.get(key)).getColumnIndex(colName.toLowerCase( + Locale.getDefault())); } else if (key == null || key.equals(PZConstants.DETAIL_ID)) { final Map map = (Map) columnMD.get(PZConstants.COL_IDX); - final Integer i = (Integer) map.get(colName); + final Integer i = (Integer) map.get(colName.toLowerCase(Locale.getDefault())); if (i != null) { //happens when the col name does not exist in the mapping idx = i.intValue(); } @@ -765,7 +767,8 @@ int idx = 0; for (final Iterator it = columns.iterator(); it.hasNext(); idx++) { final ColumnMetaData meta = (ColumnMetaData) it.next(); - map.put(meta.getColName(), new Integer(idx)); + map.put(meta.getColName().toLowerCase( + Locale.getDefault()), new Integer(idx)); } } return map; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |