From: <be...@us...> - 2006-10-31 22:47:30
|
Revision: 165 http://svn.sourceforge.net/pzfilereader/?rev=165&view=rev Author: benoitx Date: 2006-10-31 14:47:08 -0800 (Tue, 31 Oct 2006) Log Message: ----------- Paul, some interesting new methods: the BX parser (sorry about the name) seems to be significantly faster than the current one when there are NO qualifiers. If there are qualifiers, it is reasonably faster. But the interesting bits start when I created a new method that uses a StringBuffer rather than using "chunks". The new method is flying when there are qualifiers all over the place... but is slower (albeit not slower than the current one) when there are no qualifiers... and that is a bit of a mystery... anyway... it's getting too late... Have a look and let me know. Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/BXParser.java trunk/PZFileReader/src/test/java/net/sf/pzfilereader/parserutils/BXParserTest.java trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/BXParser.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/BXParser.java 2006-10-31 20:47:02 UTC (rev 164) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/BXParser.java 2006-10-31 22:47:08 UTC (rev 165) @@ -12,8 +12,12 @@ */ public class BXParser { public static List splitLine(String line, final char delimiter, char qualifier) { - List list = new ArrayList(); + return splitLine(line, delimiter, qualifier, 10); + } + public static List splitLine(String line, final char delimiter, char qualifier, int initialSize) { + List list = new ArrayList(initialSize); + if (delimiter == 0) { list.add(line); return list; @@ -39,7 +43,6 @@ for (int i = 0; i < size; i++) { final char currentChar = trimmedLine.charAt(i); - if (currentChar != delimiter && currentChar != qualifier) { previousChar = currentChar; endBlock = i + 1; @@ 
-52,10 +55,9 @@ String trimmed = trimmedLine.substring(startBlock, endBlock > startBlock ? endBlock : startBlock + 1); if (!blockWasInQualifier) { trimmed = trimmed.trim(); + trimmed = trimmed.replaceAll(doubleQualifier, String.valueOf(qualifier)); } - trimmed = trimmed.replaceAll(doubleQualifier, String.valueOf(qualifier)); - if (trimmed.length() == 1 && (trimmed.charAt(0) == delimiter || trimmed.charAt(0) == qualifier)) { list.add(""); } else { @@ -83,7 +85,6 @@ } } } - // antepenultimateChar = previousChar; previousChar = currentChar; } @@ -105,4 +106,108 @@ return list; } + + public static List splitLineWithBuf(String line, final char delimiter, char qualifier, int initialSize) { + List list = new ArrayList(initialSize); + + if (delimiter == 0) { + list.add(line); + return list; + } else if (line == null) { + return list; + } + + final String trimmedLine = line.trim(); + int size = trimmedLine.length(); + + if (size == 0) { + list.add(""); + return list; + } + + boolean insideQualifier = false; + char previousChar = 0; + boolean blockWasInQualifier = false; + StringBuffer buf = new StringBuffer(32); + + // final String doubleQualifier = String.valueOf(qualifier) + + // String.valueOf(qualifier); + for (int i = 0; i < size; i++) { + final char currentChar = trimmedLine.charAt(i); + if (currentChar != delimiter && currentChar != qualifier) { + previousChar = currentChar; + if (' ' != currentChar || insideQualifier || buf.length() > 0) { + buf.append(currentChar); + } + continue; + } + + if (currentChar == delimiter) { + // we've found the delimiter (eg ,) + if (!insideQualifier) { + // String trimmed = trimmedLine.substring(startBlock, + // endBlock > startBlock ? 
endBlock : startBlock + 1); + String trimmed = buf.toString(); + if (!blockWasInQualifier) { + trimmed = trimmed.trim(); + // trimmed = trimmed.replaceAll(doubleQualifier, + // String.valueOf(qualifier)); + } + + if (trimmed.length() == 1 && (trimmed.charAt(0) == delimiter || trimmed.charAt(0) == qualifier)) { + list.add(""); + } else { + list.add(trimmed); + } + blockWasInQualifier = false; + buf.delete(0, buf.length()); + } else if (buf.length() != 1 || buf.charAt(0) != qualifier) { + buf.append(currentChar); + } else { + buf.delete(0, buf.length()); + insideQualifier = false; + list.add(""); + } + } else if (currentChar == qualifier) { + if (!insideQualifier && previousChar != qualifier) { + if (previousChar == delimiter || previousChar == 0 || previousChar == ' ') { + insideQualifier = true; + int l = buf.length(); + if (l > 0) { + buf.delete(0, l); // just entered a + // qualifier, remove + // whatever was + } + } else { + buf.append(currentChar); + } + } else { + insideQualifier = false; + blockWasInQualifier = true; + if (previousChar == qualifier) { + buf.append(qualifier); + insideQualifier = true; + previousChar = 0; + continue; + } + // last column (e.g. 
finishes with ") + if (i == size - 1) { + // list.add(trimmedLine.substring(startBlock, size - + // 1)); + list.add(buf.toString()); + buf.delete(0, buf.length()); + } + } + } + previousChar = currentChar; + } + + if (buf.length() > 0) { + list.add(buf.toString().trim()); + } else if (trimmedLine.charAt(size - 1) == delimiter) { + list.add(""); + } + + return list; + } } Modified: trunk/PZFileReader/src/test/java/net/sf/pzfilereader/parserutils/BXParserTest.java =================================================================== --- trunk/PZFileReader/src/test/java/net/sf/pzfilereader/parserutils/BXParserTest.java 2006-10-31 20:47:02 UTC (rev 164) +++ trunk/PZFileReader/src/test/java/net/sf/pzfilereader/parserutils/BXParserTest.java 2006-10-31 22:47:08 UTC (rev 165) @@ -44,7 +44,7 @@ final String txtToParse = UnitTestUtils.buildDelimString(DELIMITED_DATA_NO_BREAKS, d, q); - final List splitLineResults = BXParser.splitLine(txtToParse, d, q); + final List splitLineResults = BXParser.splitLineWithBuf(txtToParse, d, q, 10); // check to make sure we have the same amount of elements which were // expected @@ -64,7 +64,7 @@ * Test with any line breaks * */ - public void NOtestLineBreaks() { + public void testLineBreaks() { // loop down all delimiter qualifier pairs to test for (int i = 0; i < DELIM_QUAL_PAIR.length; i++) { final char d = DELIM_QUAL_PAIR[i][0]; @@ -72,7 +72,7 @@ final String txtToParse = UnitTestUtils.buildDelimString(DELIMITED_DATA_WITH_BREAKS, d, q); - final List splitLineResults = BXParser.splitLine(txtToParse, d, q); + final List splitLineResults = BXParser.splitLineWithBuf(txtToParse, d, q, 10); // check to make sure we have the same amount of elements which were // expected @@ -92,7 +92,7 @@ * data */ public void testMalformedData() { - final List splitLineResults = BXParser.splitLine(DELIMITED_BAD_DATA, ',', '\"'); + final List splitLineResults = BXParser.splitLineWithBuf(DELIMITED_BAD_DATA, ',', '\"', 10); assertEquals("Expecting 2 Data Elements 
From The Malformed Data", 2, splitLineResults.size()); } @@ -134,7 +134,7 @@ } private void check(final String txtToParse, final char delim, final char qualifier, final String[] expected) { - final List splitLineResults = BXParser.splitLine(txtToParse, delim, qualifier); + final List splitLineResults = BXParser.splitLineWithBuf(txtToParse, delim, qualifier, 10); assertEquals( "Did Not Get Amount Of Elements Expected (d = " + delim + " q = " + qualifier + ") txt [" + txtToParse + "]", Modified: trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java =================================================================== --- trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java 2006-10-31 20:47:02 UTC (rev 164) +++ trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java 2006-10-31 22:47:08 UTC (rev 165) @@ -252,11 +252,11 @@ public void doTestParsers() { final int repeat = ConsoleMenu.getInt("How many times?", 1000); - final int characters = ConsoleMenu.getInt("How many columns?", 100); + final int numberOfCols = ConsoleMenu.getInt("How many columns?", 100); final boolean qualif = ConsoleMenu.getBoolean("With qualifier?", true); StringBuilder aRow = new StringBuilder(); - for (int i = 0; i < characters; i++) { + for (int i = 0; i < numberOfCols; i++) { if (qualif) { aRow.append("\""); } @@ -277,14 +277,21 @@ System.out.println("ParserUtil " + (stop - start) + " ms."); start = System.currentTimeMillis(); - StringBuffer sb = new StringBuffer(); for (int i = 0; i < repeat; i++) { - BXParser.splitLine(line, ',', '\"'); + BXParser.splitLine(line, ',', '\"', numberOfCols); } stop = System.currentTimeMillis(); System.out.println("BXParser " + (stop - start) + " ms."); + start = System.currentTimeMillis(); + for (int i = 0; i < repeat; i++) { + BXParser.splitLineWithBuf(line, ',', '\"', numberOfCols); + } + stop = System.currentTimeMillis(); + + System.out.println("BXParser with buf " + (stop - 
start) + " ms."); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |