From: <be...@us...> - 2006-10-18 13:40:38
|
Revision: 99 http://svn.sourceforge.net/pzfilereader/?rev=99&view=rev Author: benoitx Date: 2006-10-18 06:40:13 -0700 (Wed, 18 Oct 2006) Log Message: ----------- Some serious kicking... 1/ use a map for finding the column index; this makes the fetch of the first or last column consistent 2/ removed SOME of the substring which are causing dramatic performance degradation when one has a fair amount of columns. 3/ optimised some string manipulation code (getDelimiterOffset, lTrim, lTrimKeepTabs, removeChar) 4/ I would suggest the creation of a suite of unit tests for all those methods.... Paul, do you want to take this on? Modified Paths: -------------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/DataSet.java trunk/PZFileReader/src/main/java/net/sf/pzfilereader/ordering/OrderBy.java trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ExcelTransformer.java trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/PZMapParser.java trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/ResolveLocalDTD.java trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/XMLRecordElement.java trunk/PZFileReaderSamples/.classpath trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/csvperformancetest/CSVPerformanceTest.java trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/delimiteddynamiccolumns/DelimitedWithPZMap.java Added Paths: ----------- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/PZConstants.java Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/DataSet.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/DataSet.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/DataSet.java 2006-10-18 13:40:13 UTC (rev 99) @@ -31,11 
+31,13 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.NoSuchElementException; import net.sf.pzfilereader.ordering.OrderBy; import net.sf.pzfilereader.structure.ColumnMetaData; import net.sf.pzfilereader.structure.Row; import net.sf.pzfilereader.util.ExcelTransformer; +import net.sf.pzfilereader.util.PZConstants; import net.sf.pzfilereader.util.ParserUtils; import net.sf.pzfilereader.xml.PZMapParser; @@ -48,8 +50,6 @@ * @todo Ought to implement an interface for the access to data. */ public class DataSet { - private static final String DETAIL_ID = "detail"; - /** Array to hold the rows and their values in the text file */ private List rows = null; @@ -148,7 +148,7 @@ // the dataset when reading in the file while (rs.next()) { - ColumnMetaData column = new ColumnMetaData(); + final ColumnMetaData column = new ColumnMetaData(); column.setColName(rs.getString("DATASTRUCTURE_COLUMN")); column.setColLength(rs.getInt("DATASTRUCTURE_LENGTH")); column.setStartPosition(recPosition); @@ -158,7 +158,8 @@ cmds.add(column); } - columnMD.put(DETAIL_ID, cmds); + columnMD.put(PZConstants.DETAIL_ID, cmds); + columnMD.put(PZConstants.COL_IDX, ParserUtils.buidColumnIndexMap(cmds)); if (cmds.isEmpty()) { throw new FileNotFoundException("DATA DEFINITION CAN NOT BE FOUND IN THE DATABASE " + dataDefinition); @@ -271,7 +272,8 @@ hasResults = true; } - columnMD.put(DETAIL_ID, cmds); + columnMD.put(PZConstants.DETAIL_ID, cmds); + columnMD.put(PZConstants.COL_IDX, ParserUtils.buidColumnIndexMap(cmds)); if (!hasResults) { throw new FileNotFoundException("DATA DEFINITION CAN NOT BE FOUND IN THE DATABASE " + dataDefinition); @@ -522,7 +524,7 @@ int recPosition = 1; final Row row = new Row(); - row.setMdkey(mdkey.equals(DETAIL_ID) ? null : mdkey); // try + row.setMdkey(mdkey.equals(PZConstants.DETAIL_ID) ? 
null : mdkey); // try final List cmds = ParserUtils.getColumnMetaData(mdkey, columnMD); // to limit the memory use @@ -678,11 +680,11 @@ // ******************************************************************** // column values - List columns = ParserUtils.splitLine(lineData, delimiter, qualifier); + final List columns = ParserUtils.splitLine(lineData, delimiter, qualifier); lineData = ""; - String mdkey = ParserUtils.getCMDKeyForDelimitedFile(columnMD, columns); - List cmds = ParserUtils.getColumnMetaData(mdkey, columnMD); - int columnCount = cmds.size(); + final String mdkey = ParserUtils.getCMDKeyForDelimitedFile(columnMD, columns); + final List cmds = ParserUtils.getColumnMetaData(mdkey, columnMD); + final int columnCount = cmds.size(); // DEBUG // Incorrect record length on line log the error. Line @@ -707,8 +709,8 @@ } } - Row row = new Row(); - row.setMdkey(mdkey.equals(DETAIL_ID) ? null : mdkey); // try + final Row row = new Row(); + row.setMdkey(mdkey.equals(PZConstants.DETAIL_ID) ? 
null : mdkey); // try // to limit the memory use row.setCols(columns); row.setRowNumber(lineCount); @@ -739,10 +741,14 @@ */ public void setValue(final String columnName, final String value) throws Exception { /** get a reference to the row */ - Row row = (Row) rows.get(pointer); - final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), columnMD); + final Row row = (Row) rows.get(pointer); + + final int idx = ParserUtils.getColumnIndex(row.getMdkey(), columnMD, columnName); + row.setValue(idx, value); + // final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), + // columnMD); /** change the value of the column */ - row.setValue(ParserUtils.findColumn(columnName, cmds), value); + // row.setValue(ParserUtils.findColumn(columnName, cmds), value); } /** @@ -799,20 +805,27 @@ */ public String getString(final String column) { final Row row = (Row) rows.get(pointer); - final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), columnMD); + // final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), + // columnMD); + final String s = row.getValue(ParserUtils.getColumnIndex(row.getMdkey(), columnMD, column)); if (upperCase) { // convert data to uppercase before returning - return row.getValue(ParserUtils.findColumn(column, cmds)).toUpperCase(Locale.getDefault()); + // return row.getValue(ParserUtils.findColumn(column, + // cmds)).toUpperCase(Locale.getDefault()); + return s.toUpperCase(Locale.getDefault()); } if (lowerCase) { // convert data to lowercase before returning - return row.getValue(ParserUtils.findColumn(column, cmds)).toLowerCase(Locale.getDefault()); + // return row.getValue(ParserUtils.findColumn(column, + // cmds)).toLowerCase(Locale.getDefault()); + return s.toLowerCase(Locale.getDefault()); } // return value as how it is in the file - return row.getValue(ParserUtils.findColumn(column, cmds)); + // return row.getValue(ParserUtils.findColumn(column, cmds)); + return s; } /** @@ -828,8 +841,11 @@ final StringBuffer newString = new 
StringBuffer(); final Row row = (Row) rows.get(pointer); - final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), columnMD); - String s = ((Row) rows.get(pointer)).getValue(ParserUtils.findColumn(column, cmds)); + // final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), + // columnMD); + // String s = ((Row) + // rows.get(pointer)).getValue(ParserUtils.findColumn(column, cmds)); + final String s = row.getValue(ParserUtils.getColumnIndex(row.getMdkey(), columnMD, column)); if (!strictNumericParse) { if (s.trim().length() == 0) { @@ -864,10 +880,12 @@ public int getInt(final String column) { final StringBuffer newString = new StringBuffer(); final Row row = (Row) rows.get(pointer); - final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), columnMD); + // final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), + // columnMD); + // + // String s = row.getValue(ParserUtils.findColumn(column, cmds)); + final String s = row.getValue(ParserUtils.getColumnIndex(row.getMdkey(), columnMD, column)); - String s = row.getValue(ParserUtils.findColumn(column, cmds)); - if (!strictNumericParse) { if (s.trim().length() == 0) { return 0; @@ -903,9 +921,11 @@ public Date getDate(final String column) throws ParseException { final SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); final Row row = (Row) rows.get(pointer); - final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), columnMD); + // final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), + // columnMD); - String s = row.getValue(ParserUtils.findColumn(column, cmds)); + // String s = row.getValue(ParserUtils.findColumn(column, cmds)); + final String s = row.getValue(ParserUtils.getColumnIndex(row.getMdkey(), columnMD, column)); return sdf.parse(s); } @@ -924,9 +944,11 @@ */ public Date getDate(final String column, final SimpleDateFormat sdf) throws ParseException { final Row row = (Row) rows.get(pointer); - final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), 
columnMD); - - String s = row.getValue(ParserUtils.findColumn(column, cmds)); + // final List cmds = ParserUtils.getColumnMetaData(row.getMdkey(), + // columnMD); + // + // String s = row.getValue(ParserUtils.findColumn(column, cmds)); + final String s = row.getValue(ParserUtils.getColumnIndex(row.getMdkey(), columnMD, column)); return sdf.parse(s); } @@ -941,11 +963,13 @@ String[] array = null; if (columnMD != null) { - final List cmds = ParserUtils.getColumnMetaData(DETAIL_ID, columnMD); + final List cmds = ParserUtils.getColumnMetaData(PZConstants.DETAIL_ID, columnMD); + array = new String[cmds.size()]; for (int i = 0; i < cmds.size(); i++) { column = (ColumnMetaData) cmds.get(i); array[i] = column.getColName(); + System.out.println(i + "/ Columns... " + column.getColName()); } } @@ -966,7 +990,7 @@ final List cmds = ParserUtils.getColumnMetaData(recordID, columnMD); array = new String[cmds.size()]; for (int i = 0; i < cmds.size(); i++) { - ColumnMetaData column = (ColumnMetaData) cmds.get(i); + final ColumnMetaData column = (ColumnMetaData) cmds.get(i); array[i] = column.getColName(); } } @@ -1056,7 +1080,7 @@ public boolean isRecordID(final String recordID) { String rowID = ((Row) rows.get(pointer)).getMdkey(); if (rowID == null) { - rowID = DETAIL_ID; + rowID = PZConstants.DETAIL_ID; } return rowID.equals(recordID); @@ -1124,7 +1148,7 @@ // with <RECORD> mappings"); // } if (ob != null && rows != null) { - final List cmds = ParserUtils.getColumnMetaData(DETAIL_ID, columnMD); + final List cmds = ParserUtils.getColumnMetaData(PZConstants.DETAIL_ID, columnMD); ob.setColumnMD(cmds); Collections.sort(rows, ob); goTop(); Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/ordering/OrderBy.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/ordering/OrderBy.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/ordering/OrderBy.java 
2006-10-18 13:40:13 UTC (rev 99) @@ -47,7 +47,7 @@ } /** - * over ridden from the Comparator class. + * overridden from the Comparator class. * * Performs the sort * @@ -59,27 +59,28 @@ for (int i = 0; i < orderbys.size(); i++) { final OrderColumn oc = (OrderColumn) orderbys.get(i); - - //null indicates "detail" record which is what the parser assigns to <column> 's setup outside of <record> elements - //shift all non detail records to the bottom of the DataSet - if (row0.getMdkey() != null && !row0.getMdkey().equals("detail")){ + + // null indicates "detail" record which is what the parser assigns + // to <column> 's setup outside of <record> elements + // shift all non detail records to the bottom of the DataSet + if (row0.getMdkey() != null && !row0.getMdkey().equals("detail")) { return 1; - }else if (row1.getMdkey() != null && !row1.getMdkey().equals("detail")){ + } else if (row1.getMdkey() != null && !row1.getMdkey().equals("detail")) { return 0; } - + // convert to one type of case so the comparator does not take case // into account when sorting final Comparable comp0 = row0.getValue(ParserUtils.findColumn(oc.getColumnName(), columnMD)).toLowerCase(); final Comparable comp1 = row1.getValue(ParserUtils.findColumn(oc.getColumnName(), columnMD)).toLowerCase(); -//+ BX will never be equal to null. -// if (comp0 == null) { -// comp0 = new String(""); -// } -// if (comp1 == null) { -// comp1 = new String(""); -// } + // + BX will never be equal to null. 
+ // if (comp0 == null) { + // comp0 = new String(""); + // } + // if (comp1 == null) { + // comp1 = new String(""); + // } // multiply by the sort indicator to get a ASC or DESC result final int result = comp0.compareTo(comp1) * oc.getSortIndicator(); Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ExcelTransformer.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ExcelTransformer.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ExcelTransformer.java 2006-10-18 13:40:13 UTC (rev 99) @@ -22,7 +22,6 @@ import jxl.write.WritableFont; import jxl.write.WritableSheet; import jxl.write.WritableWorkbook; - import net.sf.pzfilereader.DataSet; /** Added: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/PZConstants.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/PZConstants.java (rev 0) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/PZConstants.java 2006-10-18 13:40:13 UTC (rev 99) @@ -0,0 +1,18 @@ +/** + * + */ +package net.sf.pzfilereader.util; + +/** + * @author xhensevb + * + */ +public final class PZConstants { + public static final String DETAIL_ID = "detail"; + + public static final String COL_IDX = "colIndex"; + + private PZConstants() { + + } +} Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/util/ParserUtils.java 2006-10-18 13:40:13 UTC (rev 99) @@ -40,10 +40,7 @@ * @version 2.0 */ public final class ParserUtils { - private static final String DETAIL_ID = "detail"; - private ParserUtils() { - } /** @@ -73,11 +70,9 @@ line = 
lTrim(line); for (int i = 0; i < line.length(); i++) { final String remainderOfLine = line.substring(i); // data of the - // line which - // has not yet - // been - // read + // line which has not yet been read // check to see if there is a text qualifier + final char currentChar = line.charAt(i); if (qualifier != null && qualifier.trim().length() > 0) { if (line.substring(i, i + 1).equals(qualifier) && !beginQualifier && !beginNoQualifier) { // begining of a set of data @@ -98,7 +93,7 @@ beginNoQualifier = false; continue;// grab the next char } - sb.append(line.substring(i, i + 1)); + sb.append(currentChar); } else if ((!beginNoQualifier) && line.substring(i, i + 1).equals(qualifier) && beginQualifier && (lTrim(line.substring(i + 1)).length() == 0 // this will be true on empty undelmited columns at the @@ -130,7 +125,7 @@ } else if (beginNoQualifier || beginQualifier) { // getting data in a NO qualifier element or qualified // element - sb.append(line.substring(i, i + 1)); + sb.append(currentChar); } } else { @@ -139,7 +134,7 @@ list.add(sb.toString()); sb.delete(0, sb.length()); } else { - sb.append(line.substring(i, i + 1)); + sb.append(currentChar); } } } @@ -151,7 +146,6 @@ sb.delete(0, sb.length()); sb.append(s); } - } if (qualifier == null || qualifier.trim().length() == 0 || beginQualifier || beginNoQualifier @@ -180,14 +174,29 @@ */ public static int getDelimiterOffset(final String line, final int start, final String delimiter) { - int offset = 0; - for (int i = start; i < line.length(); i++) { - offset++; - if (line.substring(i, i + 1).equals(delimiter)) { - return offset; - } + int idx = line.indexOf(delimiter, start); + if (idx >= 0) { + // idx++; + // idx-=start; + idx -= start - 1; } - return -1; + return idx; + + // int offset = 0; + // for (int i = start; i < line.length(); i++) { + // offset++; + // if (line.substring(i, i + 1).equals(delimiter)) { + // if (offset != idx) { + // System.out.println("String [" + line + "] start:" + start + "(" + + 
// line.charAt(start) + ") delim [" + // + delimiter + "] length:" + delimiter.length() + " Old:" + offset + " + // new:" + idx); + // } + // + // return offset; + // } + // } + // return -1; } /** @@ -198,20 +207,34 @@ * @return String */ public static String lTrim(final String value) { - final StringBuffer returnVal = new StringBuffer(); - boolean gotAChar = false; + String trimmed = value; + int offset = 0; + final int maxLength = value.length(); + while (offset < maxLength && (value.charAt(offset) == ' ' || value.charAt(offset) == '\t')) { + offset++; + } - for (int i = 0; i < value.length(); i++) { - if (value.substring(i, i + 1).trim().length() == 0 && !gotAChar) { - continue; - } else { - gotAChar = true; - returnVal.append(value.substring(i, i + 1)); - } + if (offset > 0) { + trimmed = value.substring(offset); } - return returnVal.toString(); - + return trimmed; + // + // + // final StringBuffer returnVal = new StringBuffer(); + // boolean gotAChar = false; + // + // for (int i = 0; i < value.length(); i++) { + // if (value.substring(i, i + 1).trim().length() == 0 && !gotAChar) { + // continue; + // } else { + // gotAChar = true; + // returnVal.append(value.substring(i, i + 1)); + // } + // } + // + // return returnVal.toString(); + // } /** @@ -222,19 +245,32 @@ * @return String */ public static String lTrimKeepTabs(final String value) { - final StringBuffer returnVal = new StringBuffer(); - boolean gotAChar = false; + String trimmed = value; + int offset = 0; + final int maxLength = value.length(); + while (offset < maxLength && value.charAt(offset) == ' ') { + offset++; + } - for (int i = 0; i < value.length(); i++) { - if (!value.substring(i, i + 1).equals("\t") && value.substring(i, i + 1).trim().length() == 0 && !gotAChar) { - continue; - } else { - gotAChar = true; - returnVal.append(value.substring(i, i + 1)); - } + if (offset > 0) { + trimmed = value.substring(offset); } - return returnVal.toString(); + return trimmed; + // final StringBuffer 
returnVal = new StringBuffer(); + // boolean gotAChar = false; + // + // for (int i = 0; i < value.length(); i++) { + // if (!value.substring(i, i + 1).equals("\t") && value.substring(i, i + + // 1).trim().length() == 0 && !gotAChar) { + // continue; + // } else { + // gotAChar = true; + // returnVal.append(value.substring(i, i + 1)); + // } + // } + // + // return returnVal.toString(); } @@ -247,16 +283,25 @@ * string to search * @return String */ - public static String removeChar(final String character, final String theString) { + public static String removeChar(final char theChar, final String theString) { final StringBuffer s = new StringBuffer(); for (int i = 0; i < theString.length(); i++) { - if (theString.substring(i, i + 1).equalsIgnoreCase(character)) { - continue; + final char currentChar = theString.charAt(i); + if (currentChar != theChar) { + s.append(currentChar); } - s.append(theString.substring(i, i + 1)); } return s.toString(); + // final StringBuffer s = new StringBuffer(); + // for (int i = 0; i < theString.length(); i++) { + // if (theString.substring(i, i + 1).equalsIgnoreCase(character)) { + // continue; + // } + // s.append(theString.substring(i, i + 1)); + // } + // + // return s.toString(); } @@ -313,7 +358,8 @@ } } - columnMD.put(DETAIL_ID, results); + columnMD.put(PZConstants.DETAIL_ID, results); + columnMD.put(PZConstants.COL_IDX, buidColumnIndexMap(results)); return columnMD; } @@ -341,7 +387,8 @@ results.add(cmd); } - columnMD.put(DETAIL_ID, results); + columnMD.put(PZConstants.DETAIL_ID, results); + columnMD.put(PZConstants.COL_IDX, buidColumnIndexMap(results)); return columnMD; } @@ -401,6 +448,7 @@ * vector of ColumnMetaData objects * @return int - position of the column in the file * @throws NoSuchElementException + * @deprecated surely not... 
*/ public static int findColumn(final String columnName, final List columnMD) { for (int i = 0; i < columnMD.size(); i++) { @@ -506,8 +554,8 @@ final Iterator columnMDIt = columnMD.keySet().iterator(); while (columnMDIt.hasNext()) { final String key = (String) columnMDIt.next(); - if (key.equals(DETAIL_ID)) { - cmds = (List) columnMD.get(key); + if (key.equals(PZConstants.DETAIL_ID) || key.equals(PZConstants.COL_IDX)) { + cmds = (List) columnMD.get(PZConstants.DETAIL_ID); } else { cmds = ((XMLRecordElement) columnMD.get(key)).getColumns(); } @@ -538,14 +586,14 @@ if (columnMD.size() == 1) { // no <RECORD> elments were specifed for this parse, just return the // detail id - return DETAIL_ID; + return PZConstants.DETAIL_ID; } final Iterator keys = columnMD.keySet().iterator(); // loop through the XMLRecordElement objects and see if we need a // different MD object while (keys.hasNext()) { final String key = (String) keys.next(); - if (key.equals(DETAIL_ID)) { + if (key.equals(PZConstants.DETAIL_ID) || key.equals(PZConstants.COL_IDX)) { continue; // skip this key will be assumed if none of the // others match } @@ -567,7 +615,7 @@ } // must be a detail line - return DETAIL_ID; + return PZConstants.DETAIL_ID; } @@ -584,14 +632,14 @@ if (columnMD.size() == 1) { // no <RECORD> elments were specifed for this parse, just return the // detail id - return DETAIL_ID; + return PZConstants.DETAIL_ID; } final Iterator keys = columnMD.keySet().iterator(); // loop through the XMLRecordElement objects and see if we need a // different MD object while (keys.hasNext()) { final String key = (String) keys.next(); - if (key.equals(DETAIL_ID)) { + if (key.equals(PZConstants.DETAIL_ID) || key.equals(PZConstants.COL_IDX)) { continue; // skip this key will be assumed if none of the // others match } @@ -610,7 +658,7 @@ } // must be a detail line - return DETAIL_ID; + return PZConstants.DETAIL_ID; } /** @@ -621,12 +669,41 @@ * @return List */ public static List getColumnMetaData(final String 
key, final Map columnMD) { - if (key == null || key.equals(DETAIL_ID)) { - return (List) columnMD.get(DETAIL_ID); + if (key == null || key.equals(PZConstants.DETAIL_ID) || key.equals(PZConstants.COL_IDX)) { + return (List) columnMD.get(PZConstants.DETAIL_ID); } return ((XMLRecordElement) columnMD.get(key)).getColumns(); + } + /** + * Use this method to find the index of a column. + * @author Benoit Xhenseval + * @param key + * @param columnMD + * @param colName + * @return -1 if it does not find it + */ + public static int getColumnIndex(final String key, final Map columnMD, final String colName) { + int idx = -1; + if (key != null && !key.equals(PZConstants.DETAIL_ID) && !key.equals(PZConstants.COL_IDX)) { + // if ("header".equals(key)) { + // System.out.println("Columsn====header == "+ ((XMLRecordElement) + // columnMD.get(key)).getColumns()); + // } + idx = ((XMLRecordElement) columnMD.get(key)).getColumnIndex(colName); + } else if (key == null || key.equals(PZConstants.DETAIL_ID)) { + final Map map = (Map) columnMD.get(PZConstants.COL_IDX); + // System.out.println("Map == " + map); + // System.out.println("look for == " + colName); + idx = ((Integer) map.get(colName)).intValue(); + // System.out.println("-------------> " + idx); + } + + if (idx < 0) { + throw new NoSuchElementException("Column " + colName + " does not exist, check case/spelling. key:" + key); + } + return idx; } /** @@ -675,28 +752,33 @@ } catch (final Exception ignore) { } } - + /** - * <p>Returns padding using the specified delimiter repeated - * to a given length.</p> - * + * <p> + * Returns padding using the specified delimiter repeated to a given length. 
+ * </p> + * * <pre> - * StringUtils.padding(0, 'e') = "" - * StringUtils.padding(3, 'e') = "eee" - * StringUtils.padding(-2, 'e') = IndexOutOfBoundsException + * StringUtils.padding(0, 'e') = "" + * StringUtils.padding(3, 'e') = "eee" + * StringUtils.padding(-2, 'e') = IndexOutOfBoundsException * </pre> - * - * <p>Note: this method doesn't not support padding with - * <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a> - * as they require a pair of <code>char</code>s to be represented. - * If you are needing to support full I18N of your applications - * consider using {@link #repeat(String, int)} instead. + * + * <p> + * Note: this method doesn't not support padding with <a + * href="http://www.unicode.org/glossary/#supplementary_character">Unicode + * Supplementary Characters</a> as they require a pair of <code>char</code>s + * to be represented. If you are needing to support full I18N of your + * applications consider using {@link #repeat(String, int)} instead. * </p> - * - * @param repeat number of times to repeat delim - * @param padChar character to repeat + * + * @param repeat + * number of times to repeat delim + * @param padChar + * character to repeat * @return String with repeated character - * @throws IndexOutOfBoundsException if <code>repeat < 0</code> + * @throws IndexOutOfBoundsException + * if <code>repeat < 0</code> * @see #repeat(String, int) */ public static String padding(final int repeat, final char padChar) { @@ -708,5 +790,24 @@ buf[i] = padChar; } return new String(buf); - } + } + + /** + * Build a map of name/position based on a list of ColumnMetaData. 
+ * @author Benoit Xhenseval + * @param columns + * @return a new Map + */ + public static Map buidColumnIndexMap(final List columns) { + Map map = null; + if (columns != null && !columns.isEmpty()) { + map = new HashMap(); + int idx = 0; + for (final Iterator it = columns.iterator(); it.hasNext(); idx++) { + final ColumnMetaData meta = (ColumnMetaData) it.next(); + map.put(meta.getColName(), Integer.valueOf(idx)); + } + } + return map; + } } Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/PZMapParser.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/PZMapParser.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/PZMapParser.java 2006-10-18 13:40:13 UTC (rev 99) @@ -22,22 +22,21 @@ import java.util.List; import java.util.Map; +import net.sf.pzfilereader.structure.ColumnMetaData; +import net.sf.pzfilereader.util.PZConstants; +import net.sf.pzfilereader.util.ParserUtils; + import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.input.SAXBuilder; -import net.sf.pzfilereader.structure.ColumnMetaData; -import net.sf.pzfilereader.util.ParserUtils; - /** * @author zepernick * * Parses a PZmap definition XML file */ public final class PZMapParser { - private static final String DETAIL_ID = "detail"; - private static boolean showDebug = false; /** @@ -91,8 +90,10 @@ List columns = getColumnChildren(root); final Map mdIndex = new LinkedHashMap(); // retain the same order // specified in the mapping - mdIndex.put(DETAIL_ID, columns); // always force detail to the top of - // the map no matter what + mdIndex.put(PZConstants.DETAIL_ID, columns); // always force detail + // to the top of + // the map no matter what + mdIndex.put(PZConstants.COL_IDX, ParserUtils.buidColumnIndexMap(columns)); // get all of the "record" elements and the columns under them final Iterator recordDescriptors = 
root.getChildren("RECORD").iterator(); @@ -103,7 +104,7 @@ // is the harcoded // value we are using to mark columns specified outside of a // <RECORD> element - if (xmlElement.getAttributeValue("id").equals(DETAIL_ID)) { + if (xmlElement.getAttributeValue("id").equals(PZConstants.DETAIL_ID)) { throw new Exception("The ID 'detail' on the <RECORD> element is reserved, please select another id"); } @@ -185,7 +186,7 @@ XMLRecordElement xmlrecEle = null; final String recordID = (String) mapIt.next(); Iterator columns = null; - if (recordID.equals(DETAIL_ID)) { + if (recordID.equals(PZConstants.DETAIL_ID)) { columns = ((List) xmlResults.get(recordID)).iterator(); } else { xmlrecEle = (XMLRecordElement) xmlResults.get(recordID); Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/ResolveLocalDTD.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/ResolveLocalDTD.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/ResolveLocalDTD.java 2006-10-18 13:40:13 UTC (rev 99) @@ -37,7 +37,7 @@ public InputSource resolveEntity(final String publicId, final String systemId) throws SAXException, IOException { try { if (!systemId.toLowerCase().startsWith("http://")) { - URL resource = getClass().getResource("pzfilereader.dtd"); + final URL resource = getClass().getResource("pzfilereader.dtd"); if (resource != null) { return new InputSource(resource.openStream()); Modified: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/XMLRecordElement.java =================================================================== --- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/XMLRecordElement.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/xml/XMLRecordElement.java 2006-10-18 13:40:13 UTC (rev 99) @@ -15,7 +15,10 @@ package net.sf.pzfilereader.xml; import java.util.List; +import java.util.Map; 
+import net.sf.pzfilereader.util.ParserUtils; + /** * @author Paul Zepernick * @@ -32,6 +35,8 @@ private List columns; + private Map columnIndex; + /** * @return Returns the elementNumber. */ @@ -105,5 +110,23 @@ */ public void setColumns(final List columns) { this.columns = columns; + this.columnIndex = ParserUtils.buidColumnIndexMap(columns); } + + /** + * Returns the index of the column name. + * @author Benoit Xhenseval + * @param colName + * @return -1 if the column name does not exist. + */ + public int getColumnIndex(final String colName) { + int idx = -1; + if (columnIndex != null) { + final Integer i = (Integer) columnIndex.get(colName); + if (i != null) { + idx = i.intValue(); + } + } + return idx; + } } Modified: trunk/PZFileReaderSamples/.classpath =================================================================== --- trunk/PZFileReaderSamples/.classpath 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReaderSamples/.classpath 2006-10-18 13:40:13 UTC (rev 99) @@ -1,10 +1,9 @@ <?xml version="1.0" encoding="UTF-8"?> - <classpath> - <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"></classpathentry> - <classpathentry excluding="" kind="src" path="src/main/java"></classpathentry> - <classpathentry path="MAVEN_REPO/net.sf.pzfilereader/jars/pzfilereader-2.3.0.jar" kind="var"></classpathentry> - <classpathentry path="MAVEN_REPO/jdom/jars/jdom-1.0.jar" kind="var"></classpathentry> - <classpathentry path="MAVEN_REPO/jexcelapi/jars/jxl-2.4.2.jar" kind="var"></classpathentry> - <classpathentry kind="output" path="target/classes"></classpathentry> -</classpath> \ No newline at end of file + <classpathentry path="org.eclipse.jdt.launching.JRE_CONTAINER" kind="con"/> + <classpathentry path="src/main/java" kind="src"/> + <classpathentry sourcepath="/PZ_SRC_HOME" path="MAVEN_REPO/net.sf.pzfilereader/jars/pzfilereader-2.3.0.jar" kind="var"/> + <classpathentry path="MAVEN_REPO/jdom/jars/jdom-1.0.jar" kind="var"/> + <classpathentry 
path="MAVEN_REPO/jexcelapi/jars/jxl-2.4.2.jar" kind="var"/> + <classpathentry path="target/classes" kind="output"/> +</classpath> Modified: trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java =================================================================== --- trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/Examples.java 2006-10-18 13:40:13 UTC (rev 99) @@ -67,6 +67,7 @@ menu.addMenuItem("LowLevelParse", "doLowLevelParse", false); menu.addMenuItem("DelimitedMultiLine", "doDelimitedMultiLine", false); menu.addMenuItem("NumericsAndDates", "doNumericsAndDates", false); + menu.addMenuItem("Ask for GC", "doGC", false); menu.addMenuItem("Who you gonna call?", "doCall", false); menu.displayMenu(); @@ -91,8 +92,9 @@ public void doCSVPerformanceTest() { try { final String mapping = ConsoleMenu.getString("CSV File ", "SampleCSV.csv"); - final boolean data = ConsoleMenu.getBoolean("Verbose", false); - CSVPerformanceTest.call(mapping,data); + final boolean data = ConsoleMenu.getBoolean("Traverse the entire parsed file", true); + final boolean verbose = ConsoleMenu.getBoolean("Verbose", false); + CSVPerformanceTest.call(mapping,verbose,data); } catch (Exception e) { e.printStackTrace(); } @@ -212,4 +214,8 @@ } } + public void doGC() { + System.gc(); + } + } Modified: trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/csvperformancetest/CSVPerformanceTest.java =================================================================== --- trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/csvperformancetest/CSVPerformanceTest.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/csvperformancetest/CSVPerformanceTest.java 2006-10-18 13:40:13 UTC (rev 99) @@ -34,19 +34,20 @@ String filename = (String) settings.get("csvFile"); 
String verbose = (String) settings.get("verbose"); - call(filename, Boolean.valueOf(verbose).booleanValue()); + call(filename, Boolean.valueOf(verbose).booleanValue(), true); } catch (final Exception ex) { ex.printStackTrace(); } } - public static void call(String filename, boolean verbose) throws Exception, InterruptedException { + public static void call(String filename, boolean verbose, boolean traverse) throws Exception, InterruptedException { DataSet ds = null; String[] colNames = null; // delimited by a comma // text qualified by double quotes // ignore first record + System.out.println("Parsing...."); long timeStarted = System.currentTimeMillis(); ds = new DataSet(new File(filename), ",", "\"", false); long timeFinished = System.currentTimeMillis(); @@ -56,35 +57,46 @@ if (timeFinished - timeStarted < 1000) { timeMessage = (timeFinished - timeStarted) + " Milleseconds..."; } else { - timeMessage = ((timeFinished - timeStarted) / 1000) + " Seconds..."; + timeMessage = ((float) ((timeFinished - timeStarted) / 1000.0)) + " Seconds..."; } System.out.println(""); System.out.println("********FILE PARSED IN: " + timeMessage + " ******"); - Thread.sleep(2000); // sleep for a couple seconds to the message - // above can be read - if (verbose) { + if (traverse) { + if (verbose) { + Thread.sleep(2000); // sleep for a couple seconds to the message + // above can be read + } timeStarted = System.currentTimeMillis(); colNames = ds.getColumns(); - + int rowCount = 0; + int colCount = colNames.length; while (ds.next()) { + rowCount++; for (int i = 0; i < colNames.length; i++) { - System.out.println("COLUMN NAME: " + colNames[i] + " VALUE: " + ds.getString(colNames[i])); + String string = ds.getString(colNames[i]); + + if (verbose) { + System.out.println("COLUMN NAME: " + colNames[i] + " VALUE: " + string); + } } - System.out.println("==========================================================================="); + if (verbose) { + 
System.out.println("==========================================================================="); + } } timeFinished = System.currentTimeMillis(); if (timeFinished - timeStarted < 1000) { timeMessage = (timeFinished - timeStarted) + " Milleseconds..."; } else { - timeMessage = ((timeFinished - timeStarted) / 1000) + " Seconds..."; + timeMessage = ((float)((timeFinished - timeStarted) / 1000.0)) + " Seconds..."; } System.out.println(""); - System.out.println("********Displayed Data To Console In: " + timeMessage + " ******"); + System.out.println("********Traversed Data In: " + timeMessage + " (rows: " + rowCount + " Col:" + colCount + + ") ******"); } Modified: trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/delimiteddynamiccolumns/DelimitedWithPZMap.java =================================================================== --- trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/delimiteddynamiccolumns/DelimitedWithPZMap.java 2006-10-18 11:29:19 UTC (rev 98) +++ trunk/PZFileReaderSamples/src/main/java/net/sf/pzfilereader/examples/delimiteddynamiccolumns/DelimitedWithPZMap.java 2006-10-18 13:40:13 UTC (rev 99) @@ -39,7 +39,6 @@ // text qualified by double quotes // ignore first record DataSet ds = null; - String[] colNames = null; OrderBy orderby = null; ds = new DataSet(new File(mapping), new File(data), ",", "\"", true, false); @@ -49,7 +48,7 @@ orderby.addOrderColumn(new OrderColumn("LASTNAME", true)); // ds.orderRows(orderby); - colNames = ds.getColumns(); + String[] colNames = ds.getColumns(); while (ds.next()) { for (int i = 0; i < colNames.length; i++) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |