From: <be...@us...> - 2006-10-17 16:52:09
Revision: 71
http://svn.sourceforge.net/pzfilereader/?rev=71&view=rev
Author: benoitx
Date: 2006-10-17 09:51:42 -0700 (Tue, 17 Oct 2006)
Log Message:
-----------
Moved remotely
Added Paths:
-----------
trunk/PZFileReader/src/main/java/net/sf/pzfilereader/LargeDataSet.java
Removed Paths:
-------------
trunk/PZFileReader/src/main/java/com/pz/reader/LargeDataSet.java
Deleted: trunk/PZFileReader/src/main/java/com/pz/reader/LargeDataSet.java
===================================================================
--- trunk/PZFileReader/src/main/java/com/pz/reader/LargeDataSet.java 2006-10-17 16:51:10 UTC (rev 70)
+++ trunk/PZFileReader/src/main/java/com/pz/reader/LargeDataSet.java 2006-10-17 16:51:42 UTC (rev 71)
@@ -1,563 +0,0 @@
-/*
- Copyright 2006 Paul Zepernick
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software distributed
- under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- CONDITIONS OF ANY KIND, either express or implied. See the License for
- the specific language governing permissions and limitations under the License.
- */
-package com.pz.reader;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import com.pz.reader.ordering.OrderBy;
-import com.pz.reader.structure.ColumnMetaData;
-import com.pz.reader.structure.Row;
-import com.pz.reader.util.ParserUtils;
-import com.pz.reader.xml.PZMapParser;
-
-/**
- * @author Paul Zepernick
- *
- * Provides limited DataSet functionality for large files. This will not read
- * the file into memory. The following methods have been disabled: previous(),
- * goTop(), goBottom(), remove(), getIndex(), absolute(), orderRows().
- */
-public class LargeDataSet extends DataSet {
-
- private static final String DELIMITED_FILE = "delimited";
-
- private static final String FIXEDLENGTH_FILE = "fixed";
-
- private String fileType; // file type being parsed
-
- private BufferedReader br = null; // reader used to read the file
-
- private InputStreamReader isr = null;
-
- private InputStream is = null; // stream used to read the file
-
- private int lineCount = 0; // keeps track of the current line being
- // processed in the file
-
- // used for delimited files
- private boolean ignoreFirstRecord = false;
-
- private boolean createMDFromFile = false;
-
- private boolean processedFirst = false;
-
- private String delimiter = null;
-
- private String qualifier = null;
-
- private int columnCount = 0;
-
- /** used for fixed length files, map of record lengths corresponding to the
- * ID's in the columnMD array.
- */
- private Map recordLengths = null;
-
- /**
- * Constructor based on InputStream. Constructs a new LargeDataSet using the
- * PZMAP XML file layout method. This is used for a DELIMITED text file.
- * escape sequence reference:<br>
- * \n newline <br>
- * \t tab <br>
- * \b backspace <br>
- * \r return <br>
- * \f form feed <br> \\ backslash <br> \' single quote <br> \" double quote
- *
- * @param pzmapXMLStream -
- * Reference to the xml file holding the pzmap
- * @param dataSourceStream -
- * text file datasource InputStream to read from
- * @param delimiter -
- * Char the file is delimited By
- * @param qualifier -
- * Char text is qualified by
- * @param ignoreFirstRecord -
- * skips the first line that contains data in the file
- * @param handleShortLines -
- * Adds missing columns as empty values to the DataSet instead of
- * logging them as an error
- * @exception Exception
- */
- public LargeDataSet(final InputStream pzmapXMLStream, final InputStream dataSourceStream, final String delimiter,
- final String qualifier, final boolean ignoreFirstRecord, final boolean handleShortLines) throws Exception {
-
- this.fileType = DELIMITED_FILE;
- this.is = dataSourceStream;
- this.isr = new InputStreamReader(is);
- this.br = new BufferedReader(this.isr);
- this.delimiter = delimiter;
- this.qualifier = qualifier;
- this.ignoreFirstRecord = ignoreFirstRecord;
- setHandleShortLines(handleShortLines);
- setColumnMD(PZMapParser.parse(pzmapXMLStream));
-
- }
-
- /**
- * Constructs a new LargeDataSet using the first line of data found in the
- * text file as the column names. This is used for a DELIMITED text file.
- * escape sequence reference:<br>
- * \n newline <br>
- * \t tab <br>
- * \b backspace <br>
- * \r return <br>
- * \f form feed <br> \\ backslash <br> \' single quote <br> \" double quote
- *
- * @param dataSource -
- * text file datasource to read from
- * @param delimiter -
- * Char the file is delimited By
- * @param qualifier -
- * Char text is qualified by
- * @param handleShortLines -
- * when flagged as true, lines with fewer columns than the number
- * of column headers will have the missing columns added as empty
- * values instead of producing an error
- * @exception Exception
- */
- public LargeDataSet(final File dataSource, final String delimiter, final String qualifier, final boolean handleShortLines)
- throws Exception {
- this(ParserUtils.createInputStream(dataSource), delimiter, qualifier, handleShortLines);
- }
-
- /**
- * Constructs a new LargeDataSet using the first line of data found in the
- * text file as the column names. This is used for a DELIMITED text file.
- * escape sequence reference:<br>
- * \n newline <br>
- * \t tab <br>
- * \b backspace <br>
- * \r return <br>
- * \f form feed <br> \\ backslash <br> \' single quote <br> \" double quote
- *
- * @param dataSource -
- * text file datasource to read from
- * @param delimiter -
- * Char the file is delimited By
- * @param qualifier -
- * Char text is qualified by
- * @param handleShortLines -
- * when flagged as true, lines with fewer columns than the number
- * of column headers will have the missing columns added as empty
- * values instead of producing an error
- * @exception Exception
- */
- public LargeDataSet(final InputStream dataSource, final String delimiter, final String qualifier,
- final boolean handleShortLines) throws Exception {
- this.fileType = DELIMITED_FILE;
- setHandleShortLines(handleShortLines);
- this.is = dataSource;
- this.isr = new InputStreamReader(is);
- this.br = new BufferedReader(this.isr);
- this.delimiter = delimiter;
- this.qualifier = qualifier;
- this.ignoreFirstRecord = false;
- this.createMDFromFile = true;
- }
-
- /**
- * New constructor based on InputStream. Constructs a new LargeDataSet using
- * the PZMAP XML file layout method. This is used for a FIXED LENGTH text
- * file.
- *
- * @param pzmapXMLStream -
- * Reference to the xml file InputStream holding the pzmap
- * @param dataSourceStream -
- * Delimited file InputStream to read from
- * @param handleShortLines -
- * Pad lines out to fit the fixed length
- * @exception Exception
- */
- public LargeDataSet(final InputStream pzmapXMLStream, final InputStream dataSourceStream, final boolean handleShortLines)
- throws Exception {
- this.fileType = FIXEDLENGTH_FILE;
- this.is = dataSourceStream;
- this.isr = new InputStreamReader(is);
- this.br = new BufferedReader(this.isr);
- setHandleShortLines(handleShortLines);
-
- setColumnMD(PZMapParser.parse(pzmapXMLStream));
-
- }
-
- /**
- * Loads up the next record from the file, returns false if EOF
- *
- * @return boolean
- */
- public boolean next() {
- try {
- if (this.fileType.equals(DELIMITED_FILE)) {
- return readNextDelimited();
- }
-
- // assume fixed length file
- return readNextFixedLen();
- } catch (final Exception ex) {
- throw new RuntimeException(ex);
- } finally {
- // Advance the pointer in the DataSet to the first position if
- // needed
- // we only throw one record into the array at a time
- if (super.getIndex() == -1) {
- super.next();
- }
- }
- }
-
- /**
- * Not Supported!
- *
- * @return boolean
- */
- public boolean previous() {
- throw new RuntimeException("previous() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- *
- * @param excelFileToBeWritten
- * @exception Exception
- */
- public void writeToExcel(final File excelFileToBeWritten) throws Exception {
- throw new RuntimeException("writeToExcel() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- *
- * @param ob -
- * OrderBy object
- * @exception Exception
- * @see com.pz.reader.ordering.OrderBy
- * @see com.pz.reader.ordering.OrderColumn
- */
- public void orderRows(final OrderBy ob) throws Exception {
- throw new RuntimeException("orderRows() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- *
- * @param localPointer -
- * int
- * @exception IndexOutOfBoundsException
- */
- public void absolute(final int localPointer) {
- throw new RuntimeException("absolute() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- */
- public void remove() {
- throw new RuntimeException("remove() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- *
- * @return int
- */
- public int getIndex() {
- throw new RuntimeException("getIndex() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- */
- public void goBottom() {
- throw new RuntimeException("goBottom() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- */
- public void goTop() {
- throw new RuntimeException("goTop() is Not Implemented For LargeDataSet");
- }
-
- /**
- * Not Supported!
- *
- * @param columnName -
- * String Name of the column
- * @param value -
- * String value to assign to the column.
- * @exception Exception -
- * exception will be thrown if the pointer is not on a valid row
- */
- public void setValue(final String columnName, final String value) throws Exception {
- throw new RuntimeException("setValue() is Not Implemented For LargeDataSet");
-
- }
-
- /**
- * Erases the dataset and releases memory for the JVM to reclaim. This will
- * also close the readers used to read the file.
- */
- public void freeMemory() {
- super.freeMemory();
-
- ParserUtils.closeReader(br);
- ParserUtils.closeReader(isr);
- ParserUtils.closeReader(is);
- }
-
- // reads the next record and sets it into the row array
- private boolean readNextDelimited() throws Exception {
- String line = null;
- Row row = null;
- List columns = null;
- boolean processingMultiLine = false;
- boolean readRecordOk = false;
- String lineData = "";
- String mdkey = null;
- List cmds = null;
-
- if (getRows() == null) {
- setRows(new ArrayList());
- }
- if (getErrors() == null) {
- setErrors(new ArrayList());
- }
-
- getRows().clear();
- /** loop through each line in the file */
- while ((line = br.readLine()) != null) {
- lineCount++;
- /** empty line skip past it */
- if (!processingMultiLine && line.trim().length() == 0) {
- continue;
- }
- // check to see if the user has elected to skip the first record
- if (!this.processedFirst && this.ignoreFirstRecord) {
- this.processedFirst = true;
- continue;
- } else if (!this.processedFirst && this.createMDFromFile) {
- // column names are coming from inside the CSV file
- this.processedFirst = true;
- setColumnMD(ParserUtils.getColumnMDFromFile(line, delimiter, qualifier));
- continue;
- }
-
- // ********************************************************
- // new functionality as of 2.1.0 check to see if we have
- // any line breaks in the middle of the record, this will only
- // be checked if we have specified a delimiter
- // ********************************************************
- final char[] chrArry = line.trim().toCharArray();
- if (!processingMultiLine && delimiter != null && delimiter.trim().length() > 0) {
- processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier);
- }
-
- // check to see if we have reached the end of the linebreak in the
- // record
-
- if (processingMultiLine && lineData.trim().length() > 0) {
- // need to do one last check here. it is possible that the "
- // could be part of the data
- // excel will escape these with another quote; here is some data
- // "" This would indicate
- // there is more to the multiline
- if (line.trim().endsWith(qualifier) && !line.trim().endsWith(qualifier + qualifier)) {
- // it is safe to assume we have reached the end of the line
- // break
- processingMultiLine = false;
- if (lineData.trim().length() > 0) {
- lineData += "\r\n";
- }
- lineData += line;
- } else {
-
- // check to see if this is the last line of the record
- // looking for a qualifier followed by a delimiter
- if (lineData.trim().length() > 0) {
- lineData += "\r\n";
- }
- lineData += line;
- boolean qualiFound = false;
- for (int i = 0; i < chrArry.length; i++) {
- if (qualiFound) {
- if (chrArry[i] == ' ') {
- continue;
- } else {
- // not a space, if this char is the delimiter,
- // then we have reached the end of
- // the record
- if (chrArry[i] == delimiter.charAt(0)) {
- // processingMultiLine = false;
- // fix put in, setting to false caused bug
- // when processing multiple multi-line
- // columns on the same record
- processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier);
- break;
- }
- qualiFound = false;
- continue;
- }
- } else if (chrArry[i] == qualifier.charAt(0)) {
- qualiFound = true;
- }
- }
- // check to see if we are still in multi line mode, if so
- // grab the next line
- if (processingMultiLine) {
- continue;
- }
- }
- } else {
- // throw the line into lineData var.
- lineData += line;
- if (processingMultiLine) {
- continue; // if we are working on a multiline rec, get the
- // data on the next line
- }
- }
- // ********************************************************************
- // end record line break logic
- // ********************************************************************
-
- // column values
- columns = ParserUtils.splitLine(lineData, this.delimiter, this.qualifier);
- lineData = "";
- mdkey = ParserUtils.getCMDKeyForDelimitedFile(getColumnMD(), columns);
- cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD());
- this.columnCount = cmds.size();
- // DEBUG
-
- // Incorrect record length on line log the error. Line
- // will not be included in the dataset
- if (columns.size() > columnCount) {
- // log the error
- addError("TOO MANY COLUMNS WANTED: " + columnCount + " GOT: " + columns.size(), lineCount, 2);
- continue;
- } else if (columns.size() < columnCount) {
- if (isHandleShortLines()) {
- // We can pad this line out
- while (columns.size() < columnCount) {
- columns.add("");
- }
-
- // log a warning
- addError("PADDED LINE TO CORRECT NUMBER OF COLUMNS", lineCount, 1);
-
- } else {
- addError("TOO FEW COLUMNS WANTED: " + columnCount + " GOT: " + columns.size(), lineCount, 2);
- continue;
- }
- }
-
- row = new Row();
- row.setCols(columns);
- row.setRowNumber(lineCount);
- // with the LargeDataSet we are never going to store more than 1
- // record in memory
- getRows().add(row);
- readRecordOk = true;
- break;
- }
-
- return readRecordOk;
-
- }
-
- private boolean readNextFixedLen() throws Exception {
- String line = null;
- Row row = null;
- int recordLength = 0;
- final int aLineCount = 0;
- int recPosition = 0;
- String mdkey = null;
- List cmds = null;
- boolean readRecordOk = false;
-
- if (getRows() == null) {
- setRows(new ArrayList());
- }
- if (getErrors() == null) {
- setErrors(new ArrayList());
- }
-
- if (this.recordLengths == null) {
- this.recordLengths = ParserUtils.calculateRecordLengths(getColumnMD());
- }
-
- getRows().clear();
- // loop through each line in the file
- while ((line = br.readLine()) != null) {
- this.lineCount++;
- // empty line skip past it
- if (line.trim().length() == 0) {
- continue;
- }
-
- mdkey = ParserUtils.getCMDKeyForFixedLengthFile(getColumnMD(), line);
- recordLength = ((Integer) recordLengths.get(mdkey)).intValue();
- cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD());
-
- // Incorrect record length on line log the error. Line will not be
- // included in the
- // dataset
- if (line.length() > recordLength) {
- addError("LINE TOO LONG. LINE IS " + line.length() + " LONG. SHOULD BE " + recordLength, aLineCount, 2);
- continue;
- } else if (line.length() < recordLength) {
- if (isHandleShortLines()) {
- // We can pad this line out
- while (line.length() < recordLength) {
- line += ParserUtils.padding(recordLength-line.length(), ' ');
- }
-
- // log a warning
- addError("PADDED LINE TO CORRECT RECORD LENGTH", aLineCount, 1);
-
- } else {
- addError("LINE TOO SHORT. LINE IS " + line.length() + " LONG. SHOULD BE " + recordLength, aLineCount, 2);
- continue;
- }
- }
-
- recPosition = 1;
- row = new Row();
- row.setMdkey(mdkey.equals("detail") ? null : mdkey); // try to
- // limit the
- // memory
- // use
- // Build the columns for the row
- for (int i = 0; i < cmds.size(); i++) {
- String tempValue = null;
- tempValue = line.substring(recPosition - 1, recPosition + (((ColumnMetaData) cmds.get(i)).getColLength() - 1));
- recPosition += ((ColumnMetaData) cmds.get(i)).getColLength();
- row.addColumn(tempValue.trim());
- }
- row.setRowNumber(aLineCount);
- // add the row to the array
- getRows().add(row);
-
- readRecordOk = true;
- break;
- }
-
- return readRecordOk;
-
- }
-}
Copied: trunk/PZFileReader/src/main/java/net/sf/pzfilereader/LargeDataSet.java (from rev 70, trunk/PZFileReader/src/main/java/com/pz/reader/LargeDataSet.java)
===================================================================
--- trunk/PZFileReader/src/main/java/net/sf/pzfilereader/LargeDataSet.java (rev 0)
+++ trunk/PZFileReader/src/main/java/net/sf/pzfilereader/LargeDataSet.java 2006-10-17 16:51:42 UTC (rev 71)
@@ -0,0 +1,563 @@
+/*
+ Copyright 2006 Paul Zepernick
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software distributed
+ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ CONDITIONS OF ANY KIND, either express or implied. See the License for
+ the specific language governing permissions and limitations under the License.
+ */
+package com.pz.reader;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import com.pz.reader.ordering.OrderBy;
+import com.pz.reader.structure.ColumnMetaData;
+import com.pz.reader.structure.Row;
+import com.pz.reader.util.ParserUtils;
+import com.pz.reader.xml.PZMapParser;
+
+/**
+ * @author Paul Zepernick
+ *
+ * Provides limited DataSet functionality for large files. This will not read
+ * the file into memory. The following methods have been disabled: previous(),
+ * goTop(), goBottom(), remove(), getIndex(), absolute(), orderRows().
+ */
+public class LargeDataSet extends DataSet {
+
+ private static final String DELIMITED_FILE = "delimited";
+
+ private static final String FIXEDLENGTH_FILE = "fixed";
+
+ private String fileType; // file type being parsed
+
+ private BufferedReader br = null; // reader used to read the file
+
+ private InputStreamReader isr = null;
+
+ private InputStream is = null; // stream used to read the file
+
+ private int lineCount = 0; // keeps track of the current line being
+ // processed in the file
+
+ // used for delimited files
+ private boolean ignoreFirstRecord = false;
+
+ private boolean createMDFromFile = false;
+
+ private boolean processedFirst = false;
+
+ private String delimiter = null;
+
+ private String qualifier = null;
+
+ private int columnCount = 0;
+
+ /** used for fixed length files, map of record lengths corresponding to the
+ * ID's in the columnMD array.
+ */
+ private Map recordLengths = null;
+
+ /**
+ * Constructor based on InputStream. Constructs a new LargeDataSet using the
+ * PZMAP XML file layout method. This is used for a DELIMITED text file.
+ * escape sequence reference:<br>
+ * \n newline <br>
+ * \t tab <br>
+ * \b backspace <br>
+ * \r return <br>
+ * \f form feed <br> \\ backslash <br> \' single quote <br> \" double quote
+ *
+ * @param pzmapXMLStream -
+ * Reference to the xml file holding the pzmap
+ * @param dataSourceStream -
+ * text file datasource InputStream to read from
+ * @param delimiter -
+ * Char the file is delimited By
+ * @param qualifier -
+ * Char text is qualified by
+ * @param ignoreFirstRecord -
+ * skips the first line that contains data in the file
+ * @param handleShortLines -
+ * Adds missing columns as empty values to the DataSet instead of
+ * logging them as an error
+ * @exception Exception
+ */
+ public LargeDataSet(final InputStream pzmapXMLStream, final InputStream dataSourceStream, final String delimiter,
+ final String qualifier, final boolean ignoreFirstRecord, final boolean handleShortLines) throws Exception {
+
+ this.fileType = DELIMITED_FILE;
+ this.is = dataSourceStream;
+ this.isr = new InputStreamReader(is);
+ this.br = new BufferedReader(this.isr);
+ this.delimiter = delimiter;
+ this.qualifier = qualifier;
+ this.ignoreFirstRecord = ignoreFirstRecord;
+ setHandleShortLines(handleShortLines);
+ setColumnMD(PZMapParser.parse(pzmapXMLStream));
+
+ }
+
+ /**
+ * Constructs a new LargeDataSet using the first line of data found in the
+ * text file as the column names. This is used for a DELIMITED text file.
+ * escape sequence reference:<br>
+ * \n newline <br>
+ * \t tab <br>
+ * \b backspace <br>
+ * \r return <br>
+ * \f form feed <br> \\ backslash <br> \' single quote <br> \" double quote
+ *
+ * @param dataSource -
+ * text file datasource to read from
+ * @param delimiter -
+ * Char the file is delimited By
+ * @param qualifier -
+ * Char text is qualified by
+ * @param handleShortLines -
+ * when flagged as true, lines with fewer columns than the number
+ * of column headers will have the missing columns added as empty
+ * values instead of producing an error
+ * @exception Exception
+ */
+ public LargeDataSet(final File dataSource, final String delimiter, final String qualifier, final boolean handleShortLines)
+ throws Exception {
+ this(ParserUtils.createInputStream(dataSource), delimiter, qualifier, handleShortLines);
+ }
+
+ /**
+ * Constructs a new LargeDataSet using the first line of data found in the
+ * text file as the column names. This is used for a DELIMITED text file.
+ * escape sequence reference:<br>
+ * \n newline <br>
+ * \t tab <br>
+ * \b backspace <br>
+ * \r return <br>
+ * \f form feed <br> \\ backslash <br> \' single quote <br> \" double quote
+ *
+ * @param dataSource -
+ * text file datasource to read from
+ * @param delimiter -
+ * Char the file is delimited By
+ * @param qualifier -
+ * Char text is qualified by
+ * @param handleShortLines -
+ * when flagged as true, lines with fewer columns than the number
+ * of column headers will have the missing columns added as empty
+ * values instead of producing an error
+ * @exception Exception
+ */
+ public LargeDataSet(final InputStream dataSource, final String delimiter, final String qualifier,
+ final boolean handleShortLines) throws Exception {
+ this.fileType = DELIMITED_FILE;
+ setHandleShortLines(handleShortLines);
+ this.is = dataSource;
+ this.isr = new InputStreamReader(is);
+ this.br = new BufferedReader(this.isr);
+ this.delimiter = delimiter;
+ this.qualifier = qualifier;
+ this.ignoreFirstRecord = false;
+ this.createMDFromFile = true;
+ }
+
+ /**
+ * New constructor based on InputStream. Constructs a new LargeDataSet using
+ * the PZMAP XML file layout method. This is used for a FIXED LENGTH text
+ * file.
+ *
+ * @param pzmapXMLStream -
+ * Reference to the xml file InputStream holding the pzmap
+ * @param dataSourceStream -
+ * Delimited file InputStream to read from
+ * @param handleShortLines -
+ * Pad lines out to fit the fixed length
+ * @exception Exception
+ */
+ public LargeDataSet(final InputStream pzmapXMLStream, final InputStream dataSourceStream, final boolean handleShortLines)
+ throws Exception {
+ this.fileType = FIXEDLENGTH_FILE;
+ this.is = dataSourceStream;
+ this.isr = new InputStreamReader(is);
+ this.br = new BufferedReader(this.isr);
+ setHandleShortLines(handleShortLines);
+
+ setColumnMD(PZMapParser.parse(pzmapXMLStream));
+
+ }
+
+ /**
+ * Loads up the next record from the file, returns false if EOF
+ *
+ * @return boolean
+ */
+ public boolean next() {
+ try {
+ if (this.fileType.equals(DELIMITED_FILE)) {
+ return readNextDelimited();
+ }
+
+ // assume fixed length file
+ return readNextFixedLen();
+ } catch (final Exception ex) {
+ throw new RuntimeException(ex);
+ } finally {
+ // Advance the pointer in the DataSet to the first position if
+ // needed
+ // we only throw one record into the array at a time
+ if (super.getIndex() == -1) {
+ super.next();
+ }
+ }
+ }
+
+ /**
+ * Not Supported!
+ *
+ * @return boolean
+ */
+ public boolean previous() {
+ throw new RuntimeException("previous() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ *
+ * @param excelFileToBeWritten
+ * @exception Exception
+ */
+ public void writeToExcel(final File excelFileToBeWritten) throws Exception {
+ throw new RuntimeException("writeToExcel() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ *
+ * @param ob -
+ * OrderBy object
+ * @exception Exception
+ * @see com.pz.reader.ordering.OrderBy
+ * @see com.pz.reader.ordering.OrderColumn
+ */
+ public void orderRows(final OrderBy ob) throws Exception {
+ throw new RuntimeException("orderRows() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ *
+ * @param localPointer -
+ * int
+ * @exception IndexOutOfBoundsException
+ */
+ public void absolute(final int localPointer) {
+ throw new RuntimeException("absolute() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ */
+ public void remove() {
+ throw new RuntimeException("remove() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ *
+ * @return int
+ */
+ public int getIndex() {
+ throw new RuntimeException("getIndex() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ */
+ public void goBottom() {
+ throw new RuntimeException("goBottom() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ */
+ public void goTop() {
+ throw new RuntimeException("goTop() is Not Implemented For LargeDataSet");
+ }
+
+ /**
+ * Not Supported!
+ *
+ * @param columnName -
+ * String Name of the column
+ * @param value -
+ * String value to assign to the column.
+ * @exception Exception -
+ * exception will be thrown if the pointer is not on a valid row
+ */
+ public void setValue(final String columnName, final String value) throws Exception {
+ throw new RuntimeException("setValue() is Not Implemented For LargeDataSet");
+
+ }
+
+ /**
+ * Erases the dataset and releases memory for the JVM to reclaim. This will
+ * also close the readers used to read the file.
+ */
+ public void freeMemory() {
+ super.freeMemory();
+
+ ParserUtils.closeReader(br);
+ ParserUtils.closeReader(isr);
+ ParserUtils.closeReader(is);
+ }
+
+ // reads the next record and sets it into the row array
+ private boolean readNextDelimited() throws Exception {
+ String line = null;
+ Row row = null;
+ List columns = null;
+ boolean processingMultiLine = false;
+ boolean readRecordOk = false;
+ String lineData = "";
+ String mdkey = null;
+ List cmds = null;
+
+ if (getRows() == null) {
+ setRows(new ArrayList());
+ }
+ if (getErrors() == null) {
+ setErrors(new ArrayList());
+ }
+
+ getRows().clear();
+ /** loop through each line in the file */
+ while ((line = br.readLine()) != null) {
+ lineCount++;
+ /** empty line skip past it */
+ if (!processingMultiLine && line.trim().length() == 0) {
+ continue;
+ }
+ // check to see if the user has elected to skip the first record
+ if (!this.processedFirst && this.ignoreFirstRecord) {
+ this.processedFirst = true;
+ continue;
+ } else if (!this.processedFirst && this.createMDFromFile) {
+ // column names are coming from inside the CSV file
+ this.processedFirst = true;
+ setColumnMD(ParserUtils.getColumnMDFromFile(line, delimiter, qualifier));
+ continue;
+ }
+
+ // ********************************************************
+ // new functionality as of 2.1.0 check to see if we have
+ // any line breaks in the middle of the record, this will only
+ // be checked if we have specified a delimiter
+ // ********************************************************
+ final char[] chrArry = line.trim().toCharArray();
+ if (!processingMultiLine && delimiter != null && delimiter.trim().length() > 0) {
+ processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier);
+ }
+
+ // check to see if we have reached the end of the linebreak in the
+ // record
+
+ if (processingMultiLine && lineData.trim().length() > 0) {
+ // need to do one last check here. it is possible that the "
+ // could be part of the data
+ // excel will escape these with another quote; here is some data
+ // "" This would indicate
+ // there is more to the multiline
+ if (line.trim().endsWith(qualifier) && !line.trim().endsWith(qualifier + qualifier)) {
+ // it is safe to assume we have reached the end of the line
+ // break
+ processingMultiLine = false;
+ if (lineData.trim().length() > 0) {
+ lineData += "\r\n";
+ }
+ lineData += line;
+ } else {
+
+ // check to see if this is the last line of the record
+ // looking for a qualifier followed by a delimiter
+ if (lineData.trim().length() > 0) {
+ lineData += "\r\n";
+ }
+ lineData += line;
+ boolean qualiFound = false;
+ for (int i = 0; i < chrArry.length; i++) {
+ if (qualiFound) {
+ if (chrArry[i] == ' ') {
+ continue;
+ } else {
+ // not a space, if this char is the delimiter,
+ // then we have reached the end of
+ // the record
+ if (chrArry[i] == delimiter.charAt(0)) {
+ // processingMultiLine = false;
+ // fix put in, setting to false caused bug
+ // when processing multiple multi-line
+ // columns on the same record
+ processingMultiLine = ParserUtils.isMultiLine(chrArry, delimiter, qualifier);
+ break;
+ }
+ qualiFound = false;
+ continue;
+ }
+ } else if (chrArry[i] == qualifier.charAt(0)) {
+ qualiFound = true;
+ }
+ }
+ // check to see if we are still in multi line mode, if so
+ // grab the next line
+ if (processingMultiLine) {
+ continue;
+ }
+ }
+ } else {
+ // throw the line into lineData var.
+ lineData += line;
+ if (processingMultiLine) {
+ continue; // if we are working on a multiline rec, get the
+ // data on the next line
+ }
+ }
+ // ********************************************************************
+ // end record line break logic
+ // ********************************************************************
+
+ // column values
+ columns = ParserUtils.splitLine(lineData, this.delimiter, this.qualifier);
+ lineData = "";
+ mdkey = ParserUtils.getCMDKeyForDelimitedFile(getColumnMD(), columns);
+ cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD());
+ this.columnCount = cmds.size();
+ // DEBUG
+
+ // Incorrect record length on line log the error. Line
+ // will not be included in the dataset
+ if (columns.size() > columnCount) {
+ // log the error
+ addError("TOO MANY COLUMNS WANTED: " + columnCount + " GOT: " + columns.size(), lineCount, 2);
+ continue;
+ } else if (columns.size() < columnCount) {
+ if (isHandleShortLines()) {
+ // We can pad this line out
+ while (columns.size() < columnCount) {
+ columns.add("");
+ }
+
+ // log a warning
+ addError("PADDED LINE TO CORRECT NUMBER OF COLUMNS", lineCount, 1);
+
+ } else {
+ addError("TOO FEW COLUMNS WANTED: " + columnCount + " GOT: " + columns.size(), lineCount, 2);
+ continue;
+ }
+ }
+
+ row = new Row();
+ row.setCols(columns);
+ row.setRowNumber(lineCount);
+ // with the LargeDataSet we are never going to store more than 1
+ // record in memory
+ getRows().add(row);
+ readRecordOk = true;
+ break;
+ }
+
+ return readRecordOk;
+
+ }
+
+ private boolean readNextFixedLen() throws Exception {
+ String line = null;
+ Row row = null;
+ int recordLength = 0;
+ final int aLineCount = 0;
+ int recPosition = 0;
+ String mdkey = null;
+ List cmds = null;
+ boolean readRecordOk = false;
+
+ if (getRows() == null) {
+ setRows(new ArrayList());
+ }
+ if (getErrors() == null) {
+ setErrors(new ArrayList());
+ }
+
+ if (this.recordLengths == null) {
+ this.recordLengths = ParserUtils.calculateRecordLengths(getColumnMD());
+ }
+
+ getRows().clear();
+ // loop through each line in the file
+ while ((line = br.readLine()) != null) {
+ this.lineCount++;
+ // empty line skip past it
+ if (line.trim().length() == 0) {
+ continue;
+ }
+
+ mdkey = ParserUtils.getCMDKeyForFixedLengthFile(getColumnMD(), line);
+ recordLength = ((Integer) recordLengths.get(mdkey)).intValue();
+ cmds = ParserUtils.getColumnMetaData(mdkey, getColumnMD());
+
+ // Incorrect record length on line log the error. Line will not be
+ // included in the
+ // dataset
+ if (line.length() > recordLength) {
+ addError("LINE TOO LONG. LINE IS " + line.length() + " LONG. SHOULD BE " + recordLength, aLineCount, 2);
+ continue;
+ } else if (line.length() < recordLength) {
+ if (isHandleShortLines()) {
+ // We can pad this line out
+ while (line.length() < recordLength) {
+ line += ParserUtils.padding(recordLength-line.length(), ' ');
+ }
+
+ // log a warning
+ addError("PADDED LINE TO CORRECT RECORD LENGTH", aLineCount, 1);
+
+ } else {
+ addError("LINE TOO SHORT. LINE IS " + line.length() + " LONG. SHOULD BE " + recordLength, aLineCount, 2);
+ continue;
+ }
+ }
+
+ recPosition = 1;
+ row = new Row();
+ row.setMdkey(mdkey.equals("detail") ? null : mdkey); // try to
+ // limit the
+ // memory
+ // use
+ // Build the columns for the row
+ for (int i = 0; i < cmds.size(); i++) {
+ String tempValue = null;
+ tempValue = line.substring(recPosition - 1, recPosition + (((ColumnMetaData) cmds.get(i)).getColLength() - 1));
+ recPosition += ((ColumnMetaData) cmds.get(i)).getColLength();
+ row.addColumn(tempValue.trim());
+ }
+ row.setRowNumber(aLineCount);
+ // add the row to the array
+ getRows().add(row);
+
+ readRecordOk = true;
+ break;
+ }
+
+ return readRecordOk;
+
+ }
+}
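
For reference, a minimal usage sketch of the class added above: it constructs a LargeDataSet from a delimited file, letting the first line supply the column names, streams one record at a time with next(), and releases the readers with freeMemory(). The file name "orders.csv", the column name "ORDER_ID", and the getString() accessor are illustrative assumptions; getString() is expected to be inherited from DataSet and is not part of this revision, and the import assumes the package declaration will be updated to match the new net.sf.pzfilereader location.

    import java.io.File;

    // assumption: the package declaration is updated to the new location in a later revision
    import net.sf.pzfilereader.LargeDataSet;

    public class LargeDataSetExample {
        public static void main(final String[] args) throws Exception {
            // delimiter "," and qualifier "\""; handleShortLines = true pads short lines
            // instead of rejecting them; column names come from the first line of the file
            final LargeDataSet ds = new LargeDataSet(new File("orders.csv"), ",", "\"", true);
            try {
                // next() loads exactly one record into memory and returns false at EOF
                while (ds.next()) {
                    // getString() is assumed to come from the DataSet superclass (hypothetical here)
                    System.out.println(ds.getString("ORDER_ID"));
                }
            } finally {
                // closes the underlying BufferedReader, InputStreamReader and InputStream
                ds.freeMemory();
            }
        }
    }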