[Jreepad-CVS] jreepad/src/jreepad/io XmlReader.java, NONE, 1.1 TreepadReader.java, NONE, 1.1 AutoDe
Brought to you by:
danstowell
From: PeWu <pe...@us...> - 2007-01-20 13:01:24
|
Update of /cvsroot/jreepad/jreepad/src/jreepad/io In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv6732/src/jreepad/io Modified Files: JreepadReader.java Added Files: XmlReader.java TreepadReader.java AutoDetectReader.java Log Message: refactoring: moved XML and HJT readers from JreepadNode to separate classes Index: JreepadReader.java =================================================================== RCS file: /cvsroot/jreepad/jreepad/src/jreepad/io/JreepadReader.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** JreepadReader.java 18 Jan 2007 09:37:27 -0000 1.1 --- JreepadReader.java 20 Jan 2007 13:01:19 -0000 1.2 *************** *** 1,6 **** package jreepad.io; import java.io.IOException; ! import java.io.Reader; import jreepad.JreepadNode; --- 1,25 ---- + /* + Jreepad - personal information manager. + Copyright (C) 2004-2006 Dan Stowell + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + The full license can be read online here: + + http://www.gnu.org/copyleft/gpl.html + */ + package jreepad.io; import java.io.IOException; ! import java.io.InputStream; import jreepad.JreepadNode; *************** *** 14,17 **** public interface JreepadReader { ! public JreepadNode read(Reader in) throws IOException; } --- 33,36 ---- public interface JreepadReader { ! public JreepadNode read(InputStream in) throws IOException; } --- NEW FILE: AutoDetectReader.java --- /* Jreepad - personal information manager. 
Copyright (C) 2004-2006 Dan Stowell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. The full license can be read online here: http://www.gnu.org/copyleft/gpl.html */ package jreepad.io; import java.io.IOException; import java.io.InputStream; import jreepad.JreepadNode; /** * Reads a Jreepad file automatically detecting file type (XML or HJT). * * @version $Id$ */ public class AutoDetectReader implements JreepadReader { XmlReader xmlReader; TreepadReader treepadReader; public AutoDetectReader(String encoding, boolean autoDetectHtmlArticles) { xmlReader = new XmlReader(); treepadReader = new TreepadReader(encoding, autoDetectHtmlArticles); } public JreepadNode read(InputStream in) throws IOException { in = new RewindableInputStream(in); // Read first line String currentLine = ((RewindableInputStream)in).readLine(); in.reset(); // reset stream, so the specific readers read from the beginning if (currentLine.startsWith("<?xml version=\"1.0\"")) { // Try and find out what character encoding to use int encPos = currentLine.indexOf("encoding="); String xmlEncoding = null; if (encPos != -1) { xmlEncoding = currentLine.substring(encPos + 10); encPos = xmlEncoding.indexOf("\""); if (encPos == -1) encPos = xmlEncoding.indexOf("'"); if (encPos != -1) xmlEncoding = xmlEncoding.substring(0, encPos); // System.out.println("Start of XML loading: decided on the following character // encoding: " + xmlEncoding); } xmlReader.setEncoding(xmlEncoding); return xmlReader.read(in); } else if ((currentLine.toLowerCase().startsWith("<treepad") && currentLine.endsWith(">"))) 
{ treepadReader.setFileFormat(1); return treepadReader.read(in); } else if ((currentLine.toLowerCase().startsWith("<hj-treepad") && currentLine.endsWith(">"))) { treepadReader.setFileFormat(1); return treepadReader.read(in); } else { System.out.println("First line of file does not indicate a recognised format:\n" + currentLine + "\n"); throw new IOException("First line of file does not indicate a recognised format:\n\n" + currentLine); } } public boolean isAutoDetectHtmlArticles() { return treepadReader.isAutoDetectHtmlArticles(); } public void setAutoDetectHtmlArticles(boolean autoDetectHtmlArticles) { treepadReader.setAutoDetectHtmlArticles(autoDetectHtmlArticles); } public String getEncoding() { return treepadReader.getEncoding(); } public void setEncoding(String encoding) { treepadReader.setEncoding(encoding); } /** * This class wraps the byte inputstreams we're presented with. We need it because * java.io.InputStreams don't provide functionality to reread processed bytes, and they have a * habit of reading more than one character when you call their read() methods. This means that, * once we discover the true (declared) encoding of a document, we can neither backtrack to read * the whole doc again nor start reading where we are with a new reader. This class allows * rewinding an inputStream by allowing a mark to be set, and the stream reset to that position. 
* <strong>The class assumes that it needs to read one character per invocation when it's read() * method is inovked, but uses the underlying InputStream's read(char[], offset length) * method--it won't buffer data read this way!</strong> * * @xerces.internal * @author Neil Graham, IBM * @author Glenn Marcy, IBM */ protected static class RewindableInputStream extends InputStream { private static int BUFFER_SIZE = 2048; private InputStream fInputStream; private byte[] fData; private int fStartOffset; private int fEndOffset; private int fOffset; private int fLength; private int fMark; public RewindableInputStream(InputStream is) { fData = new byte[BUFFER_SIZE]; fInputStream = is; fStartOffset = 0; fEndOffset = -1; fOffset = 0; fLength = 0; fMark = 0; } public void setStartOffset(int offset) { fStartOffset = offset; } public void rewind() { fOffset = fStartOffset; System.out.println("Rewinding " + fOffset + "/" + fLength + " -> " + fStartOffset + "(end=" + fEndOffset + ")"); } public int read() throws IOException { int b = 0; if (fOffset < fLength) { return fData[fOffset++] & 0xff; } if (fOffset == fEndOffset) { return -1; } if (fOffset == fData.length) { byte[] newData = new byte[fOffset << 1]; System.arraycopy(fData, 0, newData, 0, fOffset); fData = newData; } b = fInputStream.read(); if (b == -1) { fEndOffset = fOffset; return -1; } fData[fLength++] = (byte)b; fOffset++; return b & 0xff; } public int read(byte[] b, int off, int len) throws IOException { int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } return fInputStream.read(b, off, len); } if (len < bytesLeft) { if (len <= 0) { return 0; } } else { len = bytesLeft; } if (b != null) { System.arraycopy(fData, fOffset, b, off, len); } fOffset += len; return len; } public long skip(long n) throws IOException { int bytesLeft; if (n <= 0) { return 0; } bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return 0; } return 
fInputStream.skip(n); } if (n <= bytesLeft) { fOffset += n; return n; } fOffset += bytesLeft; if (fOffset == fEndOffset) { return bytesLeft; } n -= bytesLeft; /* * In a manner of speaking, when this class isn't permitting more than one byte at a * time to be read, it is "blocking". The available() method should indicate how much * can be read without blocking, so while we're in this mode, it should only indicate * that bytes in its buffer are available; otherwise, the result of available() on the * underlying InputStream is appropriate. */ return fInputStream.skip(n) + bytesLeft; } public int available() throws IOException { int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } return fInputStream.available(); } return bytesLeft; } public void mark(int howMuch) { fMark = fOffset; } public void reset() { fOffset = fMark; } public boolean markSupported() { return true; } public void close() throws IOException { if (fInputStream != null) { fInputStream.close(); fInputStream = null; } } public String readLine() throws IOException { byte[] bytes = new byte[BUFFER_SIZE]; int len = 0; while (len < BUFFER_SIZE) { int ret = read(); if (ret == -1 || ret == 0x0a || ret == 0x0d) break; bytes[len] = (byte)(ret & 0xff); len++; } return new String(bytes, 0, len); } } // end of RewindableInputStream class } --- NEW FILE: XmlReader.java --- /* Jreepad - personal information manager. Copyright (C) 2004-2006 Dan Stowell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
The full license can be read online here: http://www.gnu.org/copyleft/gpl.html */ package jreepad.io; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import jreepad.JreepadNode; /** * Reads XML input into Jreepad. * * @version $Id$ */ public class XmlReader implements JreepadReader { private String encoding; public XmlReader() { this(null); } public XmlReader(String encoding) { this.encoding = encoding; } public JreepadNode read(InputStream in) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding)); String currentLine; String currentXmlContent = ""; int nodeTagOffset = 0; // Spool until we're at the very first node while ((currentLine = reader.readLine()) != null && (nodeTagOffset = currentXmlContent.indexOf("<node")) == -1 && (nodeTagOffset == -1 || currentXmlContent.indexOf('>', nodeTagOffset) == -1)) { currentXmlContent += (currentLine + "\n"); } if (currentLine != null) currentXmlContent += (currentLine + "\n"); // System.out.println("XMLparse: I've spooled to the first node and content is now: " + // currentXmlContent); // So currentXmlContent now contains all of the opening tag, including its attributes etc // Strip off anything BEFORE the node opening currentXmlContent = currentXmlContent.substring(nodeTagOffset); // System.out.println("XMLparse: I've stripped anything before the first node and content is // now: " + currentXmlContent); return readNode(reader, currentXmlContent, 0).node; } // This function should return any XML string content that remains unprocessed // Also returns newly created node ReturnValue readNode(BufferedReader reader, String currentXmlContent, int depth) throws IOException { // System.out.println("XMLparse recursive: depth "+depth); // String currentXmlContent should BEGIN with the <node> tag. This is assumed, and if not // true may cause problems! 
String currentLine; int titleOffset, typeOffset, startTagOffset, endTagOffset; String title, typeString, content = ""; JreepadNode node = new JreepadNode(); // Extract the attributes titleOffset = currentXmlContent.indexOf("title="); typeOffset = currentXmlContent.indexOf("type="); if (titleOffset != -1) title = currentXmlContent.substring(titleOffset + 7, currentXmlContent.indexOf('"', titleOffset + 7)); else title = "<Untitled node>"; if (typeOffset != -1) typeString = currentXmlContent.substring(typeOffset + 6, currentXmlContent.indexOf('"', typeOffset + 6)); else typeString = "text/plain"; if (typeString.equals("text/csv")) node.setArticleMode(JreepadNode.ARTICLEMODE_CSV); else if (typeString.equals("text/html")) node.setArticleMode(JreepadNode.ARTICLEMODE_HTML); else if (typeString.equals("text/textile")) node.setArticleMode(JreepadNode.ARTICLEMODE_TEXTILEHTML); else if (typeString.equals("application/x-jreepad-softlink")) node.setArticleMode(JreepadNode.ARTICLEMODE_SOFTLINK); else node.setArticleMode(JreepadNode.ARTICLEMODE_ORDINARY); node.setTitle(xmlUnescapeChars(title)); // OK, so we've interpreted the attributes etc. Now we need to trim the opening tag away currentXmlContent = currentXmlContent.substring(currentXmlContent.indexOf('>') + 1); // System.out.println("XMLparse: I've stripped off the <node> tag and content is now: " + // currentXmlContent); boolean readingContent = true; // Once the baby nodes come in, we're not interested in // adding any more to the content while ((currentLine = reader.readLine()) != null) { // System.out.println("XMLparserecursive: Here's a line: " + currentLine); currentLine += "\n"; // We want to keep the newlines, but the BufferedReader doesn't // give us them // We're reading CONTENT into the current node. 
currentXmlContent += currentLine; // System.out.println("\n\nThe content that I'm currently trying to process // is:\n"+currentXmlContent); // Look out for <node which tells us we're starting a child startTagOffset = currentXmlContent.indexOf("<node"); // Look out for </node> which tells us we're finishing this node and returning to the // parent endTagOffset = currentXmlContent.indexOf("</node>"); while (!(startTagOffset == -1 || endTagOffset == -1)) { if (startTagOffset == -1 || endTagOffset < startTagOffset) { // Process the nearest end tag if (readingContent) content += xmlUnescapeChars(currentXmlContent.substring(0, endTagOffset)); String returnFromBaby = currentXmlContent.substring(endTagOffset + 7); // System.out.println("\n\nBaby intends to return:"+returnFromBaby); node.setContent(content); return new ReturnValue(returnFromBaby, node); } else { if (readingContent) { content += xmlUnescapeChars(currentXmlContent.substring(0, startTagOffset)); node.setContent(content); } // Having found a child node, we want to STOP adding anything to the current // node's content (e.g. newlines...) readingContent = false; // Process the nearest start tag // System.out.println("\n\nJust before passing to baby: content // is:\n"+currentXmlContent); ReturnValue returnValue = readNode(reader, currentXmlContent .substring(startTagOffset), depth + 1); currentXmlContent = returnValue.xmlContent; // System.out.println("\n\nJust after passing to baby: content // is:\n"+currentXmlContent); node.add(returnValue.node); } startTagOffset = currentXmlContent.indexOf("<node"); endTagOffset = currentXmlContent.indexOf("</node>"); } } // End while // Just make sure we haven't wasted any content... endTagOffset = currentXmlContent.indexOf('<'); if (readingContent && (endTagOffset != -1)) content += xmlUnescapeChars(currentXmlContent.substring(0, endTagOffset)); node.setContent(content); // System.out.println("THE MAIN WHILE LOOP HAS ENDED. 
SPARE CONTENT:\n" + // currentXmlContent); return new ReturnValue("", node); } private static String xmlUnescapeChars(String in) { char[] c = in.toCharArray(); StringBuffer ret = new StringBuffer(); StringBuffer entity = new StringBuffer(); String ent; int i, j; OuterLoop: for (i = 0; i < c.length; i++) if (c[i] == '&') { entity = new StringBuffer(); for (j = 0; j < 8; j++) // Add things into the entity buffer until we hit a // semicolon { i++; if (i == c.length) { ret.append('&' + entity.toString()); continue OuterLoop; } else if (c[i] != ';') entity.append(c[i]); else break; // Reached end of the entity (or end of the whole string!) } ent = entity.toString(); if (ent.equals("lt")) ret.append("<"); else if (ent.equals("gt")) ret.append(">"); else if (ent.equals("amp")) ret.append("&"); else if (ent.equals("quot")) ret.append("\""); else ret.append('&' + ent + ';'); } else ret.append(c[i]); return ret.toString(); } public String getEncoding() { return encoding; } public void setEncoding(String encoding) { this.encoding = encoding; } /** * Container class to make it possible to return two objects from readNode(). */ private static class ReturnValue { public String xmlContent; public JreepadNode node; public ReturnValue(String xmlContent, JreepadNode node) { this.xmlContent = xmlContent; this.node = node; } } } --- NEW FILE: TreepadReader.java --- /* Jreepad - personal information manager. Copyright (C) 2004-2006 Dan Stowell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
The full license can be read online here:

http://www.gnu.org/copyleft/gpl.html
*/

package jreepad.io;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.Stack;

import jreepad.JreepadNode;

/**
 * Reads a treepad file into Jreepad.
 *
 * After the header line, every node is stored as a "dt=text" line (omitted in
 * file format 2), a "&lt;node&gt;" line, a title line, a depth line, and then the
 * article text up to the end-of-node marker line.
 *
 * @version $Id$
 */
public class TreepadReader
    implements JreepadReader
{
    /** Line that terminates the article text of a node. */
    private static final String END_OF_NODE = "<end node> 5P9i0s8y19Z";

    private boolean autoDetectHtmlArticles;

    private String encoding;

    /** File format variant; 2 is the older layout without "dt=" lines. */
    private int fileFormat;

    /**
     * @param encoding  charset name used to decode the input; <code>null</code>
     *                  selects the platform default
     * @param autoDetectHtmlArticles  whether articles wrapped in
     *                  &lt;html&gt;...&lt;/html&gt; become HTML-mode nodes
     */
    public TreepadReader(String encoding, boolean autoDetectHtmlArticles)
    {
        this.encoding = encoding;
        this.autoDetectHtmlArticles = autoDetectHtmlArticles;
    }

    /**
     * Reads a whole node tree from the given stream.
     *
     * @param in  the treepad input, positioned at its header line
     * @return the root node, or <code>null</code> if the input contains no node
     * @throws IOException if reading fails or the input is not well-formed
     */
    public JreepadNode read(InputStream in)
        throws IOException
    {
        // InputStreamReader rejects a null charset name, so use the platform default then.
        InputStreamReader decoder = (encoding == null)
            ? new InputStreamReader(in)
            : new InputStreamReader(in, encoding);
        LineNumberReader reader = new LineNumberReader(decoder);

        reader.readLine(); // skip first line // TODO check for treepadness

        // Holds the chain of ancestors of the node read last, root at the bottom.
        Stack nodeStack = new Stack();
        JreepadNode rootNode = null;

        String dtLine, nodeLine, titleLine, depthLine;

        // THE OLDER FORMAT DOESN'T INCLUDE THE "dt=" LINE, SO IT IS NOT READ THERE
        dtLine = "dt=text";
        while ((fileFormat == 2 || (dtLine = reader.readLine()) != null)
            && (nodeLine = reader.readLine()) != null
            && (titleLine = reader.readLine()) != null
            && (depthLine = reader.readLine()) != null)
        {
            // The depth line is the line read last; the others precede it.
            int depthLineNumber = reader.getLineNumber();

            if (dtLine.equals("") && nodeLine.startsWith("<bmarks>"))
                throw new IOException(
                    "This is not a Treepad-Lite-compatible file!\n\nFiles created in more advanced versions of Treepad\ncontain features that are not available in Jreepad.");

            // Read "dt=text" [or error]
            if (fileFormat != 2 && !dtLine.toLowerCase().startsWith("dt=text"))
                throw new IOException("Unrecognised node dt format at line "
                    + (depthLineNumber - 3) + ": " + dtLine);

            // Read "<node>" [or error]
            if (!nodeLine.toLowerCase().startsWith("<node>"))
                throw new IOException("Unrecognised node format at line "
                    + (depthLineNumber - 2) + ": " + nodeLine);

            // Read the depth [or error]
            int depthMarker;
            try
            {
                depthMarker = Integer.parseInt(depthLine.trim());
            }
            catch (NumberFormatException e)
            {
                depthMarker = -1;
            }
            if (depthMarker < 0)
                throw new IOException("Unrecognised node depth at line "
                    + depthLineNumber + ": " + depthLine);

            // Read THE CONTENT! [loop until we find the end-of-node marker]
            StringBuffer currentContent = new StringBuffer();
            String currentLine;
            while ((currentLine = reader.readLine()) != null
                && !currentLine.equals(END_OF_NODE))
            {
                currentContent.append(currentLine).append("\n");
            }

            // Now, having established the content and the title and the depth, create
            // the child (dropping the newline added after the last content line).
            String content = currentContent.substring(0,
                Math.max(currentContent.length() - 1, 0));
            JreepadNode newNode = new JreepadNode(titleLine, content);

            // Turn it into a HTML-mode node if it matches "<html> ... </html>"
            String compareContent = newNode.getContent().toLowerCase().trim();
            boolean looksLikeHtml = autoDetectHtmlArticles
                && compareContent.startsWith("<html>")
                && compareContent.endsWith("</html>");
            newNode.setArticleMode(looksLikeHtml
                ? JreepadNode.ARTICLEMODE_HTML
                : JreepadNode.ARTICLEMODE_ORDINARY);

            if (depthMarker == 0)
            {
                rootNode = newNode;
            }
            else
            {
                // Drop everything that is at the same depth or deeper; what is left
                // on top of the stack is the parent.
                while (nodeStack.size() > depthMarker)
                    nodeStack.pop();
                if (nodeStack.isEmpty())
                    throw new IOException("Node at line " + depthLineNumber + " has depth "
                        + depthMarker + " but no parent node precedes it");
                ((JreepadNode)nodeStack.peek()).add(newNode);
            }
            nodeStack.push(newNode);
        }

        return rootNode;
    }

    public boolean isAutoDetectHtmlArticles()
    {
        return autoDetectHtmlArticles;
    }

    public void setAutoDetectHtmlArticles(boolean autoDetectHtmlArticles)
    {
        this.autoDetectHtmlArticles = autoDetectHtmlArticles;
    }

    public String getEncoding()
    {
        return encoding;
    }

    public void setEncoding(String encoding)
    {
        this.encoding = encoding;
    }

    public int getFileFormat()
    {
        return fileFormat;
    }

    /**
     * @param fileFormat  2 for the older layout without "dt=" lines; any other
     *                    value expects a "dt=text" line before every node
     */
    public void setFileFormat(int fileFormat)
    {
        this.fileFormat = fileFormat;
    }
}
|