[Jreepad-CVS] jreepad/src/jreepad/io XmlReader.java, NONE, 1.1 TreepadReader.java, NONE, 1.1 AutoDetectReader.java, NONE, 1.1 JreepadReader.java, 1.1, 1.2

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/jreepad/jreepad/src/jreepad/io
In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv6732/src/jreepad/io

Modified Files:
	JreepadReader.java 
Added Files:
	XmlReader.java TreepadReader.java AutoDetectReader.java 
Log Message:
refactoring: moved XML and HJT readers from JreepadNode to separate classes

Index: JreepadReader.java
===================================================================
RCS file: /cvsroot/jreepad/jreepad/src/jreepad/io/JreepadReader.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** JreepadReader.java	18 Jan 2007 09:37:27 -0000	1.1
--- JreepadReader.java	20 Jan 2007 13:01:19 -0000	1.2
***************
*** 1,6 ****
  package jreepad.io;

  import java.io.IOException;
! import java.io.Reader;

  import jreepad.JreepadNode;
--- 1,25 ----
+ /*
+            Jreepad - personal information manager.
+            Copyright (C) 2004-2006 Dan Stowell
+ 
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ The full license can be read online here:
+ 
+            http://www.gnu.org/copyleft/gpl.html
+ */
+ 
  package jreepad.io;

  import java.io.IOException;
! import java.io.InputStream;

  import jreepad.JreepadNode;
***************
*** 14,17 ****
  public interface JreepadReader
  {
!     public JreepadNode read(Reader in) throws IOException;
  }
--- 33,36 ----
  public interface JreepadReader
  {
!     public JreepadNode read(InputStream in) throws IOException;
  }

--- NEW FILE: AutoDetectReader.java ---
/*
           Jreepad - personal information manager.
           Copyright (C) 2004-2006 Dan Stowell

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

The full license can be read online here:

           http://www.gnu.org/copyleft/gpl.html
*/

package jreepad.io;

import java.io.IOException;
import java.io.InputStream;

import jreepad.JreepadNode;

/**
 * Reads a Jreepad file automatically detecting file type (XML or HJT).
 * <p>
 * The first line of the stream is inspected, the stream is rewound to its first byte, and
 * the whole stream is then handed to either an {@link XmlReader} or a {@link TreepadReader}.
 *
 * @version $Id$
 */
public class AutoDetectReader
    implements JreepadReader
{
    /** Encoding used for XML input whose declaration does not name one (the XML default). */
    private static final String DEFAULT_XML_ENCODING = "UTF-8";

    /** Treepad file format whose nodes are each preceded by a "dt=" line. */
    private static final int FORMAT_TREEPAD = 1;

    /**
     * Treepad file format without "dt=" lines; {@link TreepadReader} skips that line when
     * its file format is 2.
     */
    private static final int FORMAT_HJ_TREEPAD = 2;

    XmlReader xmlReader;

    TreepadReader treepadReader;

    /**
     * Creates a reader that delegates to a fresh XML reader and a fresh Treepad reader.
     *
     * @param encoding character encoding handed to the Treepad reader
     * @param autoDetectHtmlArticles handed to the Treepad reader
     */
    public AutoDetectReader(String encoding, boolean autoDetectHtmlArticles)
    {
        xmlReader = new XmlReader();
        treepadReader = new TreepadReader(encoding, autoDetectHtmlArticles);
    }

    /**
     * Detects the file type from the first line of the stream and reads the tree with the
     * matching reader.
     *
     * @param in the stream to read; it is wrapped so that the first line can be re-read
     * @return the node returned by the delegate reader
     * @throws IOException if the first line indicates no recognised format, or if the
     *         delegate reader fails
     */
    public JreepadNode read(InputStream in)
        throws IOException
    {
        RewindableInputStream rewindable = new RewindableInputStream(in);

        // Peek at the first line, then rewind so the specific readers read from the beginning
        String currentLine = rewindable.readLine();
        rewindable.reset();

        String lowerCaseLine = currentLine.toLowerCase();

        if (currentLine.startsWith("<?xml version=\"1.0\""))
        {
            // Use the declared character encoding; without one the reader would be handed
            // null and fail, so fall back to the XML default.
            String xmlEncoding = extractXmlEncoding(currentLine);
            if (xmlEncoding == null)
                xmlEncoding = DEFAULT_XML_ENCODING;
            xmlReader.setEncoding(xmlEncoding);
            return xmlReader.read(rewindable);
        }
        else if (lowerCaseLine.startsWith("<treepad") && currentLine.endsWith(">"))
        {
            treepadReader.setFileFormat(FORMAT_TREEPAD);
            return treepadReader.read(rewindable);
        }
        else if (lowerCaseLine.startsWith("<hj-treepad") && currentLine.endsWith(">"))
        {
            // NOTE(review): this branch used to select format 1, exactly like the branch
            // above, which left TreepadReader's format 2 ("older format") unreachable.
            // Presumably hj-treepad is that older format - confirm with a sample file.
            treepadReader.setFileFormat(FORMAT_HJ_TREEPAD);
            return treepadReader.read(rewindable);
        }
        else
        {
            System.out.println("First line of file does not indicate a recognised format:\n"
                + currentLine + "\n");
            throw new IOException("First line of file does not indicate a recognised format:\n\n"
                + currentLine);
        }
    }

    /**
     * Extracts the value of the <code>encoding</code> pseudo-attribute from the first line
     * of an XML document.
     *
     * @param xmlDeclaration the first line of the document
     * @return the declared encoding name, or <code>null</code> if the line declares none
     *         (attribute missing, not quoted, or empty)
     */
    private static String extractXmlEncoding(String xmlDeclaration)
    {
        final String attribute = "encoding=";
        int attributePos = xmlDeclaration.indexOf(attribute);
        if (attributePos == -1)
            return null;

        int quotePos = attributePos + attribute.length();
        if (quotePos >= xmlDeclaration.length())
            return null;

        char quote = xmlDeclaration.charAt(quotePos);
        if (quote != '"' && quote != '\'')
            return null;

        // The value ends at the quote character that opened it, so a single-quoted value
        // followed by a double-quoted attribute is still read correctly. An unterminated
        // value runs to the end of the line.
        int valueStart = quotePos + 1;
        int valueEnd = xmlDeclaration.indexOf(quote, valueStart);
        String value = (valueEnd == -1) ? xmlDeclaration.substring(valueStart)
            : xmlDeclaration.substring(valueStart, valueEnd);
        value = value.trim();
        return (value.length() == 0) ? null : value;
    }

    public boolean isAutoDetectHtmlArticles()
    {
        return treepadReader.isAutoDetectHtmlArticles();
    }

    public void setAutoDetectHtmlArticles(boolean autoDetectHtmlArticles)
    {
        treepadReader.setAutoDetectHtmlArticles(autoDetectHtmlArticles);
    }

    /**
     * @return the encoding of the Treepad reader; the XML reader's encoding is taken from
     *         the XML declaration on every read
     */
    public String getEncoding()
    {
        return treepadReader.getEncoding();
    }

    /**
     * Sets the encoding of the Treepad reader only.
     *
     * @param encoding the character encoding name
     */
    public void setEncoding(String encoding)
    {
        treepadReader.setEncoding(encoding);
    }

    /**
     * This class wraps the byte input streams we're presented with. We need it because
     * java.io.InputStreams don't provide functionality to reread processed bytes, and they have a
     * habit of reading more than one character when you call their read() methods. This means that,
     * once we discover the true (declared) encoding of a document, we can neither backtrack to read
     * the whole doc again nor start reading where we are with a new reader. This class allows
     * rewinding an input stream by allowing a mark to be set, and the stream reset to that position.
     * <strong>Only bytes obtained through the single-byte read() method are buffered. Once the
     * buffer has been replayed, read(byte[], int, int) and skip(long) go straight to the
     * underlying stream and that data is not buffered, so it cannot be re-read.</strong>
     *
     * @xerces.internal
     * @author Neil Graham, IBM
     * @author Glenn Marcy, IBM
     */
    protected static class RewindableInputStream
        extends InputStream
    {
        /** Initial buffer capacity, and the maximum line length readLine() returns. */
        private static final int BUFFER_SIZE = 2048;

        /** The wrapped stream; null once this stream has been closed. */
        private InputStream fInputStream;

        /** Bytes obtained so far through read(); grows by doubling. */
        private byte[] fData;

        /** Position that rewind() returns to. */
        private int fStartOffset;

        /** Offset at which read() hit the end of the underlying stream, or -1 if not yet. */
        private int fEndOffset;

        /** Current read position within the buffered bytes. */
        private int fOffset;

        /** Number of valid bytes in fData. */
        private int fLength;

        /** Position that reset() returns to; 0 until mark() is called. */
        private int fMark;

        public RewindableInputStream(InputStream is)
        {
            fData = new byte[BUFFER_SIZE];
            fInputStream = is;
            fStartOffset = 0;
            fEndOffset = -1;
            fOffset = 0;
            fLength = 0;
            fMark = 0;
        }

        public void setStartOffset(int offset)
        {
            fStartOffset = offset;
        }

        /** Moves the read position back to the start offset and logs the move to stdout. */
        public void rewind()
        {
            fOffset = fStartOffset;
            System.out.println("Rewinding " + fOffset + "/" + fLength + " -> " + fStartOffset
                + "(end=" + fEndOffset + ")");
        }

        /**
         * Returns the next byte, replaying buffered bytes first and otherwise reading one
         * byte from the underlying stream and buffering it.
         */
        public int read()
            throws IOException
        {
            if (fOffset < fLength)
            {
                return fData[fOffset++] & 0xff;
            }
            if (fOffset == fEndOffset)
            {
                return -1;
            }
            if (fOffset == fData.length)
            {
                // Buffer is full: double its capacity
                byte[] newData = new byte[fOffset << 1];
                System.arraycopy(fData, 0, newData, 0, fOffset);
                fData = newData;
            }
            int b = fInputStream.read();
            if (b == -1)
            {
                fEndOffset = fOffset;
                return -1;
            }
            fData[fLength++] = (byte)b;
            fOffset++;
            return b & 0xff;
        }

        /**
         * Copies buffered bytes if any remain; otherwise reads directly from the underlying
         * stream without buffering the result.
         */
        public int read(byte[] b, int off, int len)
            throws IOException
        {
            int bytesLeft = fLength - fOffset;
            if (bytesLeft == 0)
            {
                if (fOffset == fEndOffset)
                {
                    return -1;
                }
                return fInputStream.read(b, off, len);
            }
            if (len < bytesLeft)
            {
                if (len <= 0)
                {
                    return 0;
                }
            }
            else
            {
                len = bytesLeft;
            }
            if (b != null)
            {
                System.arraycopy(fData, fOffset, b, off, len);
            }
            fOffset += len;
            return len;
        }

        /**
         * Skips buffered bytes first and leaves any remainder to the underlying stream.
         */
        public long skip(long n)
            throws IOException
        {
            int bytesLeft;
            if (n <= 0)
            {
                return 0;
            }
            bytesLeft = fLength - fOffset;
            if (bytesLeft == 0)
            {
                if (fOffset == fEndOffset)
                {
                    return 0;
                }
                return fInputStream.skip(n);
            }
            if (n <= bytesLeft)
            {
                fOffset += n;
                return n;
            }
            fOffset += bytesLeft;
            if (fOffset == fEndOffset)
            {
                return bytesLeft;
            }
            n -= bytesLeft;
            return fInputStream.skip(n) + bytesLeft;
        }

        /**
         * In a manner of speaking, when this class isn't permitting more than one byte at a
         * time to be read, it is "blocking". The available() method should indicate how much
         * can be read without blocking, so while buffered bytes remain, only those are
         * reported; otherwise, the result of available() on the underlying InputStream is
         * appropriate.
         *
         * @return the number of bytes readable without blocking; 0 at the end of the stream,
         *         as the InputStream contract requires
         */
        public int available()
            throws IOException
        {
            int bytesLeft = fLength - fOffset;
            if (bytesLeft == 0)
            {
                if (fOffset == fEndOffset)
                {
                    return 0;
                }
                return fInputStream.available();
            }
            return bytesLeft;
        }

        /**
         * Remembers the current position for reset().
         *
         * @param howMuch ignored
         */
        public void mark(int howMuch)
        {
            fMark = fOffset;
        }

        /** Returns to the marked position, or to the start if mark() was never called. */
        public void reset()
        {
            fOffset = fMark;
        }

        public boolean markSupported()
        {
            return true;
        }

        public void close()
            throws IOException
        {
            if (fInputStream != null)
            {
                fInputStream.close();
                fInputStream = null;
            }
        }

        /**
         * Reads bytes up to, but not including, the first CR or LF, the end of the stream,
         * or BUFFER_SIZE bytes, whichever comes first. The terminator byte is consumed but
         * not returned.
         *
         * @return the bytes read, decoded with the platform's default charset
         */
        public String readLine()
            throws IOException
        {
            byte[] bytes = new byte[BUFFER_SIZE];
            int len = 0;
            while (len < BUFFER_SIZE)
            {
                int ret = read();
                if (ret == -1 || ret == 0x0a || ret == 0x0d)
                    break;
                bytes[len] = (byte)(ret & 0xff);
                len++;
            }
            return new String(bytes, 0, len);
        }

    } // end of RewindableInputStream class
}

--- NEW FILE: XmlReader.java ---
/*
           Jreepad - personal information manager.
           Copyright (C) 2004-2006 Dan Stowell

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

The full license can be read online here:

           http://www.gnu.org/copyleft/gpl.html
*/

package jreepad.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import jreepad.JreepadNode;

/**
 * Reads XML input into Jreepad.
 * <p>
 * This is a line-oriented scanner, not a general XML parser: it looks for the literal
 * strings "&lt;node" and "&lt;/node&gt;" and reads the "title" and "type" attributes of
 * each opening tag.
 *
 * @version $Id$
 */
public class XmlReader implements JreepadReader
{
    /** Encoding used when none has been set (the XML default). */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Length of the longest entity name that xmlUnescapeChars() recognises. */
    private static final int MAX_ENTITY_NAME_LENGTH = 4;

    private String encoding;

    public XmlReader()
    {
        this(null);
    }

    /**
     * @param encoding name of the character encoding to decode the stream with, or
     *        <code>null</code> to use UTF-8
     */
    public XmlReader(String encoding)
    {
        this.encoding = encoding;
    }

    /**
     * Reads a node tree from the stream.
     *
     * @param in the stream holding the XML document
     * @return the node built from the first "&lt;node" element and everything nested in it
     * @throws IOException if reading fails, the encoding is not supported, or the input
     *         contains no "&lt;node" element
     */
    public JreepadNode read(InputStream in)
        throws IOException
    {
        // A null charset name makes InputStreamReader throw NullPointerException
        String charsetName = (encoding != null) ? encoding : DEFAULT_ENCODING;
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName));

        // Spool until the opening tag of the very first node has been read completely.
        // The offset is re-checked after every appended line, so a node that starts on the
        // last line of the input is still found.
        StringBuffer spooled = new StringBuffer();
        String currentLine;
        int nodeTagOffset = -1;
        while ((currentLine = reader.readLine()) != null)
        {
            spooled.append(currentLine).append('\n');
            nodeTagOffset = spooled.indexOf("<node");
            if (nodeTagOffset != -1 && spooled.indexOf(">", nodeTagOffset) != -1)
                break;
        }
        if (nodeTagOffset == -1)
            throw new IOException("No <node> element found in XML input");

        // Strip off anything BEFORE the node opening
        return readNode(reader, spooled.substring(nodeTagOffset), 0).node;
    }

    /**
     * Reads one node, recursing into child nodes.
     *
     * @param reader source of further lines
     * @param currentXmlContent text already read; it is assumed to BEGIN with the node's
     *        opening tag
     * @param depth nesting depth of this node; only passed on to recursive calls
     * @return the newly created node together with any text following its closing tag that
     *         remains unprocessed
     * @throws IOException if reading a line fails
     */
    ReturnValue readNode(BufferedReader reader, String currentXmlContent, int depth)
        throws IOException
    {
        JreepadNode node = new JreepadNode();

        // Look for the attributes inside the opening tag only, so that text further on in
        // the buffer (content or child nodes) cannot be mistaken for them.
        int tagEnd = currentXmlContent.indexOf('>');
        String openingTag = (tagEnd == -1) ? currentXmlContent
            : currentXmlContent.substring(0, tagEnd + 1);
        String title = attributeValue(openingTag, "title", "<Untitled node>");
        String typeString = attributeValue(openingTag, "type", "text/plain");

        if (typeString.equals("text/csv"))
            node.setArticleMode(JreepadNode.ARTICLEMODE_CSV);
        else if (typeString.equals("text/html"))
            node.setArticleMode(JreepadNode.ARTICLEMODE_HTML);
        else if (typeString.equals("text/textile"))
            node.setArticleMode(JreepadNode.ARTICLEMODE_TEXTILEHTML);
        else if (typeString.equals("application/x-jreepad-softlink"))
            node.setArticleMode(JreepadNode.ARTICLEMODE_SOFTLINK);
        else
            node.setArticleMode(JreepadNode.ARTICLEMODE_ORDINARY);

        node.setTitle(xmlUnescapeChars(title));

        // The attributes are interpreted, so trim the opening tag away
        currentXmlContent = currentXmlContent.substring(tagEnd + 1);

        String content = "";
        // Once the child nodes come in, nothing more is added to this node's content
        boolean readingContent = true;

        while (true)
        {
            // "<node" tells us a child is starting; "</node>" tells us this node is
            // finished and control returns to the parent
            int startTagOffset = currentXmlContent.indexOf("<node");
            int endTagOffset = currentXmlContent.indexOf("</node>");

            if (endTagOffset != -1 && (startTagOffset == -1 || endTagOffset < startTagOffset))
            {
                // The nearest tag closes this node
                if (readingContent)
                    content += xmlUnescapeChars(currentXmlContent.substring(0, endTagOffset));
                node.setContent(content);
                return new ReturnValue(currentXmlContent.substring(endTagOffset + 7), node);
            }

            if (startTagOffset != -1 && currentXmlContent.indexOf('>', startTagOffset) != -1)
            {
                // The nearest tag opens a child, and that opening tag is complete
                if (readingContent)
                {
                    content += xmlUnescapeChars(currentXmlContent.substring(0, startTagOffset));
                    node.setContent(content);
                }

                // Having found a child node, STOP adding anything to the current node's
                // content (e.g. newlines...)
                readingContent = false;

                ReturnValue returnValue = readNode(reader, currentXmlContent
                    .substring(startTagOffset), depth + 1);
                currentXmlContent = returnValue.xmlContent;
                node.add(returnValue.node);
                continue;
            }

            // Nothing in the buffer can be processed yet: fetch another line. The newline
            // is put back because BufferedReader strips it.
            String currentLine = reader.readLine();
            if (currentLine == null)
                break;
            currentXmlContent += currentLine + "\n";
        }

        // End of input without a closing tag: just make sure no content is wasted
        int firstTagOffset = currentXmlContent.indexOf('<');
        if (readingContent && (firstTagOffset != -1))
            content += xmlUnescapeChars(currentXmlContent.substring(0, firstTagOffset));
        node.setContent(content);
        return new ReturnValue("", node);
    }

    /**
     * Finds the value of a quoted attribute in an opening tag.
     *
     * @param tag the opening tag text
     * @param name the attribute name; it must be followed directly by '=' and a quote
     * @param fallback value to return if the attribute is absent or unterminated
     * @return the raw (still escaped) attribute value, or <code>fallback</code>
     */
    private static String attributeValue(String tag, String name, String fallback)
    {
        int length = tag.length();
        int pos = 0;
        while (pos < length)
        {
            int equalsPos = tag.indexOf('=', pos);
            if (equalsPos == -1 || equalsPos + 1 >= length)
                break;

            char quote = tag.charAt(equalsPos + 1);
            if (quote != '"' && quote != '\'')
            {
                // Not the start of a quoted value; keep scanning after this '='
                pos = equalsPos + 1;
                continue;
            }

            int valueEnd = tag.indexOf(quote, equalsPos + 2);
            if (valueEnd == -1)
                break;

            // The attribute name is the run of non-whitespace characters before the '='
            int nameStart = equalsPos;
            while (nameStart > pos && !Character.isWhitespace(tag.charAt(nameStart - 1)))
                nameStart--;
            if (tag.substring(nameStart, equalsPos).equals(name))
                return tag.substring(equalsPos + 2, valueEnd);

            // Skip the whole quoted value so that its text is never taken for an attribute
            pos = valueEnd + 1;
        }
        return fallback;
    }

    /**
     * Replaces the entities lt, gt, amp, quot and apos with the characters they stand for.
     * Any other '&amp;' is copied through unchanged, together with the text following it.
     *
     * @param in the escaped text
     * @return the unescaped text
     */
    private static String xmlUnescapeChars(String in)
    {
        int length = in.length();
        StringBuffer ret = new StringBuffer(length);

        int i = 0;
        while (i < length)
        {
            char ch = in.charAt(i);
            if (ch != '&')
            {
                ret.append(ch);
                i++;
                continue;
            }

            // Look for the terminating semicolon, no further away than the longest
            // recognised entity name allows
            int limit = Math.min(length, i + 2 + MAX_ENTITY_NAME_LENGTH);
            int semicolon = -1;
            for (int j = i + 1; j < limit; j++)
            {
                if (in.charAt(j) == ';')
                {
                    semicolon = j;
                    break;
                }
            }

            String replacement = null;
            if (semicolon != -1)
                replacement = entityValue(in.substring(i + 1, semicolon));

            if (replacement == null)
            {
                // Not an entity we know: keep the ampersand and carry on after it
                ret.append('&');
                i++;
            }
            else
            {
                ret.append(replacement);
                i = semicolon + 1;
            }
        }

        return ret.toString();
    }

    /**
     * @param name an entity name, without the ampersand and semicolon
     * @return the text the entity stands for, or <code>null</code> if it is not recognised
     */
    private static String entityValue(String name)
    {
        if (name.equals("lt"))
            return "<";
        if (name.equals("gt"))
            return ">";
        if (name.equals("amp"))
            return "&";
        if (name.equals("quot"))
            return "\"";
        if (name.equals("apos"))
            return "'";
        return null;
    }

    public String getEncoding()
    {
        return encoding;
    }

    public void setEncoding(String encoding)
    {
        this.encoding = encoding;
    }

    /**
     * Container class to make it possible to return two objects from readNode().
     */
    private static class ReturnValue
    {
        /** Text following the node's closing tag that has not been processed yet. */
        public String xmlContent;

        /** The node that was read. */
        public JreepadNode node;

        public ReturnValue(String xmlContent, JreepadNode node)
        {
            this.xmlContent = xmlContent;
            this.node = node;
        }
    }
}

--- NEW FILE: TreepadReader.java ---
/*
           Jreepad - personal information manager.
           Copyright (C) 2004-2006 Dan Stowell

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

The full license can be read online here:

           http://www.gnu.org/copyleft/gpl.html
*/

package jreepad.io;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.Stack;

import jreepad.JreepadNode;

/**
 * Reads a treepad file into Jreepad.
 * <p>
 * After the first line, which is skipped, each node consists of a "dt=" line (absent when
 * the file format is 2), a "&lt;node&gt;" line, a title line, a depth line, and content
 * lines up to the end-of-node marker line.
 *
 * @version $Id$
 */
public class TreepadReader implements JreepadReader
{
    /** File format value for the older format, whose nodes have no "dt=" line. */
    private static final int FORMAT_WITHOUT_DT_LINE = 2;

    /** The line that terminates the content of a node. */
    private static final String END_NODE_MARKER = "<end node> 5P9i0s8y19Z";

    private boolean autoDetectHtmlArticles;

    private String encoding;

    private int fileFormat;

    /**
     * @param encoding name of the character encoding to decode the stream with, or
     *        <code>null</code> for the platform default
     * @param autoDetectHtmlArticles whether content wrapped in "&lt;html&gt;" ...
     *        "&lt;/html&gt;" turns a node into an HTML-mode node
     */
    public TreepadReader(String encoding, boolean autoDetectHtmlArticles)
    {
        this.encoding = encoding;
        this.autoDetectHtmlArticles = autoDetectHtmlArticles;
    }

    /**
     * Reads a node tree from the stream.
     *
     * @param in the stream holding the treepad file
     * @return the last node read at depth 0, or <code>null</code> if there was none
     * @throws IOException if reading fails, the encoding is not supported, or a node header
     *         is not in the expected form
     */
    public JreepadNode read(InputStream in)
        throws IOException
    {
        // A null charset name makes InputStreamReader throw NullPointerException
        InputStreamReader decoder = (encoding != null) ? new InputStreamReader(in, encoding)
            : new InputStreamReader(in);
        LineNumberReader reader = new LineNumberReader(decoder);
        reader.readLine(); // skip first line // TODO check for treepadness

        Stack nodeStack = new Stack(); // the chain of nodes from the root to the latest node
        JreepadNode rootNode = null;
        String dtLine, nodeLine, titleLine, depthLine;
        dtLine = "dt=text"; // stands in for the missing line in the older format

        while ((fileFormat == FORMAT_WITHOUT_DT_LINE || (dtLine = reader.readLine()) != null)
            && (nodeLine = reader.readLine()) != null && (titleLine = reader.readLine()) != null
            && (depthLine = reader.readLine()) != null)
        {
            // The depth line was read last, so the other header lines sit just before it
            int depthLineNumber = reader.getLineNumber();

            if (dtLine.equals("") && nodeLine.startsWith("<bmarks>"))
                throw new IOException(
                    "This is not a Treepad-Lite-compatible file!\n\nFiles created in more advanced versions of Treepad\ncontain features that are not available in Jreepad.");

            // Read "dt=text" [or error] - NB THE OLDER FORMAT DOESN'T INCLUDE THIS LINE SO
            // WE SKIP IT
            if (fileFormat != FORMAT_WITHOUT_DT_LINE)
                if (!(dtLine.toLowerCase().startsWith("dt=text")))
                    throw new IOException("Unrecognised node dt format at line "
                        + (depthLineNumber - 3) + ": " + dtLine);
            // Read "<node>" [or error]
            if (!(nodeLine.toLowerCase().startsWith("<node>")))
                throw new IOException("Unrecognised node format at line "
                    + (depthLineNumber - 2) + ": " + nodeLine);

            int depthMarker = parseDepth(depthLine, depthLineNumber);

            // Read THE CONTENT! [loop until we find the end-of-node marker]
            StringBuffer currentContent = new StringBuffer();
            String currentLine;
            while ((currentLine = reader.readLine()) != null
                && !currentLine.equals(END_NODE_MARKER))
            {
                currentContent.append(currentLine).append('\n');
            }

            // Now, having established the content and the title and the depth, create the
            // node; the newline appended after the last content line is dropped
            String content = currentContent.substring(0, Math.max(currentContent.length() - 1, 0));
            JreepadNode newNode = new JreepadNode(titleLine, content);

            // Turn it into a HTML-mode node if it matches "<html> ... </html>"
            String compareContent = newNode.getContent().toLowerCase().trim();
            int newArticleMode = (autoDetectHtmlArticles && compareContent.startsWith("<html>") && compareContent
                .endsWith("</html>")) ? JreepadNode.ARTICLEMODE_HTML : JreepadNode.ARTICLEMODE_ORDINARY;
            newNode.setArticleMode(newArticleMode);

            // Unwind to the new node's depth. For depth 0 this empties the stack, so the
            // children that follow attach to this node rather than to an earlier root.
            while (nodeStack.size() > depthMarker)
                nodeStack.pop();

            if (depthMarker == 0)
            {
                rootNode = newNode;
            }
            else
            {
                if (nodeStack.isEmpty())
                    throw new IOException("Node at line " + depthLineNumber + " has depth "
                        + depthMarker + " but no parent node precedes it");
                ((JreepadNode)(nodeStack.peek())).add(newNode);
            }
            nodeStack.push(newNode);
        }
        return rootNode;
    }

    /**
     * Parses the depth line of a node header.
     *
     * @param depthLine the line holding the depth
     * @param lineNumber number of that line, for the error message
     * @return the depth, which is never negative
     * @throws IOException if the line is not a non-negative integer
     */
    private static int parseDepth(String depthLine, int lineNumber)
        throws IOException
    {
        int depth;
        try
        {
            depth = Integer.parseInt(depthLine.trim());
        }
        catch (NumberFormatException e)
        {
            IOException failure = new IOException("Unrecognised node depth at line "
                + lineNumber + ": " + depthLine);
            failure.initCause(e);
            throw failure;
        }
        if (depth < 0)
            throw new IOException("Unrecognised node depth at line " + lineNumber + ": "
                + depthLine);
        return depth;
    }

    public boolean isAutoDetectHtmlArticles()
    {
        return autoDetectHtmlArticles;
    }

    public void setAutoDetectHtmlArticles(boolean autoDetectHtmlArticles)
    {
        this.autoDetectHtmlArticles = autoDetectHtmlArticles;
    }

    public String getEncoding()
    {
        return encoding;
    }

    public void setEncoding(String encoding)
    {
        this.encoding = encoding;
    }

    public int getFileFormat()
    {
        return fileFormat;
    }

    /**
     * @param fileFormat 2 for files whose nodes have no "dt=" line; with any other value a
     *        "dt=" line is expected before every node
     */
    public void setFileFormat(int fileFormat)
    {
        this.fileFormat = fileFormat;
    }
}

[Jreepad-CVS] jreepad/src/jreepad/io XmlReader.java, NONE, 1.1 TreepadReader.java, NONE, 1.1 AutoDetectReader.java, NONE, 1.1 JreepadReader.java, 1.1, 1.2

[Jreepad-CVS] jreepad/src/jreepad/io XmlReader.java, NONE, 1.1 TreepadReader.java, NONE, 1.1 AutoDetectReader.java, NONE, 1.1 JreepadReader.java, 1.1, 1.2