Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/sax Attributes.java,NONE,1.1 Feedback.java,NONE,1.1 L
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-07-14 01:58:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11717/src/org/htmlparser/sax Added Files: Attributes.java Feedback.java Locator.java XMLReader.java package.html Log Message: Implement rudimentary sax parser. Currently exposes DOM parser via sax project (http://sourceforge.net/projects/sax) interfaces. --- NEW FILE: Locator.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax/Locator.java,v $ // $Author: derrickoswald $ // $Date: 2004/07/14 01:58:02 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.sax; import org.htmlparser.Parser; import org.htmlparser.lexer.Lexer; /** * Transforms character offsets into line and column in the HTML file. */ public class Locator implements org.xml.sax.Locator { /** * Underlying parser object. */ protected Parser mParser; /** * Creates a locator for the given parser. * @param parser The parser with the {@link org.htmlparser.lexer.Page Page} being accessed. 
*/ public Locator (Parser parser) { mParser = parser; } /** * Return the public identifier for the current document event. * * <p>The return value is the public identifier of the document * entity or of the external parsed entity in which the markup * triggering the event appears.</p> * * @return A string containing the public identifier, or * null if none is available. * @see #getSystemId */ public String getPublicId () { return (null); // I assume this would be <title></title> } /** * Return the system identifier for the current document event. * * <p>The return value is the system identifier of the document * entity or of the external parsed entity in which the markup * triggering the event appears.</p> * * <p>If the system identifier is a URL, the parser must resolve it * fully before passing it to the application. For example, a file * name must always be provided as a <em>file:...</em> URL, and other * kinds of relative URI are also resolved against their bases.</p> * * @return A string containing the system identifier, or null * if none is available. * @see #getPublicId */ public String getSystemId () { return (mParser.getURL ()); } /** * Return the line number where the current document event ends. * Lines are delimited by line ends, which are defined in * the XML specification. * * <p><strong>Warning:</strong> The return value from the method * is intended only as an approximation for the sake of diagnostics; * it is not intended to provide sufficient information * to edit the character content of the original XML document. * In some cases, these "line" numbers match what would be displayed * as columns, and in others they may not match the source text * due to internal entity expansion. 
</p> * * <p>The return value is an approximation of the line number * in the document entity or external parsed entity where the * markup triggering the event appears.</p> * * <p>If possible, the SAX driver should provide the line position * of the first character after the text associated with the document * event. The first line is line 1.</p> * * @return The line number, or -1 if none is available. * @see #getColumnNumber */ public int getLineNumber () { Lexer lexer; lexer = mParser.getLexer (); return (lexer.getPage ().row (lexer.getCursor ())); } /** * Return the column number where the current document event ends. * This is one-based number of Java <code>char</code> values since * the last line end. * * <p><strong>Warning:</strong> The return value from the method * is intended only as an approximation for the sake of diagnostics; * it is not intended to provide sufficient information * to edit the character content of the original XML document. * For example, when lines contain combining character sequences, wide * characters, surrogate pairs, or bi-directional text, the value may * not correspond to the column in a text editor's display. </p> * * <p>The return value is an approximation of the column number * in the document entity or external parsed entity where the * markup triggering the event appears.</p> * * <p>If possible, the SAX driver should provide the line position * of the first character after the text associated with the document * event. The first column in each line is column 1.</p> * * @return The column number, or -1 if none is available. 
* @see #getLineNumber */ public int getColumnNumber () { Lexer lexer; lexer = mParser.getLexer (); return (lexer.getPage ().column (lexer.getCursor ())); } } --- NEW FILE: package.html --- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> <html> <head> <!-- HTMLParser Library $Name: $ - A java-based parser for HTML http://sourceforge.org/projects/htmlparser Copyright (C) 2004 Derrick Oswald Revision Control Information $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax/package.html,v $ $Author: derrickoswald $ $Date: 2004/07/14 01:58:02 $ $Revision: 1.1 $ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --> </head> <body bgcolor="white"> The sax package implements a SAX (Simple API for XML) parser for HTML. It uses the SAX 2 interfaces available from the <A href="http://sourceforge.net/projects/sax/">sax</A> project.<br> The HTML parser sax package is currently in it's infancy and just exposes the DOM Parser via a SAX interface. 
The driver name is "org.htmlparser.sax.XMLReader" and a simplistic test program is available in the org.htmlparser.tests package as SAXTest.java.<br> Some major pieces are missing, like namespace support (HTML files won't generally have much in the way of namespaces), attribute type info, resolvers and DTD handlers, among many other things.<br> </body> </html> --- NEW FILE: XMLReader.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax/XMLReader.java,v $ // $Author: derrickoswald $ // $Date: 2004/07/14 01:58:02 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.sax; import java.io.IOException; import org.xml.sax.ContentHandler; import org.xml.sax.DTDHandler; import org.xml.sax.EntityResolver; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; import org.xml.sax.helpers.NamespaceSupport; import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.Remark; import org.htmlparser.Tag; import org.htmlparser.Text; import org.htmlparser.util.DefaultParserFeedback; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserFeedback; /** * SAX parser. * Generates callbacks on the {@link ContentHandler} based on encountered nodes. * <br><em>Preliminary</em>. * <pre> * org.xml.sax.XMLReader reader = org.xml.sax.helpers.XMLReaderFactory.createXMLReader ("org.htmlparser.sax.XMLReader"); * org.xml.sax.ContentHandler content = new MyContentHandler (); * reader.setContentHandler (content); * org.xml.sax.ErrorHandler errors = new MyErrorHandler (); * reader.setErrorHandler (errors); * reader.parse ("http://cbc.ca"); * </pre> */ public class XMLReader implements org.xml.sax.XMLReader { /** * Determines if namespace handling is on. 
* All XMLReaders are required to recognize the feature names: * <ul> * <li><code>http://xml.org/sax/features/namespaces</code> - * a value of "true" indicates namespace URIs and unprefixed * local names for element and attribute names will be available</li> * <li><code>http://xml.org/sax/features/namespace-prefixes</code> - * a value of "true" indicates that XML qualified names (with * prefixes) and attributes (including xmlns* attributes) will * be available. * </ul> */ protected boolean mNameSpaces; // namespaces /** * Determines if namespace prefix handling is on. * @see #mNameSpaces */ protected boolean mNameSpacePrefixes; // namespace-prefixes /** * <em> not implemented</em> */ protected EntityResolver mEntityResolver; /** * <em> not implemented</em> */ protected DTDHandler mDTDHandler; /** * The content callback object. */ protected ContentHandler mContentHandler; /** * The error handler object. */ protected ErrorHandler mErrorHandler; /** * The underlying DOM parser. */ protected Parser mParser; /** * Namspace utility object. */ protected NamespaceSupport mSupport; /** * Qualified name parts. */ protected String mParts[]; /** * Create an SAX parser. */ public XMLReader () { mNameSpaces = true; mNameSpacePrefixes = false; mEntityResolver = null; mDTDHandler = null; mContentHandler = null; mErrorHandler = null; mSupport = new NamespaceSupport (); mSupport.pushContext (); mSupport.declarePrefix ("", "http://www.w3.org/TR/REC-html40"); // todo: // xmlns:html='http://www.w3.org/TR/REC-html40' // or xmlns:html='http://www.w3.org/1999/xhtml' mParts = new String[3]; } //////////////////////////////////////////////////////////////////// // Configuration. //////////////////////////////////////////////////////////////////// /** * Look up the value of a feature flag. * * <p>The feature name is any fully-qualified URI. It is * possible for an XMLReader to recognize a feature name but * temporarily be unable to return its value. 
* Some feature values may be available only in specific * contexts, such as before, during, or after a parse. * Also, some feature values may not be programmatically accessible. * (In the case of an adapter for SAX1 {@link Parser}, there is no * implementation-independent way to expose whether the underlying * parser is performing validation, expanding external entities, * and so forth.) </p> * * <p>All XMLReaders are required to recognize the * http://xml.org/sax/features/namespaces and the * http://xml.org/sax/features/namespace-prefixes feature names.</p> * * <p>Typical usage is something like this:</p> * * <pre> * XMLReader r = new MySAXDriver(); * * // try to activate validation * try { * r.setFeature("http://xml.org/sax/features/validation", true); * } catch (SAXException e) { * System.err.println("Cannot activate validation."); * } * * // register event handlers * r.setContentHandler(new MyContentHandler()); * r.setErrorHandler(new MyErrorHandler()); * * // parse the first document * try { * r.parse("http://www.foo.com/mydoc.xml"); * } catch (IOException e) { * System.err.println("I/O exception reading XML document"); * } catch (SAXException e) { * System.err.println("XML exception reading document."); * } * </pre> * * <p>Implementors are free (and encouraged) to invent their own features, * using names built on their own URIs.</p> * * @param name The feature name, which is a fully-qualified URI. * @return The current value of the feature (true or false). * @exception org.xml.sax.SAXNotRecognizedException If the feature * value can't be assigned or retrieved. * @exception org.xml.sax.SAXNotSupportedException When the * XMLReader recognizes the feature name but * cannot determine its value at this time. 
* @see #setFeature */ public boolean getFeature (String name) throws SAXNotRecognizedException, SAXNotSupportedException { boolean ret; if (name.equals ("http://xml.org/sax/features/namespaces")) ret = mNameSpaces; else if (name.equals ("http://xml.org/sax/features/namespace-prefixes")) ret = mNameSpacePrefixes; else throw new SAXNotSupportedException (name + " not yet understood"); return (ret); } /** * Set the value of a feature flag. * * <p>The feature name is any fully-qualified URI. It is * possible for an XMLReader to expose a feature value but * to be unable to change the current value. * Some feature values may be immutable or mutable only * in specific contexts, such as before, during, or after * a parse.</p> * * <p>All XMLReaders are required to support setting * http://xml.org/sax/features/namespaces to true and * http://xml.org/sax/features/namespace-prefixes to false.</p> * * @param name The feature name, which is a fully-qualified URI. * @param value The requested value of the feature (true or false). * @exception org.xml.sax.SAXNotRecognizedException If the feature * value can't be assigned or retrieved. * @exception org.xml.sax.SAXNotSupportedException When the * XMLReader recognizes the feature name but * cannot set the requested value. * @see #getFeature */ public void setFeature (String name, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException { if (name.equals ("http://xml.org/sax/features/namespaces")) mNameSpaces = value; else if (name.equals ("http://xml.org/sax/features/namespace-prefixes")) mNameSpacePrefixes = value; else throw new SAXNotSupportedException (name + " not yet understood"); } /** * Look up the value of a property. * * <p>The property name is any fully-qualified URI. It is * possible for an XMLReader to recognize a property name but * temporarily be unable to return its value. 
* Some property values may be available only in specific * contexts, such as before, during, or after a parse.</p> * * <p>XMLReaders are not required to recognize any specific * property names, though an initial core set is documented for * SAX2.</p> * * <p>Implementors are free (and encouraged) to invent their own properties, * using names built on their own URIs.</p> * * @param name The property name, which is a fully-qualified URI. * @return The current value of the property. * @exception org.xml.sax.SAXNotRecognizedException If the property * value can't be assigned or retrieved. * @exception org.xml.sax.SAXNotSupportedException When the * XMLReader recognizes the property name but * cannot determine its value at this time. * @see #setProperty */ public Object getProperty (String name) throws SAXNotRecognizedException, SAXNotSupportedException { throw new SAXNotSupportedException (name + " not yet understood"); } /** * Set the value of a property. * * <p>The property name is any fully-qualified URI. It is * possible for an XMLReader to recognize a property name but * to be unable to change the current value. * Some property values may be immutable or mutable only * in specific contexts, such as before, during, or after * a parse.</p> * * <p>XMLReaders are not required to recognize setting * any specific property names, though a core set is defined by * SAX2.</p> * * <p>This method is also the standard mechanism for setting * extended handlers.</p> * * @param name The property name, which is a fully-qualified URI. * @param value The requested value for the property. * @exception org.xml.sax.SAXNotRecognizedException If the property * value can't be assigned or retrieved. * @exception org.xml.sax.SAXNotSupportedException When the * XMLReader recognizes the property name but * cannot set the requested value. 
*/ public void setProperty (String name, Object value) throws SAXNotRecognizedException, SAXNotSupportedException { throw new SAXNotSupportedException (name + " not yet understood"); } //////////////////////////////////////////////////////////////////// // Event handlers. //////////////////////////////////////////////////////////////////// /** * Allow an application to register an entity resolver. * * <p>If the application does not register an entity resolver, * the XMLReader will perform its own default resolution.</p> * * <p>Applications may register a new or different resolver in the * middle of a parse, and the SAX parser must begin using the new * resolver immediately.</p> * * @param resolver The entity resolver. * @see #getEntityResolver */ public void setEntityResolver (EntityResolver resolver) { mEntityResolver = resolver; } /** * Return the current entity resolver. * * @return The current entity resolver, or null if none * has been registered. * @see #setEntityResolver */ public EntityResolver getEntityResolver () { return (mEntityResolver); } /** * Allow an application to register a DTD event handler. * * <p>If the application does not register a DTD handler, all DTD * events reported by the SAX parser will be silently ignored.</p> * * <p>Applications may register a new or different handler in the * middle of a parse, and the SAX parser must begin using the new * handler immediately.</p> * * @param handler The DTD handler. * @see #getDTDHandler */ public void setDTDHandler (DTDHandler handler) { mDTDHandler = handler; } /** * Return the current DTD handler. * * @return The current DTD handler, or null if none * has been registered. * @see #setDTDHandler */ public DTDHandler getDTDHandler () { return (mDTDHandler); } /** * Allow an application to register a content event handler. 
* * <p>If the application does not register a content handler, all * content events reported by the SAX parser will be silently * ignored.</p> * * <p>Applications may register a new or different handler in the * middle of a parse, and the SAX parser must begin using the new * handler immediately.</p> * * @param handler The content handler. * @see #getContentHandler */ public void setContentHandler (ContentHandler handler) { mContentHandler = handler; } /** * Return the current content handler. * * @return The current content handler, or null if none * has been registered. * @see #setContentHandler */ public ContentHandler getContentHandler () { return (mContentHandler); } /** * Allow an application to register an error event handler. * * <p>If the application does not register an error handler, all * error events reported by the SAX parser will be silently * ignored; however, normal processing may not continue. It is * highly recommended that all SAX applications implement an * error handler to avoid unexpected bugs.</p> * * <p>Applications may register a new or different handler in the * middle of a parse, and the SAX parser must begin using the new * handler immediately.</p> * * @param handler The error handler. * @see #getErrorHandler */ public void setErrorHandler (ErrorHandler handler) { mErrorHandler = handler; } /** * Return the current error handler. * * @return The current error handler, or null if none * has been registered. * @see #setErrorHandler */ public ErrorHandler getErrorHandler () { return (mErrorHandler); } //////////////////////////////////////////////////////////////////// // Parsing. //////////////////////////////////////////////////////////////////// /** * Parse an XML document. 
* * <p>The application can use this method to instruct the XML * reader to begin parsing an XML document from any valid input * source (a character stream, a byte stream, or a URI).</p> * * <p>Applications may not invoke this method while a parse is in * progress (they should create a new XMLReader instead for each * nested XML document). Once a parse is complete, an * application may reuse the same XMLReader object, possibly with a * different input source. * Configuration of the XMLReader object (such as handler bindings and * values established for feature flags and properties) is unchanged * by completion of a parse, unless the definition of that aspect of * the configuration explicitly specifies other behavior. * (For example, feature flags or properties exposing * characteristics of the document being parsed.) * </p> * * <p>During the parse, the XMLReader will provide information * about the XML document through the registered event * handlers.</p> * * <p>This method is synchronous: it will not return until parsing * has ended. If a client application wants to terminate * parsing early, it should throw an exception.</p> * * @param input The input source for the top-level of the * XML document. * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @exception java.io.IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. * @see org.xml.sax.InputSource * @see #parse(java.lang.String) * @see #setEntityResolver * @see #setDTDHandler * @see #setContentHandler * @see #setErrorHandler */ public void parse (InputSource input) throws IOException, SAXException { throw new SAXException ("parse (InputSource input) is not yet supported"); } /** * Parse an XML document from a system identifier (URI). * * <p>This method is a shortcut for the common case of reading a * document from a system identifier. 
It is the exact * equivalent of the following:</p> * * <pre> * parse(new InputSource(systemId)); * </pre> * * <p>If the system identifier is a URL, it must be fully resolved * by the application before it is passed to the parser.</p> * * @param systemId The system identifier (URI). * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * @exception java.io.IOException An IO exception from the parser, * possibly from a byte stream or character stream * supplied by the application. * @see #parse(org.xml.sax.InputSource) */ public void parse (String systemId) throws IOException, SAXException { Locator locator; ParserFeedback feedback; if (null != mContentHandler) try { mParser = new Parser (systemId); locator = new Locator (mParser); if (null != mErrorHandler) feedback = new Feedback (mErrorHandler, locator); else feedback = new DefaultParserFeedback (DefaultParserFeedback.QUIET); mParser.setFeedback (feedback); // OK, try a simplistic parse mContentHandler.setDocumentLocator (locator); try { mContentHandler.startDocument (); for (NodeIterator iterator = mParser.elements (); iterator.hasMoreNodes (); ) doSAX (iterator.nextNode ()); mContentHandler.endDocument (); } catch (SAXException se) { if (null != mErrorHandler) mErrorHandler.fatalError ( new SAXParseException ("contentHandler threw me", locator, se)); } } catch (ParserException pe) { if (null != mErrorHandler) mErrorHandler.fatalError ( new SAXParseException (pe.getMessage (), "", systemId, 0, 0)); } } /** * Process nodes recursively on the DocumentHandler. * Calls methods on the handler based on the type and whether it's an end tag. * Processes composite tags recursively. * Does rudimentary namespace processing according to the state of {@link #mNameSpaces} * and {@link #mNameSpacePrefixes}. * @param node The htmlparser node to traverse. 
*/
    protected void doSAX (Node node)
        throws
            ParserException,
            SAXException
    {
        Tag tag;
        Tag end;
        String text;
        NodeList children;

        if (node instanceof Remark)
        {
            // NOTE(review): remarks are delivered as ignorable whitespace,
            // presumably because ContentHandler has no comment callback -- confirm.
            text = mParser.getLexer ().getPage ().getText (node.getStartPosition (), node.getEndPosition ());
            mContentHandler.ignorableWhitespace (text.toCharArray (), 0, text.length ());
        }
        else if (node instanceof Text)
        {
            text = mParser.getLexer ().getPage ().getText (node.getStartPosition (), node.getEndPosition ());
            mContentHandler.characters (text.toCharArray (), 0, text.length ());
        }
        else if (node instanceof Tag)
        {
            tag = (Tag)node;
            resolveName (tag.getTagName ());
            mContentHandler.startElement (
                mParts[0], // uri
                mParts[1], // local
                mParts[2], // raw
                new Attributes (tag, mSupport, mParts));
            children = tag.getChildren ();
            if (null != children)
                for (int i = 0; i < children.size (); i++)
                    doSAX (children.elementAt (i));
            // Every startElement must be balanced by an endElement, even when the
            // tag has no end tag. The name is resolved again because mParts is
            // shared with the Attributes object and with the recursion above.
            end = tag.getEndTag ();
            resolveName ((null != end) ? end.getTagName () : tag.getTagName ());
            mContentHandler.endElement (
                mParts[0], // uri
                mParts[1], // local
                mParts[2]); // raw
        }
    }

    /**
     * Fill {@link #mParts} with the uri, local name and raw name to report for a tag.
     * Honours {@link #mNameSpaces} and {@link #mNameSpacePrefixes}.
     * @param name The tag name as supplied by the htmlparser tag.
     */
    private void resolveName (String name)
    {
        if (mNameSpaces)
        {
            // processName returns null for an undeclared prefix and then leaves the
            // array untouched; report the whole name with no namespace in that
            // case rather than the previous element's values
            if (null == mSupport.processName (name, mParts, false))
            {
                mParts[0] = "";
                mParts[1] = name;
            }
        }
        else
        {
            mParts[0] = "";
            mParts[1] = "";
        }
        // the raw name is reported when prefixes are wanted or namespaces are off
        if (mNameSpacePrefixes || !mNameSpaces)
            mParts[2] = name;
        else
            mParts[2] = "";
    }
}

--- NEW FILE: Feedback.java ---
// HTMLParser Library $Name: $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax/Feedback.java,v $
// $Author: derrickoswald $
// $Date: 2004/07/14 01:58:02 $
// $Revision: 1.1 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the
Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.sax; import org.xml.sax.ErrorHandler; import org.xml.sax.Locator; import org.xml.sax.SAXParseException; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserFeedback; import org.xml.sax.SAXException; /** * Mediates between the feedback mechanism of the htmlparser and an error handler. */ public class Feedback implements ParserFeedback { /** * The error handler to call back on. */ protected ErrorHandler mErrorHandler; /** * The locator for tag positions. */ protected Locator mLocator; /** * Create a feedback/error handler mediator. * @param handler The callback object. * @param locator A locator for error locations. */ public Feedback (ErrorHandler handler, Locator locator) { mErrorHandler = handler; mLocator = locator; } /** * <em>Just eats the info message.</em> * @param message {@inheritDoc} */ public void info (String message) { // swallow } /** * Calls {@link ErrorHandler#warning(SAXParseException) ErrorHandler.warning}. * @param message {@inheritDoc} */ public void warning (String message) { try { mErrorHandler.warning ( new SAXParseException (message, mLocator)); } catch (SAXException se) { se.printStackTrace (); } } /** * Calls {@link ErrorHandler#error(SAXParseException) ErrorHandler.error}. 
* @param message {@inheritDoc} */ public void error (String message, ParserException e) { try { mErrorHandler.error ( new SAXParseException (message, mLocator, e)); } catch (SAXException se) { se.printStackTrace (); } } } --- NEW FILE: Attributes.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax/Attributes.java,v $ // $Author: derrickoswald $ // $Date: 2004/07/14 01:58:02 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.sax; import java.util.Vector; import org.htmlparser.Attribute; import org.htmlparser.Tag; import org.xml.sax.helpers.NamespaceSupport; /** * Provides access to the tag attributes. */ public class Attributes implements org.xml.sax.Attributes { /** * The tag from which attributes are exposed. */ protected Tag mTag; /** * The utility class that converts namespaces. */ protected NamespaceSupport mSupport; /** * Elements of the qname. * Allocated once for all uses of {@link #mSupport}. */ protected String[] mParts; /** * Create an attibute access object. * @param tag The tag to expose. * @param support The namespace converter. 
* @param parts The elements of the qualified name. */ public Attributes (Tag tag, NamespaceSupport support, String[] parts) { mTag = tag; mSupport = support; mParts = parts; } //////////////////////////////////////////////////////////////////// // Indexed access. //////////////////////////////////////////////////////////////////// /** * Return the number of attributes in the list. * * <p>Once you know the number of attributes, you can iterate * through the list.</p> * * @return The number of attributes in the list. * @see #getURI(int) * @see #getLocalName(int) * @see #getQName(int) * @see #getType(int) * @see #getValue(int) */ public int getLength () { return (mTag.getAttributesEx ().size () - 1); } /** * Look up an attribute's Namespace URI by index. * * @param index The attribute index (zero-based). * @return The Namespace URI, or the empty string if none * is available, or null if the index is out of * range. * @see #getLength */ public String getURI (int index) { mSupport.processName (getQName (index), mParts, true); return (mParts[0]); } /** * Look up an attribute's local name by index. * * @param index The attribute index (zero-based). * @return The local name, or the empty string if Namespace * processing is not being performed, or null * if the index is out of range. * @see #getLength */ public String getLocalName (int index) { mSupport.processName (getQName (index), mParts, true); return (mParts[1]); } /** * Look up an attribute's XML qualified (prefixed) name by index. * * @param index The attribute index (zero-based). * @return The XML qualified name, or the empty string * if none is available, or null if the index * is out of range. * @see #getLength */ public String getQName (int index) { Attribute attribute; String ret; attribute = (Attribute)(mTag.getAttributesEx ().elementAt (index + 1)); if (attribute.isWhitespace ()) ret = "#text"; else ret = attribute.getName (); return (ret); } /** * Look up an attribute's type by index. 
* * <p>The attribute type is one of the strings "CDATA", "ID", * "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", * or "NOTATION" (always in upper case).</p> * * <p>If the parser has not read a declaration for the attribute, * or if the parser does not report attribute types, then it must * return the value "CDATA" as stated in the XML 1.0 Recommendation * (clause 3.3.3, "Attribute-Value Normalization").</p> * * <p>For an enumerated attribute that is not a notation, the * parser will report the type as "NMTOKEN".</p> * * @param index The attribute index (zero-based). * @return The attribute's type as a string, or null if the * index is out of range. * @see #getLength */ public String getType (int index) { return ("CDATA"); } /** * Look up an attribute's value by index. * * <p>If the attribute value is a list of tokens (IDREFS, * ENTITIES, or NMTOKENS), the tokens will be concatenated * into a single string with each token separated by a * single space.</p> * * @param index The attribute index (zero-based). * @return The attribute's value as a string, or null if the * index is out of range. * @see #getLength */ public String getValue (int index) { Attribute attribute; String ret; attribute = (Attribute)(mTag.getAttributesEx ().elementAt (index + 1)); ret = attribute.getValue (); if (null == ret) ret = ""; return (ret); } //////////////////////////////////////////////////////////////////// // Name-based query. //////////////////////////////////////////////////////////////////// /** * Look up the index of an attribute by Namespace name. * * @param uri The Namespace URI, or the empty string if * the name has no Namespace URI. * @param localName The attribute's local name. * @return The index of the attribute, or -1 if it does not * appear in the list. 
*/ public int getIndex (String uri, String localName) { Vector attributes; int size; Attribute attribute; String string; int ret; ret = -1; attributes = mTag.getAttributesEx (); if (null != attributes) { size = attributes.size (); for (int i = 1; i < size; i++) { attribute = (Attribute)attributes.elementAt (i); string = attribute.getName (); if (null != string) // not whitespace { mSupport.processName (string, mParts, true); if ( uri.equals (mParts[0]) & localName.equalsIgnoreCase (mParts[1])) { ret = i; i = size; // exit fast } } } } return (ret); } /** * Look up the index of an attribute by XML qualified (prefixed) name. * * @param qName The qualified (prefixed) name. * @return The index of the attribute, or -1 if it does not * appear in the list. */ public int getIndex (String qName) { mSupport.processName (qName, mParts, true); return (getIndex (mParts[0], mParts[1])); } /** * Look up an attribute's type by Namespace name. * * <p>See {@link #getType(int) getType(int)} for a description * of the possible types.</p> * * @param uri The Namespace URI, or the empty String if the * name has no Namespace URI. * @param localName The local name of the attribute. * @return The attribute type as a string, or null if the * attribute is not in the list or if Namespace * processing is not being performed. */ public String getType (String uri, String localName) { return (null); } /** * Look up an attribute's type by XML qualified (prefixed) name. * * <p>See {@link #getType(int) getType(int)} for a description * of the possible types.</p> * * @param qName The XML qualified name. * @return The attribute type as a string, or null if the * attribute is not in the list or if qualified names * are not available. */ public String getType (String qName) { return (null); } /** * Look up an attribute's value by Namespace name. 
*
     * <p>See {@link #getValue(int) getValue(int)} for a description
     * of the possible values.</p>
     *
     * <p>Both the Namespace URI and the local name must match; the local
     * name is compared without regard to case. An attribute whose prefix
     * is not declared is treated as having the empty URI and its whole
     * name as local name. An attribute that exists but has no value is
     * reported as the empty string.</p>
     *
     * @param uri The Namespace URI, or the empty String if the
     *        name has no Namespace URI.
     * @param localName The local name of the attribute.
     * @return The attribute value as a string, or null if the
     *         attribute is not in the list.
     */
    public String getValue (String uri, String localName)
    {
        Vector attributes;
        Attribute attribute;
        String name;
        String attributeUri;
        String attributeLocal;
        int size;
        String ret;

        ret = null;
        attributes = mTag.getAttributesEx ();
        if (null != attributes)
        {
            size = attributes.size ();
            // vector element 0 is not an exposed attribute, so start at 1
            // and stop at the first match
            for (int i = 1; (null == ret) && (i < size); i++)
            {
                attribute = (Attribute)attributes.elementAt (i);
                name = attribute.getName ();
                if (null != name) // unnamed entries (whitespace) cannot match
                {
                    if (null != mSupport.processName (name, mParts, true))
                    {
                        attributeUri = mParts[0];
                        attributeLocal = mParts[1];
                    }
                    else
                    {
                        // undeclared prefix: mParts still holds the previous
                        // lookup, so treat the raw name as un-namespaced
                        attributeUri = "";
                        attributeLocal = name;
                    }
                    if (attributeUri.equals (uri)
                        && attributeLocal.equalsIgnoreCase (localName))
                    {
                        ret = attribute.getValue ();
                        if (null == ret)
                            ret = ""; // present but valueless, as in getValue(int)
                    }
                }
            }
        }

        return (ret);
    }

    /**
     * Look up an attribute's value by XML qualified (prefixed) name.
     *
     * <p>See {@link #getValue(int) getValue(int)} for a description
     * of the possible values.</p>
     *
     * <p>The name is split with the namespace converter; if its prefix is
     * not declared, the whole name is matched as an un-namespaced local
     * name.</p>
     *
     * @param qName The XML qualified name.
     * @return The attribute value as a string, or null if the
     *         attribute is not in the list or if qualified names
     *         are not available.
     */
    public String getValue (String qName)
    {
        String ret;

        if (null == qName)
            ret = null;
        else if (null != mSupport.processName (qName, mParts, true))
            ret = getValue (mParts[0], mParts[1]);
        else
            ret = getValue ("", qName); // mParts is stale, do not read it

        return (ret);
    }
}
|