[Htmlparser-cvs] htmlparser/src/org/htmlparser Parser.java,1.65,1.66
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-10-13 21:48:50
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv16902 Modified Files: Parser.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to serviceability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.65 retrieving revision 1.66 diff -C2 -d -r1.65 -r1.66 *** Parser.java 3 Oct 2003 00:20:44 -0000 1.65 --- Parser.java 13 Oct 2003 21:48:11 -0000 1.66 *************** *** 28,43 **** package org.htmlparser; ! ////////////////// ! // Java Imports // ! ////////////////// ! import java.io.BufferedInputStream; import java.io.IOException; - import java.io.InputStream; - import java.io.InputStreamReader; - import java.io.ObjectInputStream; - import java.io.ObjectOutputStream; import java.io.Serializable; ! import java.io.StringReader; ! import java.io.UnsupportedEncodingException; import java.net.URLConnection; import java.util.HashMap; --- 28,37 ---- package org.htmlparser; ! ! import java.io.File; import java.io.IOException; import java.io.Serializable; ! import java.net.MalformedURLException; ! import java.net.URL; import java.net.URLConnection; import java.util.HashMap; *************** *** 45,56 **** import java.util.Map; import java.util.Vector; ! import org.htmlparser.RemarkNode; ! import org.htmlparser.StringNode; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.TagNode; ! ! 
import org.htmlparser.parserHelper.ParserHelper; import org.htmlparser.scanners.AppletScanner; import org.htmlparser.scanners.BodyScanner; --- 39,51 ---- import java.util.Map; import java.util.Vector; ! ! import org.htmlparser.Node; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.TagNode; ! import org.htmlparser.nodeDecorators.DecodingNode; ! import org.htmlparser.nodeDecorators.EscapeCharacterRemovingNode; ! import org.htmlparser.nodeDecorators.NonBreakingSpaceConvertingNode; import org.htmlparser.scanners.AppletScanner; import org.htmlparser.scanners.BodyScanner; *************** *** 76,79 **** --- 71,75 ---- import org.htmlparser.util.DefaultParserFeedback; import org.htmlparser.util.IteratorImpl; + import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; *************** *** 326,330 **** public Parser(String resourceLocn, ParserFeedback feedback) throws ParserException { ! this (ParserHelper.openConnection (resourceLocn, feedback), feedback); } --- 322,326 ---- public Parser(String resourceLocn, ParserFeedback feedback) throws ParserException { ! this (openConnection (resourceLocn, feedback), feedback); } *************** *** 419,423 **** { if ((null != url) && !"".equals (url)) ! setConnection (ParserHelper.openConnection (url, getFeedback ())); } --- 415,419 ---- { if ((null != url) && !"".equals (url)) ! 
setConnection (openConnection (url, getFeedback ())); } *************** *** 520,533 **** } - public TagScanner getPreviousOpenScanner () - { - return (mScanner); - } - - public void setPreviousOpenScanner (TagScanner scanner) - { - mScanner = scanner; - } - // // Public methods --- 516,519 ---- *************** *** 583,592 **** public NodeIterator elements() throws ParserException { - return (createIteratorImpl ()); - } - - public IteratorImpl createIteratorImpl() - throws ParserException - { boolean remove_scanner; Node node; --- 569,572 ---- *************** *** 783,786 **** --- 763,856 ---- /** + * Opens a connection using the given url. + * @param url The url to open. + * @param feedback The object to use for messages or <code>null</code>. + * @exception ParserException if an i/o exception occurs accessing the url. + */ + public static URLConnection openConnection (URL url, ParserFeedback feedback) + throws + ParserException + { + URLConnection ret; + + try + { + ret = url.openConnection (); + } + catch (IOException ioe) + { + String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); + ParserException ex = new ParserException (msg, ioe); + if (null != feedback) + feedback.error (msg, ex); + throw ex; + } + + return (ret); + } + + /** + * Opens a connection based on a given string. + * The string is either a file, in which case <code>file://localhost</code> + * is prepended to a canonical path derived from the string, or a url that + * begins with one of the known protocol strings, e.g. <code>http://</code>. + * Embedded spaces are silently converted to %20 sequences. + * @param string The name of a file or a url. + * @param feedback The object to use for messages or <code>null</code> for no feedback. + * @exception ParserException if the string is not a valid url or file. 
+ */ + public static URLConnection openConnection (String string, ParserFeedback feedback) + throws + ParserException + { + final String prefix = "file://localhost"; + String resource; + URL url; + StringBuffer buffer; + URLConnection ret; + + try + { + url = new URL (LinkProcessor.fixSpaces (string)); + ret = openConnection (url, feedback); + } + catch (MalformedURLException murle) + { // try it as a file + try + { + File file = new File (string); + resource = file.getCanonicalPath (); + buffer = new StringBuffer (prefix.length () + resource.length ()); + buffer.append (prefix); + if (!resource.startsWith ("/")) + buffer.append ("/"); + buffer.append (resource); + url = new URL (LinkProcessor.fixSpaces (buffer.toString ())); + ret = openConnection (url, feedback); + if (null != feedback) + feedback.info (url.toExternalForm ()); + } + catch (MalformedURLException murle2) + { + String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; + ParserException ex = new ParserException (msg, murle2); + if (null != feedback) + feedback.error (msg, ex); + throw ex; + } + catch (IOException ioe) + { + String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; + ParserException ex = new ParserException (msg, ioe); + if (null != feedback) + feedback.error (msg, ex); + throw ex; + } + } + + return (ret); + } + + /** * The main program, which can be executed from the command line */ *************** *** 917,921 **** public Node createStringNode (Lexer lexer, int start, int end) { ! return (new StringNode (lexer.getPage (), start, end)); } --- 987,1004 ---- public Node createStringNode (Lexer lexer, int start, int end) { ! Node ret; ! ! ret = new StringNode (lexer.getPage (), start, end); ! if (null != stringNodeFactory) ! { ! if (stringNodeFactory.shouldDecodeNodes ()) ! ret = new DecodingNode (ret); ! if (stringNodeFactory.shouldRemoveEscapeCharacters ()) ! ret = new EscapeCharacterRemovingNode (ret); ! 
if (stringNodeFactory.shouldConvertNonBreakingSpace ()) ! ret = new NonBreakingSpaceConvertingNode (ret); ! } ! ! return (ret); } *************** *** 933,936 **** --- 1016,1021 ---- /** * Create a new tag node. + * This recurses into the created tag by calling the tag's scanner, + * if it is in the list of registered scanners. * @param lexer The lexer parsing this tag. * @param start The beginning position of the tag. *************** *** 953,960 **** name = ret.getTagName (); scanner = (TagScanner)scanners.get (name); ! save = getPreviousOpenScanner (); if ((null != scanner) && scanner.evaluate (ret.getText (), save)) { ! setPreviousOpenScanner (scanner); try { --- 1038,1045 ---- name = ret.getTagName (); scanner = (TagScanner)scanners.get (name); ! save = mScanner; if ((null != scanner) && scanner.evaluate (ret.getText (), save)) { ! mScanner = scanner; try { *************** *** 963,967 **** finally { ! setPreviousOpenScanner (save); } } --- 1048,1052 ---- finally { ! mScanner = save; } } |