Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser Parser.java,1.114,1.115
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2006-05-30 02:53:59
|
Update of //cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs5.sourceforge.net:/tmp/cvs-serv18017

Modified Files:
    Parser.java
Log Message:
Allow passing HTML in the Parser constructor. So now it allows HTML, a URL or a file name.

Index: Parser.java
===================================================================
RCS file: //cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.114
retrieving revision 1.115
diff -C2 -d -r1.114 -r1.115
*** Parser.java 27 May 2006 18:43:25 -0000 1.114
--- Parser.java 30 May 2006 02:53:56 -0000 1.115
***************
*** 298,302 ****
       * it in.
       * @see #Parser(URLConnection,ParserFeedback)
!      * @param resourceLocn Either the URL or the filename (autodetects).
       * A standard HTTP GET is performed to read the content of the URL.
       * @param feedback The HTMLParserFeedback object to use when information,
--- 298,306 ----
       * it in.
       * @see #Parser(URLConnection,ParserFeedback)
!      * @param resource Either a URL, a filename or a string of HTML.
!      * The string is considered HTML if the first non-whitespace character
!      * is a <. The use of a url or file is autodetected by first attempting
!      * to open the resource as a URL, if that fails it is assumed to be a file
!      * name.
       * A standard HTTP GET is performed to read the content of the URL.
       * @param feedback The HTMLParserFeedback object to use when information,
***************
*** 305,313 ****
       * @throws ParserException If the URL is invalid.
       */
!     public Parser (String resourceLocn, ParserFeedback feedback)
          throws
              ParserException
      {
!         this (getConnectionManager ().openConnection (resourceLocn), feedback);
      }
  
--- 309,340 ----
       * @throws ParserException If the URL is invalid.
       */
!     public Parser (String resource, ParserFeedback feedback)
          throws
              ParserException
      {
!         int length;
!         boolean html;
!         char ch;
!
!         if (null == resource)
!             throw new IllegalArgumentException ("resource cannot be null");
!         setFeedback (feedback);
!         length = resource.length ();
!         html = false;
!         for (int i = 0; i < length; i++)
!         {
!             ch = resource.charAt (i);
!             if (!Character.isWhitespace (ch))
!             {
!                 if ('<' == ch)
!                     html = true;
!                 break;
!             }
!         }
!         if (html)
!             setLexer (new Lexer (new Page (resource)));
!         else
!             setLexer (new Lexer (getConnectionManager ().openConnection (resource)));
!         setNodeFactory (new PrototypicalNodeFactory ());
      }
  
***************
*** 315,325 ****
       * Creates a Parser object with the location of the resource (URL or file).
       * A DefaultHTMLParserFeedback object is used for feedback.
!      * @param resourceLocn Either the URL or the filename (autodetects).
       * @throws ParserException If the resourceLocn argument does not resolve
       * to a valid page or file.
       */
!     public Parser (String resourceLocn) throws ParserException
      {
!         this (resourceLocn, STDOUT);
      }
  
--- 342,353 ----
       * Creates a Parser object with the location of the resource (URL or file).
       * A DefaultHTMLParserFeedback object is used for feedback.
!      * @param resource Either HTML, a URL or a filename (autodetects).
       * @throws ParserException If the resourceLocn argument does not resolve
       * to a valid page or file.
+      * @see #Parser(string,ParserFeedback)
       */
!     public Parser (String resource) throws ParserException
      {
!         this (resource, STDOUT);
      }
  
***************
*** 808,821 ****
          try
          {
-             parser = new Parser ();
              if (1 < args.length)
                  filter = new TagNameFilter (args[1]);
              else
-             { // for a simple dump, use more verbose settings
                  filter = null;
                  parser.setFeedback (Parser.STDOUT);
                  getConnectionManager ().setMonitor (parser);
              }
-             parser.setURL (args[0]);
              System.out.println (parser.parse (filter));
          }
--- 836,850 ----
          try
          {
              if (1 < args.length)
                  filter = new TagNameFilter (args[1]);
              else
                  filter = null;
+             parser = new Parser (args[0]);
+             if (1 < args.length)
+             {
+                 // for a simple dump, use more verbose settings
                  parser.setFeedback (Parser.STDOUT);
                  getConnectionManager ().setMonitor (parser);
              }
              System.out.println (parser.parse (filter));
          }
|