htmlparser-cvs Mailing List for HTML Parser (Page 6)
Brought to you by:
derrickoswald
You can subscribe to this list here.
| 2003 | Jan | Feb | Mar | Apr | May (141) | Jun (108) | Jul (66) | Aug (127) | Sep (155) | Oct (149) | Nov (72) | Dec (72) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 | Jan (100) | Feb (36) | Mar (21) | Apr (3) | May (87) | Jun (28) | Jul (84) | Aug (5) | Sep (14) | Oct | Nov | Dec |
| 2005 | Jan (1) | Feb (39) | Mar (26) | Apr (38) | May (14) | Jun (10) | Jul | Aug | Sep (13) | Oct (8) | Nov (10) | Dec |
| 2006 | Jan | Feb (1) | Mar (17) | Apr (20) | May (28) | Jun (24) | Jul | Aug | Sep | Oct | Nov | Dec |
| 2015 | Jan | Feb | Mar (1) | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
From: Derrick O. <der...@us...> - 2005-06-19 12:01:23
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26937/htmlparser/src/org/htmlparser/http Modified Files: ConnectionManager.java package.html Added Files: HttpHeader.java Log Message: Changes to allow compilation of htmllexer.jar by gcj. Move non-JDK1.1 functionality to HttpHeader class. Unhook NodeList from filters by removing searchFor(cls) - use keepAllNodesThatMatch(new NodeClassFilter(cls)) instead. Include classes missing from closure set. Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/package.html,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** package.html 10 Apr 2005 23:20:43 -0000 1.2 --- package.html 19 Jun 2005 12:01:14 -0000 1.3 *************** *** 44,47 **** --- 44,48 ---- <a href="http://www.ietf.org/rfc/rfc2616.txt">HTTP protocol</a>. Each of these capabilities requires conditioning the HTTP connection. + A HTTP header utility class is also included. <p>The {@link org.htmlparser.http.ConnectionMonitor} interface is a callback mechanism for the ConnectionManager to notify an interested application *************** *** 72,85 **** manager.setUser ("FredB"); manager.setPassword ("holy$cow"); ! // set up an inner class for callbacks ConnectionMonitor monitor = new ConnectionMonitor () { public void preConnect (HttpURLConnection connection) { ! System.out.println (ConnectionManager.getRequestHeader (connection)); } public void postConnect (HttpURLConnection connection) { ! System.out.println (ConnectionManager.getResponseHeader (connection)); } }; --- 73,86 ---- manager.setUser ("FredB"); manager.setPassword ("holy$cow"); ! // set up (an inner class) for callbacks ConnectionMonitor monitor = new ConnectionMonitor () { public void preConnect (HttpURLConnection connection) { ! 
System.out.println (HttpHeader.getRequestHeader (connection)); } public void postConnect (HttpURLConnection connection) { ! System.out.println (HttpHeader.getResponseHeader (connection)); } }; --- NEW FILE: HttpHeader.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/HttpHeader.java,v $ // $Author: derrickoswald $ // $Date: 2005/06/19 12:01:13 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.http; import java.io.IOException; import java.net.HttpURLConnection; import java.util.Iterator; import java.util.List; import java.util.Map; /** * Utility methods to display HTTP headers. */ public class HttpHeader { /** * Private constructor. * This class is completely static. */ private HttpHeader () { } /** * Gets the request header for the connection. * <em>This header is generated from the contents of the connection * and may not be exactly the same as the request that will be sent.</em> * @param connection The connection to convert into an HTTP request header. * @return The string that would be sent by the HTTP request. 
*/ public static String getRequestHeader (HttpURLConnection connection) { // dump it StringBuffer buffer; Map map; String key; List items; buffer = new StringBuffer (1024); buffer.append (connection.getRequestMethod ()); buffer.append (" "); buffer.append (connection.getURL ()); buffer.append (" HTTP/1.1\n"); map = connection.getRequestProperties (); for (Iterator iter = map.keySet ().iterator (); iter.hasNext (); ) { key = (String)iter.next (); items = (List)map.get (key); buffer.append (key); buffer.append (": "); for (int i = 0; i < items.size (); i++) { if (0 != i) buffer.append (", "); buffer.append (items.get (i)); } buffer.append ("\n"); } return (buffer.toString ()); } /** * Gets the response header for the connection. * Calling this method on an un-connected connection will * generate an error, as will an attempt to get information * from a connected but invalid connection. * <em>This header is generated from the contents of the connection * and may not be exactly the same as the response that was received.</em> * @param conn The connection to convert into an HTTP response header. * @return The string that was sent as the HTTP response. 
*/ public static String getResponseHeader (HttpURLConnection conn) { // dump it StringBuffer buffer; int code; String message; String key; String value; buffer = new StringBuffer (1024); try { code = conn.getResponseCode (); if (-1 != code) { message = conn.getResponseMessage (); buffer.append ("HTTP/1.1 "); buffer.append (code); buffer.append (" "); buffer.append (message); buffer.append ("\n"); for (int i = 0; null != (value = conn.getHeaderField (i)); i++) { key = conn.getHeaderFieldKey (i); if (null != key) { buffer.append (key); buffer.append (": "); buffer.append (value); buffer.append ("\n"); } } } } catch (IOException ioe) { buffer.append (ioe.toString ()); } return (buffer.toString ()); } } Index: ConnectionManager.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/ConnectionManager.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** ConnectionManager.java 15 May 2005 11:49:04 -0000 1.4 --- ConnectionManager.java 19 Jun 2005 12:01:13 -0000 1.5 *************** *** 39,45 **** import java.util.Enumeration; import java.util.Hashtable; - import java.util.Iterator; - import java.util.List; - import java.util.Map; import java.util.Properties; import java.util.StringTokenizer; --- 39,42 ---- *************** *** 249,343 **** /** - * Gets the request header for the connection. - * <em>This header is generated from the contents of the connection - * and may not be exactly the same as the request that will be sent.</em> - * @param connection The connection to convert into an HTTP request header. - * @return The string that would be sent by the HTTP request. 
- */ - public static String getRequestHeader (HttpURLConnection connection) - { - // dump it - StringBuffer buffer; - Map map; - String key; - List items; - - buffer = new StringBuffer (1024); - buffer.append (connection.getRequestMethod ()); - buffer.append (" "); - buffer.append (connection.getURL ()); - buffer.append (" HTTP/1.1\n"); - map = connection.getRequestProperties (); - for (Iterator iter = map.keySet ().iterator (); iter.hasNext (); ) - { - key = (String)iter.next (); - items = (List)map.get (key); - buffer.append (key); - buffer.append (": "); - for (int i = 0; i < items.size (); i++) - { - if (0 != i) - buffer.append (", "); - buffer.append (items.get (i)); - } - buffer.append ("\n"); - } - - return (buffer.toString ()); - } - - /** - * Gets the response header for the connection. - * Calling this method on an un-connected connection will - * generate an error, as will an attempt to get information - * from a connected but invalid connection. - * <em>This header is generated from the contents of the connection - * and may not be exactly the same as the response that was received.</em> - * @param conn The connection to convert into an HTTP response header. - * @return The string that was sent as the HTTP response. 
- */ - public static String getResponseHeader (HttpURLConnection conn) - { - // dump it - StringBuffer buffer; - int code; - String message; - String key; - String value; - - buffer = new StringBuffer (1024); - try - { - code = conn.getResponseCode (); - if (-1 != code) - { - message = conn.getResponseMessage (); - buffer.append ("HTTP/1.1 "); - buffer.append (code); - buffer.append (" "); - buffer.append (message); - buffer.append ("\n"); - for (int i = 0; null != (value = conn.getHeaderField (i)); i++) - { - key = conn.getHeaderFieldKey (i); - if (null != key) - { - buffer.append (key); - buffer.append (": "); - buffer.append (value); - buffer.append ("\n"); - } - } - } - } - catch (IOException ioe) - { - buffer.append (ioe.toString ()); - } - - return (buffer.toString ()); - } - - /** * Get the current request header properties. * A String-to-String map of header keys and values, --- 246,249 ---- *************** *** 630,634 **** ret.setRequestProperty (key, value); } ! // set the proxy name and password if ((null != getProxyUser ()) --- 536,540 ---- ret.setRequestProperty (key, value); } ! // set the proxy name and password if ((null != getProxyUser ()) |
From: Derrick O. <der...@us...> - 2005-06-19 12:01:22
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26937/htmlparser/src/org/htmlparser/util Modified Files: NodeList.java Log Message: Changes to allow compilation of htmllexer.jar by gcj. Move non-JDK1.1 functionality to HttpHeader class. Unhook NodeList from filters by removing searchFor(cls) - use keepAllNodesThatMatch(new NodeClassFilter(cls)) instead. Include classes missing from closure set. Index: NodeList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v retrieving revision 1.58 retrieving revision 1.59 diff -C2 -d -r1.58 -r1.59 *** NodeList.java 12 Mar 2005 13:39:47 -0000 1.58 --- NodeList.java 19 Jun 2005 12:01:14 -0000 1.59 *************** *** 285,307 **** /** - * Convenience method to search for nodes of the given type non-recursively. - * @param classType The class to search for. - */ - public NodeList searchFor (Class classType) - { - return (searchFor (classType, false)); - } - - /** - * Convenience method to search for nodes of the given type. - * @param classType The class to search for. - * @param recursive If <code>true<code> digs into the children recursively. - */ - public NodeList searchFor (Class classType, boolean recursive) - { - return (extractAllNodesThatMatch (new NodeClassFilter (classType), recursive)); - } - - /** * Utility to apply a visitor to a node list. * Provides for a visitor to modify the contents of a page and get the --- 285,288 ---- |
From: Derrick O. <der...@us...> - 2005-06-19 12:01:22
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26937/htmlparser/src/org/htmlparser Modified Files: Parser.java Log Message: Changes to allow compilation of htmllexer.jar by gcj. Move non-JDK1.1 functionality to HttpHeader class. Unhook NodeList from filters by removing searchFor(cls) - use keepAllNodesThatMatch(new NodeClassFilter(cls)) instead. Include classes missing from closure set. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.106 retrieving revision 1.107 diff -C2 -d -r1.106 -r1.107 *** Parser.java 14 Jun 2005 10:37:34 -0000 1.106 --- Parser.java 19 Jun 2005 12:01:13 -0000 1.107 *************** *** 35,38 **** --- 35,39 ---- import org.htmlparser.http.ConnectionManager; import org.htmlparser.http.ConnectionMonitor; + import org.htmlparser.http.HttpHeader; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; *************** *** 740,744 **** ParserException { ! getFeedback ().info (ConnectionManager.getRequestHeader (connection)); } --- 741,745 ---- ParserException { ! getFeedback ().info (HttpHeader.getRequestHeader (connection)); } *************** *** 755,759 **** ParserException { ! getFeedback ().info (ConnectionManager.getResponseHeader (connection)); } --- 756,760 ---- ParserException { ! getFeedback ().info (HttpHeader.getResponseHeader (connection)); } |
From: Derrick O. <der...@us...> - 2005-06-19 12:01:21
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26937/htmlparser Modified Files: build.xml Log Message: Changes to allow compilation of htmllexer.jar by gcj. Move non-JDK1.1 functionality to HttpHeader class. Unhook NodeList from filters by removing searchFor(cls) - use keepAllNodesThatMatch(new NodeClassFilter(cls)) instead. Include classes missing from closure set. Index: build.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/build.xml,v retrieving revision 1.78 retrieving revision 1.79 diff -C2 -d -r1.78 -r1.79 *** build.xml 24 Apr 2005 17:48:05 -0000 1.78 --- build.xml 19 Jun 2005 12:01:13 -0000 1.79 *************** *** 270,273 **** --- 270,274 ---- <include name="org/htmlparser/Node.java"/> <include name="org/htmlparser/NodeFactory.java"/> + <include name="org/htmlparser/NodeFilter.java"/> <include name="org/htmlparser/Remark.java"/> <include name="org/htmlparser/Tag.java"/> *************** *** 282,285 **** --- 283,287 ---- <include name="org/htmlparser/util/SimpleNodeIterator.java"/> <include name="org/htmlparser/util/sort/**/*.java"/> + <include name="org/htmlparser/visitors/NodeVisitor.java"/> <include name="org/htmlparser/parserHelper/SpecialHashtable.class"/> </javac> *************** *** 308,311 **** --- 310,314 ---- <include name="org/htmlparser/Node.class"/> <include name="org/htmlparser/NodeFactory.class"/> + <include name="org/htmlparser/NodeFilter.class"/> <include name="org/htmlparser/Remark.class"/> <include name="org/htmlparser/Tag.class"/> *************** *** 324,327 **** --- 327,331 ---- <include name="org/htmlparser/util/EncodingChangeException.class"/> <include name="org/htmlparser/util/sort/**/*.class"/> + <include name="org/htmlparser/visitors/NodeVisitor.class"/> <include name="org/htmlparser/parserHelper/SpecialHashtable.class"/> <manifest> |
From: Derrick O. <der...@us...> - 2005-06-14 10:37:50
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3209/src/org/htmlparser Modified Files: Parser.java Log Message: Update version to 1.5-20050614 Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.105 retrieving revision 1.106 diff -C2 -d -r1.105 -r1.106 *** Parser.java 24 Apr 2005 17:48:27 -0000 1.105 --- Parser.java 14 Jun 2005 10:37:34 -0000 1.106 *************** *** 125,129 **** */ public static final String ! VERSION_TYPE = "Integration Build" ; --- 125,129 ---- */ public static final String ! VERSION_TYPE = "Release Build" ; *************** *** 132,136 **** */ public static final String ! VERSION_DATE = "Mar 13, 2005" ; --- 132,136 ---- */ public static final String ! VERSION_DATE = "Jun 14, 2005" ; |
From: Derrick O. <der...@us...> - 2005-06-14 10:37:50
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3209/docs Modified Files: release.txt changes.txt Log Message: Update version to 1.5-20050614 Index: release.txt =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/release.txt,v retrieving revision 1.69 retrieving revision 1.70 diff -C2 -d -r1.69 -r1.70 *** release.txt 6 Apr 2005 10:28:01 -0000 1.69 --- release.txt 14 Jun 2005 10:37:33 -0000 1.70 *************** *** 1,3 **** ! HTMLParser Version 1.5 (Integration Build Mar 13, 2005) ********************************************* --- 1,3 ---- ! HTMLParser Version 1.5 (Release Build Jun 14, 2005) ********************************************* *************** *** 29,35 **** New APIs Implement rudimentary sax parser. Currently exposes DOM parser via sax project ! A new http package is added, the primary class being Connectionmanager which ! handles proxies, passwords and cookies. Some testing still needed. ! Also removed some line separator cruft. Added parseCDATA to the Lexer, used in script and style scanners. Note that this is significantly new behaviour that now adheres to appendix --- 29,35 ---- New APIs Implement rudimentary sax parser. Currently exposes DOM parser via sax project ! A new http package is added, the primary class being Connectionmanager which ! handles proxies, passwords and cookies. Some testing still needed. ! Also removed some line separator cruft. Added parseCDATA to the Lexer, used in script and style scanners. Note that this is significantly new behaviour that now adheres to appendix *************** *** 41,51 **** Updated the logo and included the LGPL license. Fixed the Windows batch files. ! Added optional "classes" property to build.xml. This directory is where class files are put. It defaults to src. To use: ant -Dclasses=classdir <target> where classdir is/will-be a peer directory to src. Refactoring ! 
Added static STRICT flag to ScriptScanner to revert to legacy handling of broken ETAGO (</). If STRICT is true, scan according to HTML specification, else if false, scan with quote smart state machine which heuristically --- 41,52 ---- Updated the logo and included the LGPL license. Fixed the Windows batch files. ! Added optional "classes" property to build.xml. This directory is where class files are put. It defaults to src. To use: ant -Dclasses=classdir <target> where classdir is/will-be a peer directory to src. + Fixed various end user experience issues. Refactoring ! Added static STRICT flag to ScriptScanner to revert to legacy handling of broken ETAGO (</). If STRICT is true, scan according to HTML specification, else if false, scan with quote smart state machine which heuristically *************** *** 66,72 **** --- 67,75 ---- Incorporate patch #1004985 Page.java, by making getCharset() and findCharset() static. Incorporated some speed optimizations based on profiling. + Deprecated node decorators. Filters Added CssSelectorNodeFilter and RegExFilter. Added the filter builder tool. + Added link pattern filters LinkRegexFilter and LinkStringFilter. 
Enhancement Requests Index: changes.txt =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/changes.txt,v retrieving revision 1.205 retrieving revision 1.206 diff -C2 -d -r1.205 -r1.206 *** changes.txt 13 Mar 2005 15:36:08 -0000 1.205 --- changes.txt 14 Jun 2005 10:37:33 -0000 1.206 *************** *** 16,19 **** --- 16,241 ---- ******************************************************************************* + Release Build 1.5 - 20050614 + -------------------------------- + + 2005-05-15 07:49 derrickoswald + + * resources/htmlparser_checks.xml, + src/org/htmlparser/Attribute.java, + src/org/htmlparser/NodeFactory.java, + src/org/htmlparser/NodeFilter.java, src/org/htmlparser/Remark.java, + src/org/htmlparser/Tag.java, src/org/htmlparser/Text.java, + src/org/htmlparser/beans/BeanyBaby.java, + src/org/htmlparser/beans/FilterBean.java, + src/org/htmlparser/beans/HTMLLinkBean.java, + src/org/htmlparser/beans/HTMLTextBean.java, + src/org/htmlparser/beans/LinkBean.java, + src/org/htmlparser/beans/StringBean.java, + src/org/htmlparser/filters/CssSelectorNodeFilter.java, + src/org/htmlparser/filters/HasAttributeFilter.java, + src/org/htmlparser/filters/HasChildFilter.java, + src/org/htmlparser/filters/HasParentFilter.java, + src/org/htmlparser/filters/HasSiblingFilter.java, + src/org/htmlparser/filters/IsEqualFilter.java, + src/org/htmlparser/filters/LinkRegexFilter.java, + src/org/htmlparser/filters/LinkStringFilter.java, + src/org/htmlparser/filters/NodeClassFilter.java, + src/org/htmlparser/filters/NotFilter.java, + src/org/htmlparser/filters/OrFilter.java, + src/org/htmlparser/filters/RegexFilter.java, + src/org/htmlparser/filters/StringFilter.java, + src/org/htmlparser/filters/TagNameFilter.java, + src/org/htmlparser/http/ConnectionManager.java, + src/org/htmlparser/http/Cookie.java, + src/org/htmlparser/lexer/Cursor.java, + src/org/htmlparser/lexer/InputStreamSource.java, + 
src/org/htmlparser/lexer/Lexer.java, + src/org/htmlparser/lexer/Page.java, + src/org/htmlparser/lexer/PageAttribute.java, + src/org/htmlparser/lexer/PageIndex.java, + src/org/htmlparser/lexer/Source.java, + src/org/htmlparser/lexer/Stream.java, + src/org/htmlparser/lexer/StringSource.java, + src/org/htmlparser/scanners/ScriptDecoder.java, + src/org/htmlparser/tests/lexerTests/KitTest.java, + src/org/htmlparser/tests/lexerTests/LexerTests.java, + src/org/htmlparser/tests/lexerTests/PageTests.java, + src/org/htmlparser/tests/lexerTests/TagTests.java, + src/org/htmlparser/tests/tagTests/InputTagTest.java, + src/org/htmlparser/tests/utilTests/SortTest.java, + src/org/htmlparser/util/ParserUtils.java: + + Documentation revamp part four. + Remove some checkstyle warnings. + + 2005-05-13 06:44 derrickoswald + + * docs/contributors.html, src/org/htmlparser/sax/XMLReader.java: + + Add parse(InputSource) suggested by Jamie McCrindle. + + 2005-05-10 18:11 derrickoswald + + * src/org/htmlparser/tests/tagTests/SelectTagTest.java: + + Remove Shamil's email address. + + 2005-04-24 13:48 derrickoswald + + * build.xml, docs/main.html, lib/checkstyle-all-3.1.jar, + lib/fit.jar, resources/htmlparser_checks.xml, + src/doc-files/building.html, src/doc-files/overview.html, + src/doc-files/using.html, src/org/htmlparser/Node.java, + src/org/htmlparser/Parser.java, + src/org/htmlparser/PrototypicalNodeFactory.java, + src/org/htmlparser/tags/package.html, + src/org/htmlparser/tests/ParserTest.java, + src/org/htmlparser/visitors/NodeVisitor.java: + + Documentation revamp part three. + Reworked some JavaDoc descriptions. + Added "HTML Parser for dummies" introductory text. + Removed checkstyle.jar and fit.jar (and it's cruft). 
+ + 2005-04-12 07:27 derrickoswald + + * src/org/htmlparser/: Attribute.java, beans/package.html, + lexer/Cursor.java, lexer/InputStreamSource.java, lexer/Lexer.java, + lexer/Page.java, lexer/PageAttribute.java, lexer/Source.java, + lexer/Stream.java, lexer/StringSource.java, lexer/package.html, + lexerapplications/thumbelina/PicturePanel.java, + parserapplications/LinkExtractor.java, + parserapplications/SiteCapturer.java, + parserapplications/StringExtractor.java, + parserapplications/WikiCapturer.java, + parserapplications/package.html, + parserapplications/filterbuilder/Filter.java, + parserapplications/filterbuilder/FilterBuilder.java, + parserapplications/filterbuilder/HtmlTreeCellRenderer.java, + parserapplications/filterbuilder/HtmlTreeModel.java, + parserapplications/filterbuilder/SubFilterList.java, + parserapplications/filterbuilder/layouts/NullLayoutManager.java, + parserapplications/filterbuilder/layouts/VerticalLayoutManager.java, + parserapplications/filterbuilder/wrappers/AndFilterWrapper.java, + parserapplications/filterbuilder/wrappers/HasAttributeFilterWrapper.java, + parserapplications/filterbuilder/wrappers/HasChildFilterWrapper.java, + parserapplications/filterbuilder/wrappers/HasParentFilterWrapper.java, + parserapplications/filterbuilder/wrappers/HasSiblingFilterWrapper.java, + parserapplications/filterbuilder/wrappers/NodeClassFilterWrapper.java, + parserapplications/filterbuilder/wrappers/NotFilterWrapper.java, + parserapplications/filterbuilder/wrappers/OrFilterWrapper.java, + parserapplications/filterbuilder/wrappers/RegexFilterWrapper.java, + parserapplications/filterbuilder/wrappers/StringFilterWrapper.java, + parserapplications/filterbuilder/wrappers/TagNameFilterWrapper.java, + sax/Feedback.java, sax/XMLReader.java: + + Documentation revamp part two. 
+ + 2005-04-10 19:20 derrickoswald + + * bin/beanybaby.bat, bin/beanybaby.cmd, bin/filterbuilder.bat, + bin/filterbuilder.cmd, bin/lexer.bat, bin/lexer.cmd, + bin/linkextractor.bat, bin/linkextractor.cmd, bin/parser.bat, + bin/parser.cmd, bin/sitecapturer, bin/sitecapturer.cmd, + bin/stringextractor.bat, bin/stringextractor.cmd, + bin/thumbelina.bat, bin/thumbelina.cmd, bin/translate.bat, + bin/translate.cmd, src/org/htmlparser/Attribute.java, + src/org/htmlparser/Node.java, src/org/htmlparser/NodeFactory.java, + src/org/htmlparser/PrototypicalNodeFactory.java, + src/org/htmlparser/Remark.java, + src/org/htmlparser/StringNodeFactory.java, + src/org/htmlparser/Tag.java, src/org/htmlparser/Text.java, + src/org/htmlparser/beans/BeanyBaby.java, + src/org/htmlparser/beans/FilterBean.java, + src/org/htmlparser/beans/HTMLLinkBean.java, + src/org/htmlparser/beans/HTMLTextBean.java, + src/org/htmlparser/beans/LinkBean.java, + src/org/htmlparser/beans/StringBean.java, + src/org/htmlparser/beans/package.html, + src/org/htmlparser/filters/AndFilter.java, + src/org/htmlparser/filters/CssSelectorNodeFilter.java, + src/org/htmlparser/filters/HasAttributeFilter.java, + src/org/htmlparser/filters/HasChildFilter.java, + src/org/htmlparser/filters/HasParentFilter.java, + src/org/htmlparser/filters/HasSiblingFilter.java, + src/org/htmlparser/filters/LinkRegexFilter.java, + src/org/htmlparser/filters/LinkStringFilter.java, + src/org/htmlparser/filters/NodeClassFilter.java, + src/org/htmlparser/filters/NotFilter.java, + src/org/htmlparser/filters/OrFilter.java, + src/org/htmlparser/filters/RegexFilter.java, + src/org/htmlparser/filters/TagNameFilter.java, + src/org/htmlparser/http/ConnectionManager.java, + src/org/htmlparser/http/ConnectionMonitor.java, + src/org/htmlparser/http/Cookie.java, + src/org/htmlparser/http/package.html, + src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java, + src/org/htmlparser/nodeDecorators/DecodingNode.java, + 
src/org/htmlparser/nodeDecorators/EscapeCharacterRemovingNode.java, + src/org/htmlparser/nodeDecorators/NonBreakingSpaceConvertingNode.java, + src/org/htmlparser/nodeDecorators/package.html, + src/org/htmlparser/nodes/AbstractNode.java, + src/org/htmlparser/nodes/RemarkNode.java, + src/org/htmlparser/nodes/TagNode.java, + src/org/htmlparser/nodes/TextNode.java, + src/org/htmlparser/nodes/package.html, + src/org/htmlparser/parserapplications/filterbuilder/FilterBuilder.java, + src/org/htmlparser/scanners/CompositeTagScanner.java, + src/org/htmlparser/tags/BaseHrefTag.java, + src/org/htmlparser/tags/BodyTag.java, + src/org/htmlparser/tags/CompositeTag.java, + src/org/htmlparser/tags/DoctypeTag.java, + src/org/htmlparser/tags/FormTag.java, + src/org/htmlparser/tags/FrameSetTag.java, + src/org/htmlparser/tags/FrameTag.java, + src/org/htmlparser/tags/HeadTag.java, + src/org/htmlparser/tags/ImageTag.java, + src/org/htmlparser/tags/JspTag.java, + src/org/htmlparser/tags/LabelTag.java, + src/org/htmlparser/tags/LinkTag.java, + src/org/htmlparser/tags/MetaTag.java, + src/org/htmlparser/tags/OptionTag.java, + src/org/htmlparser/tags/ScriptTag.java, + src/org/htmlparser/tags/SelectTag.java, + src/org/htmlparser/tags/TableRow.java, + src/org/htmlparser/tags/TableTag.java, + src/org/htmlparser/tags/TextareaTag.java, + src/org/htmlparser/tags/TitleTag.java, + src/org/htmlparser/tags/package.html, + src/org/htmlparser/tests/lexerTests/KitTest.java, + src/org/htmlparser/tests/lexerTests/LexerTests.java: + + Documentation revamp part one. + Deprecated node decorators. + Added doSemanticAction for Text and Comment nodes. + Added missing sitecapturer scripts. + Fixed DOS batch files to work when called from any location. 
+ + 2005-04-06 06:27 derrickoswald + + * build.xml, docs/release.txt, docs/samples.html: + + End user experience issues: + remove multiple wiki files in zip + fix sample application links + change readme.txt to use Windows line endings + change copyright date + + 2005-04-06 06:20 derrickoswald + + * docs/contributors.html, + src/org/htmlparser/filters/LinkRegexFilter.java, + src/org/htmlparser/filters/LinkStringFilter.java: + + Add link pattern filters submitted by John Derrick. + + 2005-04-04 20:48 derrickoswald + + * src/org/htmlparser/: NodeFilter.java, Parser.java, package.html, + parserapplications/SiteCapturer.java: + + Update javadocs. + Enable SiteCapturer to handle resource names containing spaces. + Integration Build 1.5 - 20050313 -------------------------------- |
From: Derrick O. <der...@us...> - 2005-06-14 10:37:50
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3209/src/org/htmlparser/filters Modified Files: AndFilter.java Log Message: Update version to 1.5-20050614 Index: AndFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/AndFilter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AndFilter.java 10 Apr 2005 23:20:43 -0000 1.3 --- AndFilter.java 14 Jun 2005 10:37:33 -0000 1.4 *************** *** 34,39 **** */ public class AndFilter ! implements ! NodeFilter { /** --- 34,39 ---- */ public class AndFilter ! implements ! NodeFilter { /** *************** *** 54,58 **** /** ! * Creates a new instance of an AndFilter that accepts nodes acceptable to both filters. * @param left One filter. * @param right The other filter. --- 54,58 ---- /** ! * Creates an AndFilter that accepts nodes acceptable to both filters. * @param left One filter. * @param right The other filter. *************** *** 61,65 **** { NodeFilter[] predicates; ! predicates = new NodeFilter[2]; predicates[0] = left; --- 61,65 ---- { NodeFilter[] predicates; ! predicates = new NodeFilter[2]; predicates[0] = left; *************** *** 76,80 **** return (mPredicates); } ! /** * Set the predicates for this AndFilter. --- 76,80 ---- return (mPredicates); } ! /** * Set the predicates for this AndFilter. *************** *** 101,111 **** { boolean ret; ! ret = true; ! for (int i = 0; ret && (i < mPredicates.length); i++) if (!mPredicates[i].accept (node)) ret = false; ! return (ret); } --- 101,111 ---- { boolean ret; ! ret = true; ! for (int i = 0; ret && (i < mPredicates.length); i++) if (!mPredicates[i].accept (node)) ret = false; ! return (ret); } |
From: Derrick O. <der...@us...> - 2005-05-15 11:50:13
|
Update of /cvsroot/htmlparser/htmlparser/resources In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/resources Modified Files: htmlparser_checks.xml Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: htmlparser_checks.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/resources/htmlparser_checks.xml,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** htmlparser_checks.xml 24 Apr 2005 17:48:25 -0000 1.2 --- htmlparser_checks.xml 15 May 2005 11:49:02 -0000 1.3 *************** *** 136,140 **** <!-- Checks for common coding problems --> <!-- See http://checkstyle.sf.net/config_coding.html --> - <module name="AvoidInlineConditionals"/> <module name="DoubleCheckedLocking"/> <!-- MY FAVOURITE --> <module name="EmptyStatement"/> --- 136,139 ---- |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/tests/tagTests Modified Files: InputTagTest.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: InputTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/InputTagTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** InputTagTest.java 22 May 2004 03:57:31 -0000 1.40 --- InputTagTest.java 15 May 2005 11:49:05 -0000 1.41 *************** *** 122,124 **** } ! } \ No newline at end of file --- 122,124 ---- } ! } |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/http Modified Files: ConnectionManager.java Cookie.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: Cookie.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/Cookie.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Cookie.java 10 Apr 2005 23:20:43 -0000 1.2 --- Cookie.java 15 May 2005 11:49:04 -0000 1.3 *************** *** 58,72 **** * Cookies are being standardized by the IETF. This class supports the original * Cookie specification (from Netscape Communications Corp.) as well as the ! * updated <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> specification. */ public class Cookie ! implements ! Cloneable, ! Serializable { ! // ! // from RFC 2068, token special case characters ! // ! private static final String mSpecials = "()<>@,;:\\\"/[]?={} \t"; /** --- 58,74 ---- * Cookies are being standardized by the IETF. This class supports the original * Cookie specification (from Netscape Communications Corp.) as well as the ! * updated <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> ! * specification. */ public class Cookie ! implements ! Cloneable, ! Serializable { ! /** ! * Special characters to watch out for. ! * From RFC 2068, token special case characters. ! */ ! private static final String SPECIALS = "()<>@,;:\\\"/[]?={} \t"; /** *************** *** 330,334 **** } ! /* * Return true iff the string counts as an HTTP/1.1 "token". * Valid tokens cannot have characters outside the ASCII range 0x20-0x7e, --- 332,336 ---- } ! /** * Return true iff the string counts as an HTTP/1.1 "token". * Valid tokens cannot have characters outside the ASCII range 0x20-0x7e, *************** *** 342,346 **** char c; boolean ret; ! 
ret = true; --- 344,348 ---- char c; boolean ret; ! ret = true; *************** *** 349,353 **** { c = value.charAt (i); ! if (c < 0x20 || c >= 0x7f || mSpecials.indexOf (c) != -1) ret = false; } --- 351,355 ---- { c = value.charAt (i); ! if (c < ' ' || c > '~' || SPECIALS.indexOf (c) != -1) ret = false; } *************** *** 379,383 **** { StringBuffer ret; ! ret = new StringBuffer (50); if (getSecure ()) --- 381,385 ---- { StringBuffer ret; ! ret = new StringBuffer (50); if (getSecure ()) *************** *** 394,398 **** ret.append (" for "); ret.append (getDomain ()); ! if (null != getPath ()) ret.append (getPath ()); --- 396,400 ---- ret.append (" for "); ret.append (getDomain ()); ! if (null != getPath ()) ret.append (getPath ()); *************** *** 422,427 **** ret.append (getComment ()); } ! return (ret.toString ()); } ! } \ No newline at end of file --- 424,429 ---- ret.append (getComment ()); } ! return (ret.toString ()); } ! } Index: ConnectionManager.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/ConnectionManager.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** ConnectionManager.java 10 Apr 2005 23:20:43 -0000 1.3 --- ConnectionManager.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 39,42 **** --- 39,43 ---- import java.util.Enumeration; import java.util.Hashtable; + import java.util.Iterator; import java.util.List; import java.util.Map; *************** *** 48,52 **** /** ! * Handles proxies, password protected URLs and request properties including cookies. */ public class ConnectionManager --- 49,54 ---- /** ! * Handles proxies, password protected URLs and request properties ! * including cookies. */ public class ConnectionManager *************** *** 59,81 **** static { ! 
mDefaultRequestProperties.put ("User-Agent", "HTMLParser/" + org.htmlparser.Parser.VERSION_NUMBER); mDefaultRequestProperties.put ("Accept-Encoding", "gzip, deflate"); } ! /** * Messages for page not there (404). */ ! static private final String[] mFourOhFour = { ! "The web site you seek cannot be located, but countless more exist", ! "You step in the stream, but the water has moved on. This page is not here.", ! "Yesterday the page existed. Today it does not. The internet is like that.", ! "That page was so big. It might have been very useful. But now it is gone.", ! "Three things are certain: death, taxes and broken links. Guess which has occured.", ! "Chaos reigns within. Reflect, repent and enter the correct URL. Order shall return.", ! "Stay the patient course. Of little worth is your ire. The page is not found.", "A non-existant URL reduces your expensive computer to a simple stone.", ! "Many people have visited that page. Today, you are not one of the lucky ones.", ! "Cutting the wind with a knife. Bookmarking a URL. Both are ephemeral.", }; --- 61,93 ---- static { ! mDefaultRequestProperties.put ("User-Agent", "HTMLParser/" ! + org.htmlparser.Parser.VERSION_NUMBER); mDefaultRequestProperties.put ("Accept-Encoding", "gzip, deflate"); } ! /** * Messages for page not there (404). */ ! private static final String[] FOUR_OH_FOUR = { ! "The web site you seek cannot be located," ! + " but countless more exist", ! "You step in the stream, but the water has moved on." ! + " This page is not here.", ! "Yesterday the page existed. Today it does not." ! + " The internet is like that.", ! "That page was so big. It might have been very useful." ! + " But now it is gone.", ! "Three things are certain: death, taxes and broken links." ! + " Guess which has occured.", ! "Chaos reigns within. Reflect, repent and enter the correct URL." ! + " Order shall return.", ! "Stay the patient course. Of little worth is your ire." ! 
+ " The page is not found.", "A non-existant URL reduces your expensive computer to a simple stone.", ! "Many people have visited that page." ! + " Today, you are not one of the lucky ones.", ! "Cutting the wind with a knife. Bookmarking a URL." ! + " Both are ephemeral.", }; *************** *** 83,94 **** * Base 64 character translation table. */ ! static private final char[] mCharacterTable = ! "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); ! /** * Request header fields. */ protected Hashtable mRequestProperties; ! /** * The proxy server name. --- 95,107 ---- * Base 64 character translation table. */ ! private static final char[] BASE64_CHAR_TABLE = ! ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" ! + "abcdefghijklmnopqrstuvwxyz0123456789+/").toCharArray (); ! /** * Request header fields. */ protected Hashtable mRequestProperties; ! /** * The proxy server name. *************** *** 142,146 **** /** * Create a connection manager with the given connection properties. ! * @param properties Name value pairs that are to be added to the HTTP request. */ public ConnectionManager (Hashtable properties) --- 155,159 ---- /** * Create a connection manager with the given connection properties. ! * @param properties Name/value pairs to be added to the HTTP request. */ public ConnectionManager (Hashtable properties) *************** *** 155,159 **** mCookieJar = null; mMonitor = null; - } --- 168,171 ---- *************** *** 186,195 **** * connection is fetched. Setting these request header fields affects all * subsequent connections opened by the parser. For more direct control ! * create a <code>URLConnection</code> massage it the way you want and * then set it on the parser.<p> ! * From <a href="http://www.ietf.org/rfc/rfc2616.txt">RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1</a>: * <pre> * 5.3 Request Header Fields ! 
* * The request-header fields allow the client to pass additional * information about the request, and about the client itself, to the --- 198,208 ---- * connection is fetched. Setting these request header fields affects all * subsequent connections opened by the parser. For more direct control ! * create a <code>URLConnection</code> massage it the way you want and * then set it on the parser.<p> ! * From <a href="http://www.ietf.org/rfc/rfc2616.txt"> ! * RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1</a>: * <pre> * 5.3 Request Header Fields ! * * The request-header fields allow the client to pass additional * information about the request, and about the client itself, to the *************** *** 197,201 **** * equivalent to the parameters on a programming language method * invocation. ! * * request-header = Accept ; Section 14.1 * | Accept-Charset ; Section 14.2 --- 210,214 ---- * equivalent to the parameters on a programming language method * invocation. ! * * request-header = Accept ; Section 14.1 * | Accept-Charset ; Section 14.2 *************** *** 217,221 **** * | TE ; Section 14.39 * | User-Agent ; Section 14.43 ! * * Request-header field names can be extended reliably only in * combination with a change in the protocol version. However, new or --- 230,234 ---- * | TE ; Section 14.39 * | User-Agent ; Section 14.43 ! * * Request-header field names can be extended reliably only in * combination with a change in the protocol version. However, new or *************** *** 249,253 **** String key; List items; ! buffer = new StringBuffer (1024); buffer.append (connection.getRequestMethod ()); --- 262,266 ---- String key; List items; ! buffer = new StringBuffer (1024); buffer.append (connection.getRequestMethod ()); *************** *** 256,262 **** buffer.append (" HTTP/1.1\n"); map = connection.getRequestProperties (); ! for (java.util.Iterator iterator = map.keySet ().iterator (); iterator.hasNext ();) { ! 
key = (String)iterator.next (); items = (List)map.get (key); buffer.append (key); --- 269,275 ---- buffer.append (" HTTP/1.1\n"); map = connection.getRequestProperties (); ! for (Iterator iter = map.keySet ().iterator (); iter.hasNext (); ) { ! key = (String)iter.next (); items = (List)map.get (key); buffer.append (key); *************** *** 270,274 **** buffer.append ("\n"); } ! return (buffer.toString ()); } --- 283,287 ---- buffer.append ("\n"); } ! return (buffer.toString ()); } *************** *** 281,288 **** * <em>This header is generated from the contents of the connection * and may not be exactly the same as the response that was received.</em> ! * @param connection The connection to convert into an HTTP response header. * @return The string that was sent as the HTTP response. */ ! public static String getResponseHeader (HttpURLConnection connection) { // dump it --- 294,301 ---- * <em>This header is generated from the contents of the connection * and may not be exactly the same as the response that was received.</em> ! * @param conn The connection to convert into an HTTP response header. * @return The string that was sent as the HTTP response. */ ! public static String getResponseHeader (HttpURLConnection conn) { // dump it *************** *** 296,320 **** try { ! code = connection.getResponseCode (); ! if (-1 != code) ! { ! message = connection.getResponseMessage (); ! buffer.append ("HTTP/1.1 "); ! buffer.append (code); ! buffer.append (" "); ! buffer.append (message); ! buffer.append ("\n"); ! for (int i = 0; null != (value = connection.getHeaderField (i)); i++) ! { ! key = connection.getHeaderFieldKey (i); ! if (null != key) ! { ! buffer.append (key); ! buffer.append (": "); ! buffer.append (value); ! buffer.append ("\n"); ! } ! } ! } } catch (IOException ioe) --- 309,333 ---- try { ! code = conn.getResponseCode (); ! if (-1 != code) ! { ! message = conn.getResponseMessage (); ! buffer.append ("HTTP/1.1 "); ! buffer.append (code); ! 
buffer.append (" "); ! buffer.append (message); ! buffer.append ("\n"); ! for (int i = 0; null != (value = conn.getHeaderField (i)); i++) ! { ! key = conn.getHeaderFieldKey (i); ! if (null != key) ! { ! buffer.append (key); ! buffer.append (": "); ! buffer.append (value); ! buffer.append ("\n"); ! } ! } ! } } catch (IOException ioe) *************** *** 380,384 **** return (mProxyPort); } ! /** * Set the proxy port number. --- 393,397 ---- return (mProxyPort); } ! /** * Set the proxy port number. *************** *** 520,524 **** break; } ! else if (path.startsWith (probe.getPath ())) { cookies.insertElementAt (cookie, j); --- 533,537 ---- break; } ! else if (path.startsWith (probe.getPath ())) { cookies.insertElementAt (cookie, j); *************** *** 534,538 **** mCookieJar.put (domain, cookies); } ! } --- 547,551 ---- mCookieJar.put (domain, cookies); } ! } *************** *** 576,579 **** --- 589,594 ---- String port2 = null; // old http.proxyPort value HttpURLConnection http; + String auth; + String encoded; URLConnection ret; *************** *** 583,661 **** { // set up for proxy ! if ((null != getProxyHost ()) && (0 != getProxyPort ())) ! { ! sysprops = System.getProperties (); ! set = (String)sysprops.put ("proxySet", "true"); ! host = (String)sysprops.put ("proxyHost", getProxyHost ()); ! port = (String)sysprops.put ("proxyPort", Integer.toString (getProxyPort ())); ! // see http://java.sun.com/j2se/1.4.2/docs/guide/net/properties.html ! host2 = (String)sysprops.put ("http.proxyHost", getProxyHost ()); ! port2 = (String)sysprops.put ("http.proxyPort", Integer.toString (getProxyPort ())); ! System.setProperties (sysprops); ! ! } ! ! // open the connection... but don't connect yet ! ret = url.openConnection (); ! if (ret instanceof HttpURLConnection) ! { ! http = (HttpURLConnection)ret; ! ! // set the fixed request properties ! properties = getRequestProperties (); ! if (null != properties) ! 
for (enumeration = properties.keys (); enumeration.hasMoreElements ();) ! { ! key = (String)enumeration.nextElement (); ! value = (String)properties.get (key); ! ret.setRequestProperty (key, value); ! } ! ! // set the proxy name and password ! if ((null != getProxyUser ()) && (null != getProxyPassword ())) ! { ! String authorization = getProxyUser () + ":" + getProxyPassword (); ! String encodedauthorization = encode (authorization.getBytes("ISO-8859-1")); ! ret.setRequestProperty ("Proxy-Authorization", encodedauthorization); ! } ! ! // set the URL name and password ! if ((null != getUser ()) && (null != getPassword ())) ! { ! String authorization = getUser () + ":" + getPassword (); ! String encodedauthorization = encode (authorization.getBytes("ISO-8859-1")); ! ret.setRequestProperty ("Authorization", "Basic " + encodedauthorization); ! } ! ! // set the cookies based on the url ! addCookies (ret); ! if (null != getMonitor ()) ! getMonitor ().preConnect (http); ! } ! else ! http = null; ! try ! { ! ret.connect (); ! ! if (null != http) ! { ! if (null != getMonitor ()) ! getMonitor ().postConnect (http); ! ! parseCookies (ret); ! } ! } ! catch (UnknownHostException uhe) ! { ! int message = (int)(Math.random () * mFourOhFour.length); ! throw new ParserException (mFourOhFour[message], uhe); ! } ! catch (IOException ioe) ! { ! throw new ParserException (ioe.getMessage (), ioe); ! } } finally --- 598,682 ---- { // set up for proxy ! if ((null != getProxyHost ()) && (0 != getProxyPort ())) ! { ! sysprops = System.getProperties (); ! set = (String)sysprops.put ("proxySet", "true"); ! host = (String)sysprops.put ("proxyHost", getProxyHost ()); ! port = (String)sysprops.put ("proxyPort", ! Integer.toString (getProxyPort ())); ! // see http://java.sun.com/j2se/1.4.2/docs/guide/net/properties.html ! host2 = (String)sysprops.put ("http.proxyHost", ! getProxyHost ()); ! port2 = (String)sysprops.put ("http.proxyPort", ! Integer.toString (getProxyPort ())); ! 
System.setProperties (sysprops); ! ! } ! ! // open the connection... but don't connect yet ! ret = url.openConnection (); ! if (ret instanceof HttpURLConnection) ! { ! http = (HttpURLConnection)ret; ! ! // set the fixed request properties ! properties = getRequestProperties (); ! if (null != properties) ! for (enumeration = properties.keys (); ! enumeration.hasMoreElements ();) ! { ! key = (String)enumeration.nextElement (); ! value = (String)properties.get (key); ! ret.setRequestProperty (key, value); ! } ! ! // set the proxy name and password ! if ((null != getProxyUser ()) ! && (null != getProxyPassword ())) ! { ! auth = getProxyUser () + ":" + getProxyPassword (); ! encoded = encode (auth.getBytes("ISO-8859-1")); ! ret.setRequestProperty ("Proxy-Authorization", encoded); ! } ! ! // set the URL name and password ! if ((null != getUser ()) && (null != getPassword ())) ! { ! auth = getUser () + ":" + getPassword (); ! encoded = encode (auth.getBytes("ISO-8859-1")); ! ret.setRequestProperty ("Authorization", ! "Basic " + encoded); ! } ! ! // set the cookies based on the url ! addCookies (ret); ! if (null != getMonitor ()) ! getMonitor ().preConnect (http); ! } ! else ! http = null; ! try ! { ! ret.connect (); ! ! if (null != http) ! { ! if (null != getMonitor ()) ! getMonitor ().postConnect (http); ! ! parseCookies (ret); ! } ! } ! catch (UnknownHostException uhe) ! { ! int message = (int)(Math.random () * FOUR_OH_FOUR.length); ! throw new ParserException (FOUR_OH_FOUR[message], uhe); ! } ! catch (IOException ioe) ! { ! throw new ParserException (ioe.getMessage (), ioe); ! } } finally *************** *** 664,669 **** { sysprops = System.getProperties (); ! if (null != set) ! sysprops.put ("proxySet", set); else sysprops.remove ("proxySet"); --- 685,690 ---- { sysprops = System.getProperties (); ! if (null != set) ! sysprops.put ("proxySet", set); else sysprops.remove ("proxySet"); *************** *** 684,688 **** else sysprops.remove ("http.proxyPort"); ! 
System.setProperties (sysprops); } } --- 705,709 ---- else sysprops.remove ("http.proxyPort"); ! System.setProperties (sysprops); } } *************** *** 690,694 **** catch (IOException ioe) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); ParserException ex = new ParserException (msg, ioe); throw ex; --- 711,716 ---- catch (IOException ioe) { ! String msg = "Error in opening a connection to " ! + url.toExternalForm (); ParserException ex = new ParserException (msg, ioe); throw ex; *************** *** 698,707 **** } ! /** ! * Encodes a byte array into BASE64 in accordance with <a href="http://www.faqs.org/rfcs/rfc2045.html">RFC 2045</a>. ! * @param array The bytes to convert. ! * @return A BASE64 encoded string. ! */ ! public final static String encode (byte[] array) { int last; // last byte --- 720,730 ---- } ! /** ! * Encodes a byte array into BASE64 in accordance with ! * <a href="http://www.faqs.org/rfcs/rfc2045.html">RFC 2045</a>. ! * @param array The bytes to convert. ! * @return A BASE64 encoded string. ! */ ! public final static String encode (byte[] array) { int last; // last byte *************** *** 716,751 **** int n; // byte to encode int index; // index into output array ! String ret; if ((null != array) && (0 != array.length)) { ! last = array.length - 1; ! count = (last / 3 + 1) << 2; ! separators = (count - 1) / 76; ! length = count + separators; ! encoded = new char[length]; ! index = 0; ! separators = 0; ! for (int i = 0; i <= last; i += 3) ! { ! left = last - i; ! end = (left > 1 ? 2 : left); ! ! // collect 1 to 3 bytes to encode ! block = 0; ! r = 16; ! for (int j = 0; j <= end; j++) ! { ! n = array[i + j]; ! block += (n < 0 ? n + 256 : n) << r; ! r -= 8; ! } ! ! // encode into 2-4 chars padding with '=' if no data left ! encoded[index++] = mCharacterTable[(block >>> 18) & 0x3f]; ! encoded[index++] = mCharacterTable[(block >>> 12) & 0x3f]; ! encoded[index++] = left > 0 ? 
mCharacterTable[(block >>> 6) & 0x3f] : '='; ! encoded[index++] = left > 1 ? mCharacterTable[block & 0x3f] : '='; ! if ((0 == (index - separators) % 76) && (index < length)) { --- 739,778 ---- int n; // byte to encode int index; // index into output array ! String ret; if ((null != array) && (0 != array.length)) { ! last = array.length - 1; ! count = (last / 3 + 1) << 2; ! separators = (count - 1) / 76; ! length = count + separators; ! encoded = new char[length]; ! index = 0; ! separators = 0; ! for (int i = 0; i <= last; i += 3) ! { ! left = last - i; ! end = (left > 1 ? 2 : left); ! ! // collect 1 to 3 bytes to encode ! block = 0; ! r = 16; ! for (int j = 0; j <= end; j++) ! { ! n = array[i + j]; ! block += (n < 0 ? n + 256 : n) << r; ! r -= 8; ! } ! ! // encode into 2-4 chars padding with '=' if no data left ! encoded[index++] = BASE64_CHAR_TABLE[(block >>> 18) & 0x3f]; ! encoded[index++] = BASE64_CHAR_TABLE[(block >>> 12) & 0x3f]; ! encoded[index++] = left > 0 ? ! BASE64_CHAR_TABLE[(block >>> 6) & 0x3f] : ! '='; ! encoded[index++] = left > 1 ? ! BASE64_CHAR_TABLE[block & 0x3f] : ! '='; ! if ((0 == (index - separators) % 76) && (index < length)) { *************** *** 753,758 **** separators += 1; } ! } ! ret = new String (encoded); } else --- 780,785 ---- separators += 1; } ! } ! ret = new String (encoded); } else *************** *** 764,768 **** /** * Turn spaces into %20. ! * ToDo: make this more generic (see RFE #1010593 provide URL encoding/decoding utilities). * @param url The url containing spaces. * @return The URL with spaces as %20 sequences. --- 791,796 ---- /** * Turn spaces into %20. ! * ToDo: make this more generic ! * (see RFE #1010593 provide URL encoding/decoding utilities). * @param url The url containing spaces. * @return The URL with spaces as %20 sequences. *************** *** 826,830 **** File file = new File (string); resource = file.getCanonicalPath (); ! 
buffer = new StringBuffer (prefix.length () + resource.length ()); buffer.append (prefix); if (!resource.startsWith ("/")) --- 854,859 ---- File file = new File (string); resource = file.getCanonicalPath (); ! buffer = new StringBuffer (prefix.length () ! + resource.length ()); buffer.append (prefix); if (!resource.startsWith ("/")) *************** *** 836,840 **** catch (MalformedURLException murle2) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ParserException ex = new ParserException (msg, murle2); throw ex; --- 865,869 ---- catch (MalformedURLException murle2) { ! String msg = "Error in opening a connection to " + string; ParserException ex = new ParserException (msg, murle2); throw ex; *************** *** 842,846 **** catch (IOException ioe) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ParserException ex = new ParserException (msg, ioe); throw ex; --- 871,875 ---- catch (IOException ioe) { ! String msg = "Error in opening a connection to " + string; ParserException ex = new ParserException (msg, ioe); throw ex; *************** *** 880,902 **** if (null != mCookieJar) { ! list = null; ! // get the site from the URL ! url = connection.getURL (); ! host = url.getHost (); ! path = url.getPath (); ! if (0 == path.length ()) ! path = "/"; ! if (null != host) ! { // http://www.objectsdevelopment.com/portal/modules/freecontent/content/javawebserver.html ! list = addCookies ((Vector)mCookieJar.get (host), path, list); ! domain = getDomain (host); ! if (null != domain) ! list = addCookies ((Vector)mCookieJar.get (domain), path, list); ! else ! // maybe it is the domain we're accessing ! list = addCookies ((Vector)mCookieJar.get ("." + host), path, list); ! } ! if (null != list) ! connection.setRequestProperty ("Cookie", generateCookieProperty (list)); } } --- 909,934 ---- if (null != mCookieJar) { ! list = null; ! // get the site from the URL ! url = connection.getURL (); ! 
host = url.getHost (); ! path = url.getPath (); ! if (0 == path.length ()) ! path = "/"; ! if (null != host) ! { // http://www.objectsdevelopment.com/portal/modules/freecontent/content/javawebserver.html ! list = addCookies ((Vector)mCookieJar.get (host), path, list); ! domain = getDomain (host); ! if (null != domain) ! list = addCookies ((Vector)mCookieJar.get (domain), ! path, list); ! else ! // maybe it is the domain we're accessing ! list = addCookies ((Vector)mCookieJar.get ("." + host), ! path, list); ! } ! if (null != list) ! connection.setRequestProperty ("Cookie", ! generateCookieProperty (list)); } } *************** *** 974,980 **** if (ok) { ! // so take everything after the first token ! server = tokenizer.nextToken (); ! length = server.length (); ret = host.substring (length); } --- 1006,1012 ---- if (ok) { ! // so take everything after the first token ! server = tokenizer.nextToken (); ! length = server.length (); ret = host.substring (length); } *************** *** 985,991 **** /** ! * Creates the cookie request property value from the list of valid cookies for the domain. * @param cookies The list of valid cookies to be encoded in the request. ! * @return A string suitable for inclusion as the value of the "Cookie:" request property. */ protected String generateCookieProperty (Vector cookies) --- 1017,1025 ---- /** ! * Creates the cookie request property value from the list of ! * valid cookies for the domain. * @param cookies The list of valid cookies to be encoded in the request. ! * @return A string suitable for inclusion as the value of ! * the "Cookie:" request property. */ protected String generateCookieProperty (Vector cookies) *************** *** 1001,1005 **** version = 0; for (int i = 0; i < cookies.size (); i++) ! version = Math.max (version, ((Cookie)cookies.elementAt (i)).getVersion ()); if (0 != version) { --- 1035,1040 ---- version = 0; for (int i = 0; i < cookies.size (); i++) ! version = Math.max (version, ! 
((Cookie)cookies.elementAt (i)).getVersion ()); if (0 != version) { *************** *** 1085,1089 **** { cookie = null; ! continue; } --- 1120,1124 ---- { cookie = null; ! continue; } *************** *** 1111,1159 **** else { ! if (key.equals ("expires")) // Wdy, DD-Mon-YY HH:MM:SS GMT ! { ! String comma = tokenizer.nextToken (); ! String rest = tokenizer.nextToken (); ! SimpleDateFormat format = new SimpleDateFormat ("EEE, dd-MMM-yy kk:mm:ss z"); ! try ! { ! Date date = format.parse (value + comma + rest); ! cookie.setExpiryDate (date); ! } ! catch (ParseException pe) ! { ! // ok now what ! cookie.setExpiryDate (null); ! } ! } ! else ! if (key.equals ("domain")) ! cookie.setDomain (value); ! else ! if (key.equals ("path")) ! cookie.setPath (value); ! else ! if (key.equals ("secure")) ! cookie.setSecure (true); ! else ! if (key.equals ("comment")) ! cookie.setComment (value); ! else ! if (key.equals ("version")) ! cookie.setVersion (Integer.parseInt (value)); ! else ! if (key.equals ("max-age")) ! { ! Date date = new Date (); ! long then = date.getTime () + Integer.parseInt (value) * 1000; ! date.setTime (then); ! cookie.setExpiryDate (date); ! } ! else ! { // error,? unknown attribute, ! // maybe just another cookie not separated by a comma ! cookie = new Cookie (name, value); ! cookies.addElement (cookie); ! } } } --- 1146,1200 ---- else { ! if (key.equals ("expires")) // Wdy, DD-Mon-YY HH:MM:SS GMT ! { ! String comma = tokenizer.nextToken (); ! String rest = tokenizer.nextToken (); ! SimpleDateFormat format = new SimpleDateFormat ( ! "EEE, dd-MMM-yy kk:mm:ss z"); ! try ! { ! Date date = format.parse (value + comma + rest); ! cookie.setExpiryDate (date); ! } ! catch (ParseException pe) ! { ! // ok now what ! cookie.setExpiryDate (null); ! } ! } ! else ! if (key.equals ("domain")) ! cookie.setDomain (value); ! else ! if (key.equals ("path")) ! cookie.setPath (value); ! else ! if (key.equals ("secure")) ! cookie.setSecure (true); ! else ! 
if (key.equals ("comment")) ! cookie.setComment (value); ! else ! if (key.equals ("version")) ! cookie.setVersion ( ! Integer.parseInt (value)); ! else ! if (key.equals ("max-age")) ! { ! Date date = new Date (); ! long then = date.getTime () ! + Integer.parseInt (value) ! * 1000; ! date.setTime (then); ! cookie.setExpiryDate (date); ! } ! else ! { // error,? unknown attribute, ! // maybe just another cookie ! // not separated by a comma ! cookie = new Cookie (name, ! value); ! cookies.addElement (cookie); ! } } } |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/tests/utilTests Modified Files: SortTest.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: SortTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/SortTest.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** SortTest.java 31 Jul 2004 16:42:32 -0000 1.12 --- SortTest.java 15 May 2005 11:49:05 -0000 1.13 *************** *** 145,151 **** --- 145,166 ---- if (ret > Integer.MAX_VALUE) ret = Integer.MAX_VALUE; + if (0 == ret) + ret = getAbsolutePath ().hashCode () + - f.getAbsolutePath ().hashCode (); return ((int)ret); } + + public String toString () + { + StringBuffer ret; + + ret = new StringBuffer (128); + ret.append (this.getAbsolutePath ()); + ret.append ('@'); + ret.append (this.lastModified ()); + + return (ret.toString ()); + } } *************** *** 228,232 **** directory.removeElementAt (index); int ordinal = Sort.bsearch (directory, test); ! assertEquals ("ordinal not correct value", index, ordinal); // test the ordering of the objects --- 243,260 ---- directory.removeElementAt (index); int ordinal = Sort.bsearch (directory, test); ! if (index != ordinal) ! { ! for (int i = 0; i < directory.size (); i++) ! { ! if (index == i) ! System.out.print ('-'); ! else if (ordinal == i) ! System.out.print ('+'); ! else ! System.out.print (' '); ! System.out.println (directory.elementAt (i)); ! } ! fail ("ordinal not correct value, expected " + index + ", was " + ordinal); ! } // test the ordering of the objects |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/util Modified Files: ParserUtils.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: ParserUtils.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** ParserUtils.java 27 Aug 2004 09:54:27 -0000 1.46 --- ParserUtils.java 15 May 2005 11:49:05 -0000 1.47 *************** *** 1243,1245 **** } ! } \ No newline at end of file --- 1243,1245 ---- } ! } |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/filters Modified Files: CssSelectorNodeFilter.java HasAttributeFilter.java HasChildFilter.java HasParentFilter.java HasSiblingFilter.java IsEqualFilter.java LinkRegexFilter.java LinkStringFilter.java NodeClassFilter.java NotFilter.java OrFilter.java RegexFilter.java StringFilter.java TagNameFilter.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: StringFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/StringFilter.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** StringFilter.java 13 Feb 2005 20:36:00 -0000 1.5 --- StringFilter.java 15 May 2005 11:49:04 -0000 1.6 *************** *** 37,45 **** * This is a fairly simplistic filter, so for more sophisticated * string matching, for example newline and whitespace handling, ! * use a {@link RegexFilter} instead. */ public class StringFilter ! implements ! NodeFilter { /** --- 37,45 ---- * This is a fairly simplistic filter, so for more sophisticated * string matching, for example newline and whitespace handling, ! * use a {@link RegexFilter} instead. */ public class StringFilter ! implements ! NodeFilter { /** *************** *** 73,78 **** /** ! * Creates a new instance of a StringFilter that accepts string nodes containing a certain string. ! * The comparison is case insensitive, with conversions done using the default <code>Locale</code>. * @param pattern The pattern to search for. */ --- 73,79 ---- /** ! * Creates a StringFilter that accepts text nodes containing a string. ! * The comparison is case insensitive, with conversions done using ! * the default <code>Locale</code>. * @param pattern The pattern to search for. */ *************** *** 83,108 **** /** ! 
* Creates a new instance of a StringFilter that accepts string nodes containing a certain string. * @param pattern The pattern to search for. ! * @param case_sensitive If <code>true</code>, comparisons are performed ! * respecting case, with conversions done using the default <code>Locale</code>. */ ! public StringFilter (String pattern, boolean case_sensitive) { ! this (pattern, case_sensitive, null); } ! /** ! * Creates a new instance of a StringFilter that accepts string nodes containing a certain string. * @param pattern The pattern to search for. ! * @param case_sensitive If <code>true</code>, comparisons are performed * respecting case. * @param locale The locale to use when converting to uppercase. * If <code>null</code>, the default <code>Locale</code> is used. */ ! public StringFilter (String pattern, boolean case_sensitive, Locale locale) { mPattern = pattern; ! mCaseSensitive = case_sensitive; mLocale = (null == locale) ? Locale.getDefault () : locale; setUpperPattern (); --- 84,110 ---- /** ! * Creates a StringFilter that accepts text nodes containing a string. * @param pattern The pattern to search for. ! * @param sensitive If <code>true</code>, comparisons are performed ! * respecting case, with conversions done using the default ! * <code>Locale</code>. */ ! public StringFilter (String pattern, boolean sensitive) { ! this (pattern, sensitive, null); } ! /** ! * Creates a StringFilter that accepts text nodes containing a string. * @param pattern The pattern to search for. ! * @param sensitive If <code>true</code>, comparisons are performed * respecting case. * @param locale The locale to use when converting to uppercase. * If <code>null</code>, the default <code>Locale</code> is used. */ ! public StringFilter (String pattern, boolean sensitive, Locale locale) { mPattern = pattern; ! mCaseSensitive = sensitive; mLocale = (null == locale) ? Locale.getDefault () : locale; setUpperPattern (); *************** *** 112,116 **** // protected methods // ! 
/** * Set the real (upper case) comparison string. --- 114,118 ---- // protected methods // ! /** * Set the real (upper case) comparison string. *************** *** 196,200 **** String string; boolean ret; ! ret = false; if (node instanceof Text) --- 198,202 ---- String string; boolean ret; ! ret = false; if (node instanceof Text) Index: IsEqualFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/IsEqualFilter.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** IsEqualFilter.java 24 Jan 2004 23:57:50 -0000 1.1 --- IsEqualFilter.java 15 May 2005 11:49:04 -0000 1.2 *************** *** 38,45 **** * The node to match. */ ! public Node mNode; /** ! * Creates a new instance of an IsEqualFilter that accepts only the node provided. * @param node The node to match. */ --- 38,45 ---- * The node to match. */ ! protected Node mNode; /** ! * Creates a new IsEqualFilter that accepts only the node provided. * @param node The node to match. */ Index: RegexFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/RegexFilter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RegexFilter.java 10 Apr 2005 23:20:43 -0000 1.3 --- RegexFilter.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 60,64 **** * The month is matched by 0[1-9]|1[012], again enclosed by round brackets * to keep the two options together. By using character classes, the first ! * option matches a number between 01 and 09, and the second matches 10, 11 or 12. * The last part of the regex consists of three options. The first matches * the numbers 01 through 09, the second 10 through 29, and the third matches 30 or 31. --- 60,65 ---- * The month is matched by 0[1-9]|1[012], again enclosed by round brackets * to keep the two options together. 
By using character classes, the first ! * option matches a number between 01 and 09, and the second ! * matches 10, 11 or 12. * The last part of the regex consists of three options. The first matches * the numbers 01 through 09, the second 10 through 29, and the third matches 30 or 31. *************** *** 124,130 **** * @param strategy The type of match: * <ol> ! * <li>{@link #MATCH} use matches() method: attempts to match the entire input sequence against the pattern</li> ! * <li>{@link #LOOKINGAT} use lookingAt() method: attempts to match the input sequence, starting at the beginning, against the pattern</li> ! * <li>{@link #FIND} use find() method: scans the input sequence looking for the next subsequence that matches the pattern</li> * </ol> */ --- 125,134 ---- * @param strategy The type of match: * <ol> ! * <li>{@link #MATCH} use matches() method: attempts to match ! * the entire input sequence against the pattern</li> ! * <li>{@link #LOOKINGAT} use lookingAt() method: attempts to match ! * the input sequence, starting at the beginning, against the pattern</li> ! * <li>{@link #FIND} use find() method: scans the input sequence looking ! * for the next subsequence that matches the pattern</li> * </ol> */ *************** *** 169,172 **** --- 173,180 ---- public void setStrategy (int strategy) { + if ((strategy != MATCH) && (strategy != LOOKINGAT) + && (strategy != FIND)) + throw new IllegalArgumentException ("illegal strategy (" + + strategy + ")"); mStrategy = strategy; } *************** *** 175,179 **** * Accept string nodes that match the regular expression. * @param node The node to check. ! * @return <code>true</code> if the regular expression matches the * text of the node, <code>false</code> otherwise. */ --- 183,187 ---- * Accept string nodes that match the regular expression. * @param node The node to check. ! * @return <code>true</code> if the regular expression matches the * text of the node, <code>false</code> otherwise. 
*/ *************** *** 183,187 **** Matcher matcher; boolean ret; ! ret = false; if (node instanceof Text) --- 191,195 ---- Matcher matcher; boolean ret; ! ret = false; if (node instanceof Text) *************** *** 198,201 **** --- 206,210 ---- break; case FIND: + default: ret = matcher.find (); break; Index: HasParentFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/HasParentFilter.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** HasParentFilter.java 10 Apr 2005 23:20:43 -0000 1.7 --- HasParentFilter.java 15 May 2005 11:49:04 -0000 1.8 *************** *** 38,53 **** */ public class HasParentFilter ! implements ! NodeFilter { /** * The filter to apply to the parent. */ ! public NodeFilter mParentFilter; /** * Performs a recursive search up the node heirarchy if <code>true</code>. */ ! public boolean mRecursive; /** --- 38,53 ---- */ public class HasParentFilter ! implements ! NodeFilter { /** * The filter to apply to the parent. */ ! protected NodeFilter mParentFilter; /** * Performs a recursive search up the node heirarchy if <code>true</code>. */ ! protected boolean mRecursive; /** *************** *** 62,66 **** /** ! * Creates a new instance of HasParentFilter that accepts nodes with direct parent acceptable to the filter. * @param filter The filter to apply to the parent. */ --- 62,67 ---- /** ! * Creates a new instance of HasParentFilter that accepts nodes with ! * the direct parent acceptable to the filter. * @param filter The filter to apply to the parent. */ *************** *** 71,75 **** /** ! * Creates a new instance of HasParentFilter that accepts nodes with a parent acceptable to the filter. * @param filter The filter to apply to the parent. * @param recursive If <code>true</code>, any enclosing node acceptable --- 72,77 ---- /** ! * Creates a new instance of HasParentFilter that accepts nodes with ! 
* a parent acceptable to the filter. * @param filter The filter to apply to the parent. * @param recursive If <code>true</code>, any enclosing node acceptable *************** *** 92,96 **** return (mParentFilter); } ! /** * Set the filter for this HasParentFilter. --- 94,98 ---- return (mParentFilter); } ! /** * Set the filter for this HasParentFilter. *************** *** 109,113 **** public boolean getRecursive () { ! return mRecursive; } --- 111,115 ---- public boolean getRecursive () { ! return (mRecursive); } *************** *** 139,149 **** if (!(node instanceof Tag) || !((Tag)node).isEndTag ()) { ! parent = node.getParent (); ! if ((null != parent) && (null != getParentFilter ())) ! { ! ret = getParentFilter ().accept (parent); ! if (!ret && getRecursive ()) ! ret = accept (parent); ! } } --- 141,151 ---- if (!(node instanceof Tag) || !((Tag)node).isEndTag ()) { ! parent = node.getParent (); ! if ((null != parent) && (null != getParentFilter ())) ! { ! ret = getParentFilter ().accept (parent); ! if (!ret && getRecursive ()) ! ret = accept (parent); ! } } Index: LinkRegexFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/LinkRegexFilter.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** LinkRegexFilter.java 10 Apr 2005 23:20:43 -0000 1.2 --- LinkRegexFilter.java 15 May 2005 11:49:04 -0000 1.3 *************** *** 27,31 **** package org.htmlparser.filters; ! import java.util.regex.*; import org.htmlparser.Node; --- 27,32 ---- package org.htmlparser.filters; ! import java.util.regex.Matcher; ! import java.util.regex.Pattern; import org.htmlparser.Node; *************** *** 42,51 **** /** * The regular expression to use on the link. ! */ protected Pattern mRegex; ! /** ! * Creates a new instance of LinkRegexFilter that accepts LinkTag nodes containing ! * a URL that matches the supplied regex pattern. The match is case insensitive. 
* @param regexPattern The pattern to match. */ --- 43,53 ---- /** * The regular expression to use on the link. ! */ protected Pattern mRegex; ! /** ! * Creates a LinkRegexFilter that accepts LinkTag nodes containing ! * a URL that matches the supplied regex pattern. ! * The match is case insensitive. * @param regexPattern The pattern to match. */ *************** *** 54,60 **** this (regexPattern, true); } ! /** ! * Creates a new instance of LinkRegexFilter that accepts LinkTag nodes containing * a URL that matches the supplied regex pattern. * @param regexPattern The regex pattern to match. --- 56,62 ---- this (regexPattern, true); } ! /** ! * Creates a LinkRegexFilter that accepts LinkTag nodes containing * a URL that matches the supplied regex pattern. * @param regexPattern The regex pattern to match. *************** *** 68,75 **** mRegex = Pattern.compile (regexPattern, Pattern.CASE_INSENSITIVE); } ! /** ! * Accept nodes that are assignable from the LinkTag class and have a URL that ! * matches the regex pattern supplied in the constructor. * @param node The node to check. * @return <code>true</code> if the node is a link with the pattern. --- 70,77 ---- mRegex = Pattern.compile (regexPattern, Pattern.CASE_INSENSITIVE); } ! /** ! * Accept nodes that are a LinkTag and have a URL ! * that matches the regex pattern supplied in the constructor. * @param node The node to check. * @return <code>true</code> if the node is a link with the pattern. *************** *** 78,82 **** { boolean ret; ! ret = false; if (LinkTag.class.isAssignableFrom (node.getClass ())) --- 80,84 ---- { boolean ret; ! 
ret = false; if (LinkTag.class.isAssignableFrom (node.getClass ())) Index: LinkStringFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/LinkStringFilter.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** LinkStringFilter.java 10 Apr 2005 23:20:43 -0000 1.2 --- LinkStringFilter.java 15 May 2005 11:49:04 -0000 1.3 *************** *** 47,54 **** */ protected boolean mCaseSensitive; ! /** ! * Creates a new instance of LinkStringFilter that accepts LinkTag nodes containing ! * a URL that matches the supplied pattern. The match is case insensitive. * @param pattern The pattern to match. */ --- 47,55 ---- */ protected boolean mCaseSensitive; ! /** ! * Creates a LinkStringFilter that accepts LinkTag nodes containing ! * a URL that matches the supplied pattern. ! * The match is case insensitive. * @param pattern The pattern to match. */ *************** *** 57,63 **** this (pattern, false); } ! /** ! * Creates a new instance of LinkStringFilter that accepts LinkTag nodes containing * a URL that matches the supplied pattern. * @param pattern The pattern to match. --- 58,64 ---- this (pattern, false); } ! /** ! * Creates a LinkStringFilter that accepts LinkTag nodes containing * a URL that matches the supplied pattern. * @param pattern The pattern to match. *************** *** 69,76 **** mCaseSensitive = caseSensitive; } ! /** ! * Accept nodes that are assignable from the LinkTag class and have a URL that ! * matches the pattern supplied in the constructor. * @param node The node to check. * @return <code>true</code> if the node is a link with the pattern. --- 70,77 ---- mCaseSensitive = caseSensitive; } ! /** ! * Accept nodes that are a LinkTag and ! * have a URL that matches the pattern supplied in the constructor. * @param node The node to check. * @return <code>true</code> if the node is a link with the pattern. 
*************** *** 79,83 **** { boolean ret; ! ret = false; if (LinkTag.class.isAssignableFrom (node.getClass ())) --- 80,84 ---- { boolean ret; ! ret = false; if (LinkTag.class.isAssignableFrom (node.getClass ())) Index: NotFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/NotFilter.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** NotFilter.java 10 Apr 2005 23:20:43 -0000 1.4 --- NotFilter.java 15 May 2005 11:49:04 -0000 1.5 *************** *** 52,56 **** /** ! * Creates a new instance of NotFilter that accepts nodes not acceptable to the predicate filter. * @param predicate The filter to consult. */ --- 52,56 ---- /** ! * Creates a NotFilter that accepts nodes not acceptable to the predicate. * @param predicate The filter to consult. */ *************** *** 68,72 **** return (mPredicate); } ! /** * Set the predicate for this NotFilter. --- 68,72 ---- return (mPredicate); } ! /** * Set the predicate for this NotFilter. Index: OrFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/OrFilter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** OrFilter.java 10 Apr 2005 23:20:43 -0000 1.3 --- OrFilter.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 52,56 **** /** ! * Creates a new instance of an OrFilter that accepts nodes acceptable to either filter. * @param left One filter. * @param right The other filter. --- 52,56 ---- /** ! * Creates an OrFilter that accepts nodes acceptable to either filter. * @param left One filter. * @param right The other filter. *************** *** 59,63 **** { NodeFilter[] predicates; ! predicates = new NodeFilter[2]; predicates[0] = left; --- 59,63 ---- { NodeFilter[] predicates; ! 
predicates = new NodeFilter[2]; predicates[0] = left; *************** *** 74,78 **** return (mPredicates); } ! /** * Set the predicates for this OrFilter. --- 74,78 ---- return (mPredicates); } ! /** * Set the predicates for this OrFilter. *************** *** 99,109 **** { boolean ret; ! ret = false; ! for (int i = 0; !ret && (i < mPredicates.length); i++) if (mPredicates[i].accept (node)) ret = true; ! return (ret); } --- 99,109 ---- { boolean ret; ! ret = false; ! for (int i = 0; !ret && (i < mPredicates.length); i++) if (mPredicates[i].accept (node)) ret = true; ! return (ret); } Index: HasChildFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/HasChildFilter.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** HasChildFilter.java 10 Apr 2005 23:20:43 -0000 1.4 --- HasChildFilter.java 15 May 2005 11:49:04 -0000 1.5 *************** *** 36,44 **** * It can be set to operate recursively, that is perform a scan down * through the node heirarchy in a breadth first traversal looking for any ! * descendant that matches the predicate filter (which stops the search). */ public class HasChildFilter ! implements ! NodeFilter { /** --- 36,44 ---- * It can be set to operate recursively, that is perform a scan down * through the node heirarchy in a breadth first traversal looking for any ! * descendant that matches the predicate filter (which stops the search). */ public class HasChildFilter ! implements ! NodeFilter { /** *************** *** 50,54 **** * Performs a recursive search down the node heirarchy if <code>true</code>. */ ! public boolean mRecursive; /** --- 50,54 ---- * Performs a recursive search down the node heirarchy if <code>true</code>. */ ! protected boolean mRecursive; /** *************** *** 63,67 **** /** ! * Creates a new instance of HasChildFilter that accepts nodes with a direct child acceptable to the filter. 
* @param filter The filter to apply to the children. */ --- 63,68 ---- /** ! * Creates a new instance of HasChildFilter that accepts nodes ! * with a direct child acceptable to the filter. * @param filter The filter to apply to the children. */ *************** *** 72,78 **** /** ! * Creates a new instance of HasChildFilter that accepts nodes with a child acceptable to the filter. * Of necessity, this applies only to composite tags, i.e. those that can ! * contain other nodes, for example <HTML></HTML>. * @param filter The filter to apply to children. * @param recursive If <code>true</code>, any enclosed node acceptable --- 73,80 ---- /** ! * Creates a new instance of HasChildFilter that accepts nodes ! * with a child acceptable to the filter. * Of necessity, this applies only to composite tags, i.e. those that can ! * contain other nodes, for example <HTML></HTML>. * @param filter The filter to apply to children. * @param recursive If <code>true</code>, any enclosed node acceptable *************** *** 94,98 **** return (mChildFilter); } ! /** * Set the filter for this HasParentFilter. --- 96,100 ---- return (mChildFilter); } ! /** * Set the filter for this HasParentFilter. *************** *** 111,115 **** public boolean getRecursive () { ! return mRecursive; } --- 113,117 ---- public boolean getRecursive () { ! return (mRecursive); } *************** *** 145,149 **** if (getChildFilter ().accept (children.elementAt (i))) ret = true; ! // do recursion after all children checked to get breadth first traversal if (!ret && getRecursive ()) for (int i = 0; !ret && i < children.size (); i++) --- 147,152 ---- if (getChildFilter ().accept (children.elementAt (i))) ret = true; ! // do recursion after all children are checked ! 
// to get breadth first traversal if (!ret && getRecursive ()) for (int i = 0; !ret && i < children.size (); i++) Index: HasSiblingFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/HasSiblingFilter.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** HasSiblingFilter.java 10 Apr 2005 23:20:43 -0000 1.2 --- HasSiblingFilter.java 15 May 2005 11:49:04 -0000 1.3 *************** *** 37,47 **** */ public class HasSiblingFilter ! implements ! NodeFilter { /** * The filter to apply to the sibling. */ ! public NodeFilter mSiblingFilter; /** --- 37,47 ---- */ public class HasSiblingFilter ! implements ! NodeFilter { /** * The filter to apply to the sibling. */ ! protected NodeFilter mSiblingFilter; /** *************** *** 56,60 **** /** ! * Creates a new instance of HasSiblingFilter that accepts nodes with sibling acceptable to the filter. * @param filter The filter to apply to the sibling. */ --- 56,61 ---- /** ! * Creates a new instance of HasSiblingFilter that accepts nodes ! * with sibling acceptable to the filter. * @param filter The filter to apply to the sibling. */ *************** *** 72,76 **** return (mSiblingFilter); } ! /** * Set the filter for this HasSiblingFilter. --- 73,77 ---- return (mSiblingFilter); } ! /** * Set the filter for this HasSiblingFilter. *************** *** 98,115 **** if (!(node instanceof Tag) || !((Tag)node).isEndTag ()) { ! parent = node.getParent (); ! if (null != parent) ! { ! siblings = parent.getChildren (); ! if (null != siblings) ! { ! count = siblings.size (); ! for (int i = 0; !ret && (i < count); i++) ! if (getSiblingFilter ().accept (siblings.elementAt (i))) ! ret = true; ! } ! else ! System.out.println("gotcha"); ! } } --- 99,114 ---- if (!(node instanceof Tag) || !((Tag)node).isEndTag ()) { ! parent = node.getParent (); ! if (null != parent) ! { ! siblings = parent.getChildren (); ! 
if (null != siblings) ! { ! count = siblings.size (); ! for (int i = 0; !ret && (i < count); i++) ! if (getSiblingFilter ().accept (siblings.elementAt (i))) ! ret = true; ! } ! } } Index: CssSelectorNodeFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/CssSelectorNodeFilter.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** CssSelectorNodeFilter.java 10 Apr 2005 23:20:43 -0000 1.5 --- CssSelectorNodeFilter.java 15 May 2005 11:49:04 -0000 1.6 *************** *** 44,47 **** --- 44,50 ---- public class CssSelectorNodeFilter implements NodeFilter { + /** + * Regular expression to split the selector into tokens. + */ private static Pattern tokens = Pattern.compile("(" *************** *** 64,75 **** , Pattern.CASE_INSENSITIVE ! |Pattern.DOTALL ! |Pattern.COMMENTS); ! private static final int COMMENT = 1, QUOTEDSTRING = 2, RELATION = 3, ! NAME = 4, COMBINATOR = 5, DELIM = 6, COMMA = 7; private NodeFilter therule; /** * Create a Cascading Style Sheet node filter. --- 67,114 ---- , Pattern.CASE_INSENSITIVE ! | Pattern.DOTALL ! | Pattern.COMMENTS); ! /** ! * Comment token type. ! */ ! private static final int COMMENT = 1; ! ! /** ! * quoted string token type. ! */ ! private static final int QUOTEDSTRING = 2; ! ! /** ! * Relation token type. ! */ ! private static final int RELATION = 3; ! ! /** ! * Name token type. ! */ ! private static final int NAME = 4; ! ! /** ! * Combinator token type. ! */ ! private static final int COMBINATOR = 5; ! ! /** ! * Delimiter token type. ! */ ! private static final int DELIM = 6; ! ! /** ! * Comma token type. ! */ ! private static final int COMMA = 7; private NodeFilter therule; + private Matcher m = null; + private int tokentype = 0; + private String token = null; + /** * Create a Cascading Style Sheet node filter. *************** *** 78,84 **** public CssSelectorNodeFilter(String selector) { ! 
m = tokens.matcher(selector); ! if (nextToken()) ! therule = parse(); } --- 117,123 ---- public CssSelectorNodeFilter(String selector) { ! m = tokens.matcher (selector); ! if (nextToken ()) ! therule = parse (); } *************** *** 89,119 **** * <code>false</code> otherwise. */ ! public boolean accept(Node node) { ! return therule.accept(node); } ! private Matcher m = null; ! private int tokentype = 0; ! private String token = null; ! ! private boolean nextToken() { ! if (m != null && m.find()) ! for (int i = 1; i < m.groupCount(); i++) ! if (m.group(i) != null) { tokentype = i; ! token = m.group(i); return true; } tokentype = 0; token = null; ! return false; } ! private NodeFilter parse() { ! NodeFilter n = null; do { --- 128,156 ---- * <code>false</code> otherwise. */ ! public boolean accept (Node node) { ! return (therule.accept (node)); } ! private boolean nextToken () { ! if (m != null && m.find ()) ! for (int i = 1; i < m.groupCount (); i++) ! if (null != m.group (i)) { tokentype = i; ! token = m.group (i); return true; } tokentype = 0; token = null; ! return (false); } ! private NodeFilter parse () { ! NodeFilter ret; ! ! ret = null; do { *************** *** 123,153 **** case NAME: case DELIM: ! if (n == null) ! n = parseSimple(); else ! n = new AndFilter(n, parseSimple()); break; case COMBINATOR: ! switch (token.charAt(0)) { case '+': ! n = new AdjacentFilter(n); break; case '>': ! n = new HasParentFilter(n); break; default: // whitespace ! n = new HasAncestorFilter(n); } ! nextToken(); break; case COMMA: ! n = new OrFilter(n, parse()); ! nextToken(); break; } } while (token != null); ! return n; } --- 160,191 ---- case NAME: case DELIM: ! if (ret == null) ! ret = parseSimple (); else ! ret = new AndFilter (ret, parseSimple ()); break; case COMBINATOR: ! switch (token.charAt (0)) { case '+': ! ret = new AdjacentFilter (ret); break; case '>': ! ret = new HasParentFilter (ret); break; default: // whitespace ! ret = new HasAncestorFilter (ret); } ! 
nextToken (); break; case COMMA: ! ret = new OrFilter (ret, parse ()); ! nextToken (); break; } } while (token != null); ! ! return (ret); } *************** *** 155,159 **** { boolean done = false; ! NodeFilter n = null; if (token != null) --- 193,197 ---- { boolean done = false; ! NodeFilter ret = null; if (token != null) *************** *** 167,175 **** case NAME: if ("*".equals(token)) ! n = new YesFilter(); ! else if (n == null) ! n = new TagNameFilter(unescape(token)); else ! n = new AndFilter(n, new TagNameFilter(unescape(token))); nextToken(); break; --- 205,213 ---- case NAME: if ("*".equals(token)) ! ret = new YesFilter(); ! else if (ret == null) ! ret = new TagNameFilter(unescape(token)); else ! ret = new AndFilter(ret, new TagNameFilter(unescape(token))); nextToken(); break; *************** *** 181,189 **** if (tokentype != NAME) throw new IllegalArgumentException("Syntax error at " + token); ! if (n == null) ! n = new HasAttributeFilter("class", unescape(token)); else ! n ! = new AndFilter(n, new HasAttributeFilter("class", unescape(token))); break; case '#': --- 219,227 ---- if (tokentype != NAME) throw new IllegalArgumentException("Syntax error at " + token); ! if (ret == null) ! ret = new HasAttributeFilter("class", unescape(token)); else ! ret ! = new AndFilter(ret, new HasAttributeFilter("class", unescape(token))); break; case '#': *************** *** 191,212 **** if (tokentype != NAME) throw new IllegalArgumentException("Syntax error at " + token); ! if (n == null) ! n = new HasAttributeFilter("id", unescape(token)); else ! n = new AndFilter(n, new HasAttributeFilter("id", unescape(token))); break; case ':': nextToken(); ! if (n == null) ! n = parsePseudoClass(); else ! n = new AndFilter(n, parsePseudoClass()); break; case '[': nextToken(); ! if (n == null) ! n = parseAttributeExp(); else ! n = new AndFilter(n, parseAttributeExp()); break; } --- 229,250 ---- if (tokentype != NAME) throw new IllegalArgumentException("Syntax error at " + token); ! 
if (ret == null) ! ret = new HasAttributeFilter("id", unescape(token)); else ! ret = new AndFilter(ret, new HasAttributeFilter("id", unescape(token))); break; case ':': nextToken(); ! if (ret == null) ! ret = parsePseudoClass(); else ! ret = new AndFilter(ret, parsePseudoClass()); break; case '[': nextToken(); ! if (ret == null) ! ret = parseAttributeExp(); else ! ret = new AndFilter(ret, parseAttributeExp()); break; } *************** *** 218,222 **** } while (!done && token != null); ! return n; } --- 256,260 ---- } while (!done && token != null); ! return ret; } *************** *** 228,232 **** private NodeFilter parseAttributeExp() { ! NodeFilter n = null; if (tokentype == NAME) { --- 266,270 ---- private NodeFilter parseAttributeExp() { ! NodeFilter ret = null; if (tokentype == NAME) { *************** *** 234,238 **** nextToken(); if ("]".equals(token)) ! n = new HasAttributeFilter(unescape(attrib)); else if (tokentype == RELATION) { --- 272,276 ---- nextToken(); if ("]".equals(token)) ! ret = new HasAttributeFilter(unescape(attrib)); else if (tokentype == RELATION) { *************** *** 244,260 **** val = unescape(token); if ("~=".equals(rel) && val != null) ! n = new AttribMatchFilter(unescape(attrib), "\\b" + val.replaceAll("([^a-zA-Z0-9])", "\\\\$1") + "\\b"); else if ("=".equals(rel) && val != null) ! n = new HasAttributeFilter(attrib, val); } } ! if (n == null) throw new IllegalArgumentException("Syntax error at " + token + tokentype); nextToken(); ! return n; } --- 282,298 ---- val = unescape(token); if ("~=".equals(rel) && val != null) ! ret = new AttribMatchFilter(unescape(attrib), "\\b" + val.replaceAll("([^a-zA-Z0-9])", "\\\\$1") + "\\b"); else if ("=".equals(rel) && val != null) ! ret = new HasAttributeFilter(attrib, val); } } ! if (ret == null) throw new IllegalArgumentException("Syntax error at " + token + tokentype); nextToken(); ! 
return ret; } Index: TagNameFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/TagNameFilter.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** TagNameFilter.java 10 Apr 2005 23:20:43 -0000 1.5 --- TagNameFilter.java 15 May 2005 11:49:04 -0000 1.6 *************** *** 56,60 **** /** ! * Creates a new instance of TagNameFilter that accepts tags with the given name. * @param name The tag name to match. */ --- 56,60 ---- /** ! * Creates a TagNameFilter that accepts tags with the given name. * @param name The tag name to match. */ *************** *** 92,98 **** public boolean accept (Node node) { ! return ((node instanceof Tag) && ! !((Tag)node).isEndTag () && ! ((Tag)node).getTagName ().equals (mName)); } } --- 92,98 ---- public boolean accept (Node node) { ! return ((node instanceof Tag) ! && !((Tag)node).isEndTag () ! && ((Tag)node).getTagName ().equals (mName)); } } Index: HasAttributeFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/HasAttributeFilter.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** HasAttributeFilter.java 10 Apr 2005 23:20:43 -0000 1.6 --- HasAttributeFilter.java 15 May 2005 11:49:04 -0000 1.7 *************** *** 35,39 **** /** ! * This class accepts all tags that have a certain attribute, and optionally, with a certain value. */ public class HasAttributeFilter implements NodeFilter --- 35,40 ---- /** ! * This class accepts all tags that have a certain attribute, ! * and optionally, with a certain value. */ public class HasAttributeFilter implements NodeFilter *************** *** 60,64 **** /** ! * Creates a new instance of HasAttributeFilter that accepts tags with the given attribute. * @param attribute The attribute to search for. */ --- 61,66 ---- /** ! 
* Creates a new instance of HasAttributeFilter that accepts tags ! * with the given attribute. * @param attribute The attribute to search for. */ *************** *** 69,75 **** /** ! * Creates a new instance of HasAttributeFilter that accepts tags with the given attribute. * @param attribute The attribute to search for. ! * @param value The value that must be matched, or null if any value will match. */ public HasAttributeFilter (String attribute, String value) --- 71,79 ---- /** ! * Creates a new instance of HasAttributeFilter that accepts tags ! * with the given attribute and value. * @param attribute The attribute to search for. ! * @param value The value that must be matched, ! * or null if any value will match. */ public HasAttributeFilter (String attribute, String value) *************** *** 109,113 **** * Set the attribute value. * @param value The value of the attribute to accept. ! * If <code>null</code>, any tag with the attribute, no matter it's value is acceptable. */ public void setAttributeValue (String value) --- 113,118 ---- * Set the attribute value. * @param value The value of the attribute to accept. ! * If <code>null</code>, any tag with the attribute, ! * no matter what it's value is acceptable. */ public void setAttributeValue (String value) Index: NodeClassFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/NodeClassFilter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** NodeClassFilter.java 10 Apr 2005 23:20:43 -0000 1.3 --- NodeClassFilter.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 41,45 **** /** ! * Creates a new instance of NodeClassFilter that accepts tags of the Html (top level) class. */ public NodeClassFilter () --- 41,45 ---- /** ! * Creates a NodeClassFilter that accepts Html tags. */ public NodeClassFilter () *************** *** 49,54 **** /** ! 
* Creates a new instance of NodeClassFilter that accepts tags of the given class. ! * @param cls The cls to match. */ public NodeClassFilter (Class cls) --- 49,54 ---- /** ! * Creates a NodeClassFilter that accepts tags of the given class. ! * @param cls The class to match. */ public NodeClassFilter (Class cls) *************** *** 76,80 **** /** ! * Accept nodes that are assignable from the class provided in the constructor. * @param node The node to check. * @return <code>true</code> if the node is the right class, --- 76,81 ---- /** ! * Accept nodes that are assignable from the class provided in ! * the constructor. * @param node The node to check. * @return <code>true</code> if the node is the right class, |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/lexer Modified Files: Cursor.java InputStreamSource.java Lexer.java Page.java PageAttribute.java PageIndex.java Source.java Stream.java StringSource.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: PageIndex.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** PageIndex.java 1 Aug 2004 02:16:04 -0000 1.17 --- PageIndex.java 15 May 2005 11:49:04 -0000 1.18 *************** *** 330,334 **** if ((index >= capacity ()) || (size () == capacity ())) { // allocate more space ! int new_values[] = new int[Math.max (capacity () + mIncrement, index + 1)]; mIncrement *= 2; if (index < capacity ()) --- 330,334 ---- if ((index >= capacity ()) || (size () == capacity ())) { // allocate more space ! int[] new_values = new int[Math.max (capacity () + mIncrement, index + 1)]; mIncrement *= 2; if (index < capacity ()) Index: StringSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/StringSource.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** StringSource.java 12 Apr 2005 11:27:41 -0000 1.3 --- StringSource.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 273,277 **** ret = n; } ! return (ret); } --- 273,277 ---- ret = n; } ! return (ret); } *************** *** 362,366 **** ret = mString.substring (offset, offset + length); } ! return (ret); } --- 362,366 ---- ret = mString.substring (offset, offset + length); } ! 
return (ret); } Index: Stream.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Stream.java 12 Apr 2005 11:27:41 -0000 1.13 --- Stream.java 15 May 2005 11:49:04 -0000 1.14 *************** *** 29,33 **** import java.io.IOException; import java.io.InputStream; - import java.lang.Runnable; /** --- 29,32 ---- Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** Lexer.java 12 Apr 2005 11:27:41 -0000 1.38 --- Lexer.java 15 May 2005 11:49:04 -0000 1.39 *************** *** 27,31 **** package org.htmlparser.lexer; - import java.io.IOException; import java.io.Serializable; import java.net.MalformedURLException; --- 27,30 ---- *************** *** 77,86 **** /** * Line number to trigger on. ! * This is tested on each <code>nextNode()</code> call, as an aid to debugging. ! * Alter this value and set a breakpoint on the line after the test. ! * Remember, these line numbers are zero based, while most editors are one based. * @see #nextNode ! */ ! static protected int mDebugLineTrigger = -1; /** --- 76,86 ---- /** * Line number to trigger on. ! * This is tested on each <code>nextNode()</code> call, as a debugging aid. ! * Alter this value and set a breakpoint on the guarded statement. ! * Remember, these line numbers are zero based, while most editors are ! * one based. * @see #nextNode ! */ ! protected static int mDebugLineTrigger = -1; /** *************** *** 238,242 **** * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the underlying page. 
*/ public Node nextNode () --- 238,243 ---- * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the ! * underlying page. */ public Node nextNode () *************** *** 252,256 **** * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the underlying page. */ public Node nextNode (boolean quotesmart) --- 253,258 ---- * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the ! * underlying page. */ public Node nextNode (boolean quotesmart) *************** *** 268,272 **** int lineno = page.row (mCursor); if (mDebugLineTrigger < lineno) ! mDebugLineTrigger = lineno + 1; // trigger on subsequent lines too } start = mCursor.getPosition (); --- 270,274 ---- int lineno = page.row (mCursor); if (mDebugLineTrigger < lineno) ! mDebugLineTrigger = lineno + 1; // trigger on next line too } start = mCursor.getPosition (); *************** *** 302,311 **** else { ! mCursor.retreat (); // remark and tag need this character if ('-' == ch) ret = parseRemark (start, quotesmart); else { ! mCursor.retreat (); // tag needs the previous one too ret = parseTag (start); } --- 304,313 ---- else { ! mCursor.retreat (); // remark/tag need this char if ('-' == ch) ret = parseRemark (start, quotesmart); else { ! mCursor.retreat (); // tag needs prior one too ret = parseTag (start); } *************** *** 365,369 **** break; default: ! throw new IllegalStateException ("how the fuck did we get in state " + state); } } --- 367,371 ---- break; default: ! throw new IllegalStateException ("state " + state); } } *************** *** 416,430 **** mCursor.retreat (); } ! else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch))) quote = ch; // enter quoted state ! 
// patch contributed by Gernot Fricke to handle escaped closing quote else if (quotesmart && (0 != quote) && ('\\' == ch)) { ! ch = mPage.getCharacter (mCursor); //try to consume escaped character if ((Page.EOF != ch) && ('\\' != ch) // escaped backslash ! && (ch != quote)) // escaped quote character // ( reflects ["] or ['] whichever opened the quotation) ! mCursor.retreat(); // unconsume char if character was not an escapable char. } else if (quotesmart && (ch == quote)) --- 418,433 ---- mCursor.retreat (); } ! else if (quotesmart && (0 == quote) ! && (('\'' == ch) || ('"' == ch))) quote = ch; // enter quoted state ! // patch from Gernot Fricke to handle escaped closing quote else if (quotesmart && (0 != quote) && ('\\' == ch)) { ! ch = mPage.getCharacter (mCursor); // try to consume escape if ((Page.EOF != ch) && ('\\' != ch) // escaped backslash ! && (ch != quote)) // escaped quote character // ( reflects ["] or ['] whichever opened the quotation) ! mCursor.retreat(); // unconsume char if char not an escape } else if (quotesmart && (ch == quote)) *************** *** 432,436 **** else if (quotesmart && (0 == quote) && (ch == '/')) { ! // handle multiline and double slash comments (with a quote) in script like: // I can't handle single quotations. ch = mPage.getCharacter (mCursor); --- 435,440 ---- else if (quotesmart && (0 == quote) && (ch == '/')) { ! // handle multiline and double slash comments (with a quote) ! // in script like: // I can't handle single quotations. ch = mPage.getCharacter (mCursor); *************** *** 465,469 **** done = true; // the order of these tests might be optimized for speed: ! else if ('/' == ch || Character.isLetter (ch) || '!' == ch || '%' == ch) { done = true; --- 469,474 ---- done = true; // the order of these tests might be optimized for speed: ! else if ('/' == ch || Character.isLetter (ch) ! || '!' == ch || '%' == ch) { done = true; *************** *** 486,490 **** * @param start The starting point of the node. 
* @param end The ending point of the node. ! * @exception ParserException If the nodefactory creation of the string node fails. * @return The new Text node. */ --- 491,496 ---- * @param start The starting point of the node. * @param end The ending point of the node. ! * @exception ParserException If the nodefactory creation of the text ! * node fails. * @return The new Text node. */ *************** *** 498,539 **** length = end - start; if (0 != length) ! { // got some characters ! ret = getNodeFactory ().createStringNode (this.getPage (), start, end); ! } else ret = null; ! return (ret); } private void whitespace (Vector attributes, int[] bookmarks) { if (bookmarks[1] > bookmarks[0]) ! attributes.addElement (new PageAttribute (mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); } private void standalone (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); } private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); } private void naked (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[3], bookmarks[4], (char)0)); } private void single_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, bookmarks[5], '\'')); } private void double_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, bookmarks[6], '"')); } --- 504,584 ---- length = end - start; if (0 != length) ! // got some characters ! ret = getNodeFactory ().createStringNode ( ! this.getPage (), start, end); else ret = null; ! return (ret); } + /** + * Generate a whitespace 'attribute', + * @param attributes The list so far. + * @param bookmarks The array of positions. 
+ */ private void whitespace (Vector attributes, int[] bookmarks) { if (bookmarks[1] > bookmarks[0]) ! attributes.addElement (new PageAttribute ( ! mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); } + /** + * Generate a standalone attribute -- font. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void standalone (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); } + /** + * Generate an empty attribute -- color=. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); } + /** + * Generate an unquoted attribute -- size=1. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void naked (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[3], ! bookmarks[4], (char)0)); } + /** + * Generate an single quoted attribute -- width='100%'. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void single_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, ! bookmarks[5], '\'')); } + /** + * Generate an double quoted attribute -- CONTENT="Test Development". + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void double_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, ! bookmarks[6], '"')); } *************** *** 565,569 **** * vice versa. 
Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). ! * For doublequotes authors can also use the character entity reference &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters --- 610,615 ---- * vice versa. Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). ! * For doublequotes authors can also use the character entity reference ! * &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters *************** *** 574,579 **** * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each ! * attribute in the reference manual indicates whether its value is case-insensitive.<p> ! * All the attributes defined by this specification are listed in the attribute index.<p> * </cite> * <p> --- 620,627 ---- * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each ! * attribute in the reference manual indicates whether its value is ! * case-insensitive.<p> ! * All the attributes defined by this specification are listed in the ! * attribute index.<p> * </cite> * <p> Index: InputStreamSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** InputStreamSource.java 12 Apr 2005 11:27:41 -0000 1.6 --- InputStreamSource.java 15 May 2005 11:49:04 -0000 1.7 *************** *** 47,51 **** /** * An initial buffer size. ! * Has a default value of 16384. */ public static int BUFFER_SIZE = 16384; --- 47,51 ---- /** * An initial buffer size. ! 
* Has a default value of {@value}. */ public static int BUFFER_SIZE = 16384; *************** *** 70,84 **** * The characters read so far. */ ! public /*volatile*/ char[] mBuffer; /** * The number of valid bytes in the buffer. */ ! public /*volatile*/ int mLevel; /** * The offset of the next byte returned by read(). */ ! public /*volatile*/ int mOffset; /** --- 70,84 ---- * The characters read so far. */ ! protected char[] mBuffer; /** * The number of valid bytes in the buffer. */ ! protected int mLevel; /** * The offset of the next byte returned by read(). */ ! protected int mOffset; /** *************** *** 90,94 **** * Create a source of characters using the default character set. * @param stream The stream of bytes to use. ! * @exception UnsupportedEncodingException If the default character set is unsupported. */ public InputStreamSource (InputStream stream) --- 90,95 ---- * Create a source of characters using the default character set. * @param stream The stream of bytes to use. ! * @exception UnsupportedEncodingException If the default character set ! * is unsupported. */ public InputStreamSource (InputStream stream) *************** *** 103,107 **** * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @exception UnsupportedEncodingException If the character set is unsupported. */ public InputStreamSource (InputStream stream, String charset) --- 104,109 ---- * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @exception UnsupportedEncodingException If the character set ! * is unsupported. */ public InputStreamSource (InputStream stream, String charset) *************** *** 116,123 **** * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @param buffer_size The initial character buffer size. ! * @exception UnsupportedEncodingException If the character set is unsupported. */ ! 
public InputStreamSource (InputStream stream, String charset, int buffer_size) throws UnsupportedEncodingException --- 118,126 ---- * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @param size The initial character buffer size. ! * @exception UnsupportedEncodingException If the character set ! * is unsupported. */ ! public InputStreamSource (InputStream stream, String charset, int size) throws UnsupportedEncodingException *************** *** 151,155 **** mReader = new InputStreamReader (stream, charset); } ! mBuffer = new char[buffer_size]; mLevel = 0; mOffset = 0; --- 154,158 ---- mReader = new InputStreamReader (stream, charset); } ! mBuffer = new char[size]; mLevel = 0; mOffset = 0; *************** *** 161,164 **** --- 164,172 ---- // + /** + * Serialization support. + * @param out Where to write this object. + * @exception IOException If serialization has a problem. + */ private void writeObject (ObjectOutputStream out) throws *************** *** 177,184 **** mOffset = offset; } ! out.defaultWriteObject (); } private void readObject (ObjectInputStream in) throws --- 185,197 ---- mOffset = offset; } ! out.defaultWriteObject (); } + /** + * Deserialization support. + * @param in Where to read this object from. + * @exception IOException If deserialization has a problem. + */ private void readObject (ObjectInputStream in) throws *************** *** 222,226 **** * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. --- 235,239 ---- * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. 
Index: Source.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** Source.java 12 Apr 2005 11:27:41 -0000 1.19 --- Source.java 15 May 2005 11:49:04 -0000 1.20 *************** *** 43,47 **** * <li>the fetching of bytes may be asynchronous</li> * <li>the character set may be changed, which resets the input stream</li> ! * <li>characters may be requested more than once, so in general they will be buffered</li> * </ul> */ --- 43,48 ---- * <li>the fetching of bytes may be asynchronous</li> * <li>the character set may be changed, which resets the input stream</li> ! * <li>characters may be requested more than once, so in general they ! * will be buffered</li> * </ul> */ Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** Page.java 12 Apr 2005 11:27:41 -0000 1.49 --- Page.java 15 May 2005 11:49:04 -0000 1.50 *************** *** 57,62 **** * The default charset. * This should be <code>{@value}</code>, ! * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) section 3.7.1 ! * Another alias is "8859_1". */ public static final String DEFAULT_CHARSET = "ISO-8859-1"; --- 57,63 ---- * The default charset. * This should be <code>{@value}</code>, ! * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) ! * section 3.7.1 ! * <p>Another alias is "8859_1". */ public static final String DEFAULT_CHARSET = "ISO-8859-1"; *************** *** 95,99 **** */ protected PageIndex mIndex; ! /** * The connection this page is coming from or <code>null</code>. --- 96,100 ---- */ protected PageIndex mIndex; ! /** * The connection this page is coming from or <code>null</code>. 
*************** *** 104,108 **** * Connection control (proxy, cookies, authorization). */ ! public static ConnectionManager mConnectionManager = new ConnectionManager (); /** --- 105,110 ---- * Connection control (proxy, cookies, authorization). */ ! protected static ConnectionManager mConnectionManager = ! new ConnectionManager (); /** *************** *** 138,142 **** * @param charset The encoding used. * If null, defaults to the <code>DEFAULT_CHARSET</code>. ! * @exception UnsupportedEncodingException If the given charset is not supported. */ public Page (InputStream stream, String charset) --- 140,145 ---- * @param charset The encoding used. * If null, defaults to the <code>DEFAULT_CHARSET</code>. ! * @exception UnsupportedEncodingException If the given charset ! * is not supported. */ public Page (InputStream stream, String charset) *************** *** 186,189 **** --- 189,207 ---- } + /** + * Construct a page from a source. + * @param source The source of characters. + */ + public Page (Source source) + { + if (null == source) + throw new IllegalArgumentException ("source cannot be null"); + mSource = source; + mIndex = new PageIndex (this); + mConnection = null; + mUrl = null; + mBaseUrl = null; + } + // // static methods *************** *** 216,220 **** * which is applicable both to the HTTP header field Content-Type and * the meta tag http-equiv="Content-Type". ! * Note this method also handles non-compliant quoted charset directives such as: * <pre> * text/html; charset="UTF-8" --- 234,239 ---- * which is applicable both to the HTTP header field Content-Type and * the meta tag http-equiv="Content-Type". ! * Note this method also handles non-compliant quoted charset directives ! * such as: * <pre> * text/html; charset="UTF-8" *************** *** 245,249 **** if (index != -1) { ! content = content.substring (index + CHARSET_STRING.length ()).trim (); if (content.startsWith ("=")) { --- 264,269 ---- if (index != -1) { ! content = content.substring (index + ! 
CHARSET_STRING.length ()).trim (); if (content.startsWith ("=")) { *************** *** 254,262 **** //remove any double quotes from around charset string ! if (content.startsWith ("\"") && content.endsWith ("\"") && (1 < content.length ())) content = content.substring (1, content.length () - 1); //remove any single quote from around charset string ! if (content.startsWith ("'") && content.endsWith ("'") && (1 < content.length ())) content = content.substring (1, content.length () - 1); --- 274,284 ---- //remove any double quotes from around charset string ! if (content.startsWith ("\"") && content.endsWith ("\"") ! && (1 < content.length ())) content = content.substring (1, content.length () - 1); //remove any single quote from around charset string ! if (content.startsWith ("'") && content.endsWith ("'") ! && (1 < content.length ())) content = content.substring (1, content.length () - 1); *************** *** 264,268 **** // Charset names are not case-sensitive; ! // that is, case is always ignored when comparing charset names. // if (!ret.equalsIgnoreCase (content)) // { --- 286,291 ---- // Charset names are not case-sensitive; ! // that is, case is always ignored when comparing ! // charset names. // if (!ret.equalsIgnoreCase (content)) // { *************** *** 287,294 **** * in that case the default is always returned. * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. * @return The character set name. */ ! public static String findCharset (String name, String _default) { String ret; --- 310,317 ---- * in that case the default is always returned. * @param name The name to look up. One of the aliases for a character set. ! * @param fallback The name to return if the lookup fails. * @return The character set name. */ ! 
public static String findCharset (String name, String fallback) { String ret; *************** *** 327,336 **** // and java.nio.charset.UnsupportedCharsetException // return the default ! ret = _default; System.out.println ( "unable to determine cannonical charset name for " + name + " - using " ! + _default); } --- 350,359 ---- // and java.nio.charset.UnsupportedCharsetException // return the default ! ret = fallback; System.out.println ( "unable to determine cannonical charset name for " + name + " - using " ! + fallback); } *************** *** 348,351 **** --- 371,375 ---- * disconnected, the underling source is saved. * @param out The object stream to store this object in. + * @exception IOException If there is a serialization problem. */ private void writeObject (ObjectOutputStream out) *************** *** 388,391 **** --- 412,419 ---- * For details see <code>writeObject()</code>. * @param in The object stream to decode. + * @exception IOException If there is a deserialization problem with + * the stream. + * @exception ClassNotFoundException If the deserialized class can't be + * located with the current classpath and class loader. */ private void readObject (ObjectInputStream in) *************** *** 461,471 **** * Clean up this page, releasing resources. * Calls <code>close()</code>. ! * @exception Throwable if <code>close()</code> throws an <code>IOException</code>. */ ! protected void finalize () throws Throwable { close (); } ! /** * Get the connection, if any. --- 489,502 ---- * Clean up this page, releasing resources. * Calls <code>close()</code>. ! * @exception Throwable if <code>close()</code> throws an ! * <code>IOException</code>. */ ! protected void finalize () ! throws ! Throwable { close (); } ! /** * Get the connection, if any. *************** *** 504,512 **** catch (UnknownHostException uhe) { ! throw new ParserException ("Connect to " + mConnection.getURL ().toExternalForm () + " failed.", uhe); } catch (IOException ioe) { ! 
throw new ParserException ("Exception connecting to " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe); } type = getContentType (); --- 535,546 ---- catch (UnknownHostException uhe) { ! throw new ParserException ("Connect to " ! + mConnection.getURL ().toExternalForm () + " failed.", uhe); } catch (IOException ioe) { ! throw new ParserException ("Exception connecting to " ! + mConnection.getURL ().toExternalForm () ! + " (" + ioe.getMessage () + ").", ioe); } type = getContentType (); *************** *** 515,525 **** { contentEncoding = connection.getContentEncoding(); ! if ((null != contentEncoding) && (-1 != contentEncoding.indexOf ("gzip"))) { ! stream = new Stream (new GZIPInputStream (getConnection ().getInputStream ())); } ! else if ((null != contentEncoding) && (-1 != contentEncoding.indexOf ("deflate"))) { ! stream = new Stream (new InflaterInputStream (getConnection ().getInputStream ())); } else --- 549,563 ---- { contentEncoding = connection.getContentEncoding(); ! if ((null != contentEncoding) ! && (-1 != contentEncoding.indexOf ("gzip"))) { ! stream = new Stream (new GZIPInputStream ( ! getConnection ().getInputStream ())); } ! else if ((null != contentEncoding) ! && (-1 != contentEncoding.indexOf ("deflate"))) { ! stream = new Stream (new InflaterInputStream ( ! getConnection ().getInputStream ())); } else *************** *** 549,553 **** catch (IOException ioe) { ! throw new ParserException ("Exception getting input stream from " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe); } mUrl = connection.getURL ().toExternalForm (); --- 587,593 ---- catch (IOException ioe) { ! throw new ParserException ("Exception getting input stream from " ! + mConnection.getURL ().toExternalForm () ! + " (" + ioe.getMessage () + ").", ioe); } mUrl = connection.getURL ().toExternalForm (); *************** *** 596,600 **** mBaseUrl = url; } ! /** * Get the source this page is reading from. 
--- 636,640 ---- mBaseUrl = url; } ! /** * Get the source this page is reading from. *************** *** 629,634 **** /** ! * Read the character at the cursor position. ! * The cursor position can be behind or equal to the current source position. * Returns end of lines (EOL) as \n, by converting \r and \r\n to \n, * and updates the end-of-line index accordingly --- 669,675 ---- /** ! * Read the character at the given cursor position. ! * The cursor position can be only behind or equal to the ! * current source position. * Returns end of lines (EOL) as \n, by converting \r and \r\n to \n, * and updates the end-of-line index accordingly *************** *** 651,655 **** if (mSource.offset () < i) // hmmm, we could skip ahead, but then what about the EOL index ! throw new ParserException ("attempt to read future characters from source"); else if (mSource.offset () == i) try --- 692,698 ---- if (mSource.offset () < i) // hmmm, we could skip ahead, but then what about the EOL index ! throw new ParserException ( ! "attempt to read future characters from source " ! + i + " > " + mSource.offset ()); else if (mSource.offset () == i) try *************** *** 761,765 **** * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. --- 804,808 ---- * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. *************** *** 921,925 **** { String ret; ! try { --- 964,968 ---- { String ret; ! try { *************** *** 936,940 **** + ioe.getMessage ()); } ! return (ret); } --- 979,983 ---- + ioe.getMessage ()); } ! return (ret); } *************** *** 957,961 **** if ((mSource.offset () < start) || (mSource.offset () < end)) ! 
throw new IllegalArgumentException ("attempt to extract future characters from source"); if (end < start) { --- 1000,1006 ---- if ((mSource.offset () < start) || (mSource.offset () < end)) ! throw new IllegalArgumentException ( ! "attempt to extract future characters from source" ! + start + "|" + end + " > " + mSource.offset ()); if (end < start) { Index: PageAttribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageAttribute.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** PageAttribute.java 12 Apr 2005 11:27:41 -0000 1.3 --- PageAttribute.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 28,32 **** import org.htmlparser.Attribute; - import org.htmlparser.lexer.Page; /** --- 28,31 ---- Index: Cursor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** Cursor.java 12 Apr 2005 11:27:41 -0000 1.19 --- Cursor.java 15 May 2005 11:49:04 -0000 1.20 *************** *** 167,170 **** } } - - \ No newline at end of file --- 167,168 ---- |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/tests/lexerTests Modified Files: KitTest.java LexerTests.java PageTests.java TagTests.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: LexerTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** LexerTests.java 10 Apr 2005 23:20:46 -0000 1.24 --- LexerTests.java 15 May 2005 11:49:05 -0000 1.25 *************** *** 611,614 **** --- 611,618 ---- mAcceptable.add ("SCRIPT"); mAcceptable.add ("NOSCRIPT"); + mAcceptable.add ("STYLE"); + mAcceptable.add ("SPAN"); + mAcceptable.add ("UL"); + mAcceptable.add ("LI"); } Index: PageTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageTests.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** PageTests.java 31 Jul 2004 16:42:31 -0000 1.18 --- PageTests.java 15 May 2005 11:49:05 -0000 1.19 *************** *** 409,411 **** } ! } \ No newline at end of file --- 409,411 ---- } ! } Index: KitTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** KitTest.java 10 Apr 2005 23:20:46 -0000 1.9 --- KitTest.java 15 May 2005 11:49:05 -0000 1.10 *************** *** 599,602 **** --- 599,606 ---- * * $Log$ + * Revision 1.10 2005/05/15 11:49:05 derrickoswald + * Documentation revamp part four. + * Remove some checkstyle warnings. + * * Revision 1.9 2005/04/10 23:20:46 derrickoswald * Documentation revamp part one. 
*************** *** 648,650 **** * * ! */ \ No newline at end of file --- 652,654 ---- * * ! */ Index: TagTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/TagTests.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** TagTests.java 13 Mar 2005 14:51:46 -0000 1.13 --- TagTests.java 15 May 2005 11:49:05 -0000 1.14 *************** *** 426,428 **** assertStringEquals ("html", html, tag.toHtml ()); } ! } \ No newline at end of file --- 426,428 ---- assertStringEquals ("html", html, tag.toHtml ()); } ! } |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/beans Modified Files: BeanyBaby.java FilterBean.java HTMLLinkBean.java HTMLTextBean.java LinkBean.java StringBean.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: HTMLLinkBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/HTMLLinkBean.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** HTMLLinkBean.java 10 Apr 2005 23:20:42 -0000 1.22 --- HTMLLinkBean.java 15 May 2005 11:49:03 -0000 1.23 *************** *** 42,46 **** * Created on December 24, 2002, 3:49 PM */ ! public class HTMLLinkBean extends JList implements Serializable, PropertyChangeListener { /** --- 42,51 ---- * Created on December 24, 2002, 3:49 PM */ ! public class HTMLLinkBean ! extends ! JList ! implements ! Serializable, ! PropertyChangeListener { /** *************** *** 77,87 **** public Dimension getMinimumSize () { ! FontMetrics metrics; int width; int height; ! metrics = getFontMetrics (getFont ()); ! width = metrics.stringWidth ("http://localhost"); ! height = metrics.getLeading () + metrics.getHeight () + metrics.getDescent (); return (new Dimension (width, height)); --- 82,92 ---- public Dimension getMinimumSize () { ! FontMetrics met; int width; int height; ! met = getFontMetrics (getFont ()); ! width = met.stringWidth ("http://localhost"); ! height = met.getLeading () + met.getHeight () + met.getDescent (); return (new Dimension (width, height)); *************** *** 102,106 **** /** * Remove a PropertyChangeListener from the listener list. ! * This removes a PropertyChangeListener that was registered for all properties. * <p><em>Delegates to the underlying StringBean</em> * @param listener The PropertyChangeListener to be removed. 
--- 107,111 ---- /** * Remove a PropertyChangeListener from the listener list. ! * This removes a registered PropertyChangeListener. * <p><em>Delegates to the underlying StringBean</em> * @param listener The PropertyChangeListener to be removed. *************** *** 189,196 **** // javax.swing.JFrame frame = new javax.swing.JFrame (); // frame.getContentPane ().setLayout (new BorderLayout ()); ! // frame.getContentPane ().add (new JScrollPane (lb), BorderLayout.CENTER); // frame.addWindowListener (new java.awt.event.WindowListener () { // public void windowOpened (java.awt.event.WindowEvent e) {} ! // public void windowClosing (java.awt.event.WindowEvent e) {System.exit (0);} // public void windowClosed (java.awt.event.WindowEvent e) {} // public void windowDeiconified (java.awt.event.WindowEvent e) {} --- 194,205 ---- // javax.swing.JFrame frame = new javax.swing.JFrame (); // frame.getContentPane ().setLayout (new BorderLayout ()); ! // frame.getContentPane ().add (new JScrollPane (lb), ! // BorderLayout.CENTER); // frame.addWindowListener (new java.awt.event.WindowListener () { // public void windowOpened (java.awt.event.WindowEvent e) {} ! // public void windowClosing (java.awt.event.WindowEvent e) ! // { ! // System.exit (0); ! // } // public void windowClosed (java.awt.event.WindowEvent e) {} // public void windowDeiconified (java.awt.event.WindowEvent e) {} Index: FilterBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/FilterBean.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** FilterBean.java 10 Apr 2005 23:20:42 -0000 1.2 --- FilterBean.java 15 May 2005 11:49:03 -0000 1.3 *************** *** 50,55 **** */ public class FilterBean ! implements ! Serializable { /** --- 50,55 ---- */ public class FilterBean ! implements ! Serializable { /** *************** *** 128,140 **** oldText = ""; mNodes = nodes; ! 
if (null != mNodes) // TODO: fix this null problem if StringBean finds no nodes newText = getText (); ! else newText = ""; if (null == newText) newText = ""; ! mPropertySupport.firePropertyChange (PROP_NODES_PROPERTY, oldValue, nodes); if (!newText.equals (oldText)) ! mPropertySupport.firePropertyChange (PROP_TEXT_PROPERTY, oldText, newText); } } --- 128,142 ---- oldText = ""; mNodes = nodes; ! if (null != mNodes) // TODO: fix this null problem newText = getText (); ! else // StringBean finds no nodes newText = ""; if (null == newText) newText = ""; ! mPropertySupport.firePropertyChange ( ! PROP_NODES_PROPERTY, oldValue, nodes); if (!newText.equals (oldText)) ! mPropertySupport.firePropertyChange ( ! PROP_TEXT_PROPERTY, oldText, newText); } } *************** *** 145,153 **** * Subsequent filters are applied to the output of the prior filter. * @return A list of nodes passed through all filters. ! * @throws ParserException If an encoding change occurs or there is some other problem. */ protected NodeList applyFilters () ! throws ! ParserException { NodeList ret; --- 147,156 ---- * Subsequent filters are applied to the output of the prior filter. * @return A list of nodes passed through all filters. ! * @throws ParserException If an encoding change occurs ! * or there is some other problem. */ protected NodeList applyFilters () ! throws ! ParserException { NodeList ret; *************** *** 158,162 **** for (int i = 0; i < getFilters ().length; i++) if (0 == i) ! ret = mParser.parse (getFilters ()[0]); else ret = ret.extractAllNodesThatMatch (getFilters ()[i]); --- 161,165 ---- for (int i = 0; i < getFilters ().length; i++) if (0 == i) ! ret = mParser.parse (getFilters ()[0]); else ret = ret.extractAllNodesThatMatch (getFilters ()[i]); *************** *** 214,218 **** /** * Remove a PropertyChangeListener from the listener list. ! * This removes a PropertyChangeListener that was registered for all properties. * @param listener The PropertyChangeListener to be removed. 
*/ --- 217,221 ---- /** * Remove a PropertyChangeListener from the listener list. ! * This removes a registered PropertyChangeListener. * @param listener The PropertyChangeListener to be removed. */ *************** *** 234,238 **** { if (null == mNodes) ! setNodes (); return (mNodes); --- 237,241 ---- { if (null == mNodes) ! setNodes (); return (mNodes); *************** *** 262,266 **** old = getURL (); conn = getConnection (); ! if (((null == old) && (null != url)) || ((null != old) && !old.equals (url))) { try --- 265,270 ---- old = getURL (); conn = getConnection (); ! if (((null == old) && (null != url)) || ((null != old) ! && !old.equals (url))) { try *************** *** 270,275 **** else mParser.setURL (url); ! mPropertySupport.firePropertyChange (PROP_URL_PROPERTY, old, getURL ()); ! mPropertySupport.firePropertyChange (PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setNodes (); } --- 274,281 ---- else mParser.setURL (url); ! mPropertySupport.firePropertyChange ( ! PROP_URL_PROPERTY, old, getURL ()); ! mPropertySupport.firePropertyChange ( ! PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setNodes (); } *************** *** 304,308 **** url = getURL (); conn = getConnection (); ! if (((null == conn) && (null != connection)) || ((null != conn) && !conn.equals (connection))) { try --- 310,315 ---- url = getURL (); conn = getConnection (); ! if (((null == conn) && (null != connection)) || ((null != conn) ! && !conn.equals (connection))) { try *************** *** 312,317 **** else mParser.setConnection (connection); ! mPropertySupport.firePropertyChange (PROP_URL_PROPERTY, url, getURL ()); ! mPropertySupport.firePropertyChange (PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setNodes (); } --- 319,326 ---- else mParser.setConnection (connection); ! mPropertySupport.firePropertyChange ( ! PROP_URL_PROPERTY, url, getURL ()); ! mPropertySupport.firePropertyChange ( ! 
PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setNodes (); } *************** *** 371,378 **** /** ! * Convenience method to apply a {@link StringBean} to the results of filtering. ! * This may yield duplicate or multiple text elements if the node list contains nodes from ! * two or more levels in the same nested tag heirarchy, but if the node list ! * contains only one tag, it provides access to the text within the node. * @return The textual contents of the nodes that pass through the filter set, * as collected by the StringBean. --- 380,388 ---- /** ! * Convenience method to apply a {@link StringBean} to the filter results. ! * This may yield duplicate or multiple text elements if the node list ! * contains nodes from two or more levels in the same nested tag heirarchy, ! * but if the node list contains only one tag, it provides access to the ! * text within the node. * @return The textual contents of the nodes that pass through the filter set, * as collected by the StringBean. Index: HTMLTextBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/HTMLTextBean.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** HTMLTextBean.java 10 Apr 2005 23:20:42 -0000 1.23 --- HTMLTextBean.java 15 May 2005 11:49:03 -0000 1.24 *************** *** 41,45 **** * Created on December 24, 2002, 3:49 PM */ ! public class HTMLTextBean extends JTextArea implements Serializable, PropertyChangeListener { /** --- 41,50 ---- * Created on December 24, 2002, 3:49 PM */ ! public class HTMLTextBean ! extends ! JTextArea ! implements ! Serializable, ! PropertyChangeListener { /** *************** *** 63,73 **** public Dimension getMinimumSize () { ! FontMetrics metrics; int width; int height; ! metrics = getFontMetrics (getFont ()); ! width = metrics.stringWidth ("Hello World"); ! 
height = metrics.getLeading () + metrics.getHeight () + metrics.getDescent (); return (new Dimension (width, height)); --- 68,78 ---- public Dimension getMinimumSize () { ! FontMetrics met; int width; int height; ! met = getFontMetrics (getFont ()); ! width = met.stringWidth ("Hello World"); ! height = met.getLeading () + met.getHeight () + met.getDescent (); return (new Dimension (width, height)); *************** *** 88,92 **** /** * Remove a PropertyChangeListener from the listener list. ! * This removes a PropertyChangeListener that was registered for all properties. * <p><em>Delegates to the underlying StringBean</em> * @param listener The PropertyChangeListener to be removed. --- 93,97 ---- /** * Remove a PropertyChangeListener from the listener list. ! * This removes a registered PropertyChangeListener. * <p><em>Delegates to the underlying StringBean</em> * @param listener The PropertyChangeListener to be removed. *************** *** 178,188 **** /** * Set the 'replace non breaking spaces' state. ! * @param replace_space <code>true</code> if non-breaking spaces (character '\u00a0', * numeric character reference &160; or character entity reference ) * are to be replaced with normal spaces (character '\u0020'). */ ! public void setReplaceNonBreakingSpaces (boolean replace_space) { ! getBean ().setReplaceNonBreakingSpaces (replace_space); } --- 183,193 ---- /** * Set the 'replace non breaking spaces' state. ! * @param replace <code>true</code> if non-breaking spaces (character '\u00a0', * numeric character reference &160; or character entity reference ) * are to be replaced with normal spaces (character '\u0020'). */ ! public void setReplaceNonBreakingSpaces (boolean replace) { ! getBean ().setReplaceNonBreakingSpaces (replace); } *************** *** 206,215 **** /** * Set the current 'collapse whitespace' state. ! * @param collapse_whitespace If <code>true</code>, sequences of whitespace * will be reduced to a single space. */ ! 
public void setCollapse (boolean collapse_whitespace) { ! getBean ().setCollapse (collapse_whitespace); } --- 211,220 ---- /** * Set the current 'collapse whitespace' state. ! * @param collapse If <code>true</code>, sequences of whitespace * will be reduced to a single space. */ ! public void setCollapse (boolean collapse) { ! getBean ().setCollapse (collapse); } *************** *** 259,266 **** // javax.swing.JFrame frame = new javax.swing.JFrame (); // frame.getContentPane ().setLayout (new BorderLayout ()); ! // frame.getContentPane ().add (new JScrollPane (tb), BorderLayout.CENTER); // frame.addWindowListener (new java.awt.event.WindowListener () { // public void windowOpened (java.awt.event.WindowEvent e) {} ! // public void windowClosing (java.awt.event.WindowEvent e) {System.exit (0);} // public void windowClosed (java.awt.event.WindowEvent e) {} // public void windowDeiconified (java.awt.event.WindowEvent e) {} --- 264,275 ---- // javax.swing.JFrame frame = new javax.swing.JFrame (); // frame.getContentPane ().setLayout (new BorderLayout ()); ! // frame.getContentPane ().add (new JScrollPane (tb), ! // BorderLayout.CENTER); // frame.addWindowListener (new java.awt.event.WindowListener () { // public void windowOpened (java.awt.event.WindowEvent e) {} ! // public void windowClosing (java.awt.event.WindowEvent e) ! // { ! // System.exit (0); ! 
// } // public void windowClosed (java.awt.event.WindowEvent e) {} // public void windowDeiconified (java.awt.event.WindowEvent e) {} Index: LinkBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** LinkBean.java 10 Apr 2005 23:20:42 -0000 1.31 --- LinkBean.java 15 May 2005 11:49:03 -0000 1.32 *************** *** 35,39 **** import java.util.Vector; - import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; --- 35,38 ---- *************** *** 170,174 **** /** * Remove a PropertyChangeListener from the listener list. ! * This removes a PropertyChangeListener that was registered for all properties. * @param listener The PropertyChangeListener to be removed. */ --- 169,173 ---- /** * Remove a PropertyChangeListener from the listener list. ! * This removes a registered PropertyChangeListener. * @param listener The PropertyChangeListener to be removed. */ *************** *** 200,204 **** oldValue = mLinks; mLinks = urls; ! mPropertySupport.firePropertyChange (PROP_LINKS_PROPERTY, oldValue, mLinks); } } --- 199,204 ---- oldValue = mLinks; mLinks = urls; ! mPropertySupport.firePropertyChange ( ! PROP_LINKS_PROPERTY, oldValue, mLinks); } } *************** *** 219,223 **** { mLinks = extractLinks (); ! mPropertySupport.firePropertyChange (PROP_LINKS_PROPERTY, null, mLinks); } catch (ParserException hpe) --- 219,224 ---- { mLinks = extractLinks (); ! mPropertySupport.firePropertyChange ( ! PROP_LINKS_PROPERTY, null, mLinks); } catch (ParserException hpe) *************** *** 248,257 **** old = getURL (); ! if (((null == old) && (null != url)) || ((null != old) && !old.equals (url))) { try { mParser.setURL (url); ! mPropertySupport.firePropertyChange (PROP_URL_PROPERTY, old, getURL ()); setLinks (); } --- 249,260 ---- old = getURL (); ! 
if (((null == old) && (null != url)) || ((null != old) ! && !old.equals (url))) { try { mParser.setURL (url); ! mPropertySupport.firePropertyChange ( ! PROP_URL_PROPERTY, old, getURL ()); setLinks (); } *************** *** 296,300 **** { if (0 >= args.length) ! System.out.println ("Usage: java -classpath htmlparser.jar org.htmlparser.beans.LinkBean <http://whatever_url>"); else { --- 299,304 ---- { if (0 >= args.length) ! System.out.println ("Usage: java -classpath htmlparser.jar" ! + " org.htmlparser.beans.LinkBean <http://whatever_url>"); else { Index: StringBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/StringBean.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** StringBean.java 10 Apr 2005 23:20:42 -0000 1.43 --- StringBean.java 15 May 2005 11:49:03 -0000 1.44 *************** *** 90,96 **** /** ! * Property name in event where the 'replace non-breaking spaces' state changes. */ ! public static final String PROP_REPLACE_SPACE_PROPERTY = "replaceNonBreakingSpaces"; /** --- 90,98 ---- /** ! * Property name in event where the 'replace non-breaking spaces' ! * state changes. */ ! public static final String PROP_REPLACE_SPACE_PROPERTY = ! "replaceNonBreakingSpaces"; /** *************** *** 107,116 **** * A newline. */ ! private static final String newline = System.getProperty ("line.separator"); /** ! * The length of the newline. */ ! private static final int newline_size = newline.length (); /** --- 109,118 ---- * A newline. */ ! private static final String NEWLINE = System.getProperty ("line.separator"); /** ! * The length of the NEWLINE. */ ! private static final int NEWLINE_SIZE = NEWLINE.length (); /** *************** *** 141,146 **** /** ! * If <code>true</code> sequences of whitespace characters are replaced with a ! * single space character. */ protected boolean mCollapse; --- 143,148 ---- /** ! 
* If <code>true</code> sequences of whitespace characters are replaced ! * with a single space character. */ protected boolean mCollapse; *************** *** 201,213 **** * Except if the buffer is empty. */ ! protected void carriage_return () { int length; length = mBuffer.length (); ! if ( (0 != length) // why bother appending newlines to the beginning of a buffer ! && ( (newline_size <= length) // not enough chars to hold a newline ! && (!mBuffer.substring (length - newline_size, length).equals (newline)))) ! mBuffer.append (newline); } --- 203,216 ---- * Except if the buffer is empty. */ ! protected void carriageReturn () { int length; length = mBuffer.length (); ! if ((0 != length) // don't append newlines to the beginning of a buffer ! && ((NEWLINE_SIZE <= length) // not enough chars to hold a NEWLINE ! && (!mBuffer.substring ( ! length - NEWLINE_SIZE, length).equals (NEWLINE)))) ! mBuffer.append (NEWLINE); } *************** *** 243,249 **** { length = buffer.length (); ! state = ( (0 == length) ! || (buffer.charAt (length - 1) == ' ') ! || ((newline_size <= length) && buffer.substring (length - newline_size, length).equals (newline))) ? 0 : 1; for (int i = 0; i < chars; i++) { --- 246,255 ---- { length = buffer.length (); ! state = ((0 == length) ! || (buffer.charAt (length - 1) == ' ') ! || ((NEWLINE_SIZE <= length) ! && buffer.substring ( ! length - NEWLINE_SIZE, length).equals (NEWLINE))) ! ? 0 : 1; for (int i = 0; i < chars; i++) { *************** *** 302,306 **** oldValue = mStrings; mStrings = strings; ! mPropertySupport.firePropertyChange (PROP_STRINGS_PROPERTY, oldValue, strings); } } --- 308,313 ---- oldValue = mStrings; mStrings = strings; ! mPropertySupport.firePropertyChange ( ! PROP_STRINGS_PROPERTY, oldValue, strings); } } *************** *** 394,398 **** /** * Remove a PropertyChangeListener from the listener list. ! * This removes a PropertyChangeListener that was registered for all properties. 
* @param listener The PropertyChangeListener to be removed. */ --- 401,405 ---- /** * Remove a PropertyChangeListener from the listener list. ! * This removes a registered PropertyChangeListener. * @param listener The PropertyChangeListener to be removed. */ *************** *** 409,413 **** * Return the textual contents of the URL. * This is the primary output of the bean. ! * @return The user visible (what would be seen in a browser) text from the URL. */ public String getStrings () --- 416,420 ---- * Return the textual contents of the URL. * This is the primary output of the bean. ! * @return The user visible (what would be seen in a browser) text. */ public String getStrings () *************** *** 445,449 **** { mLinks = links; ! mPropertySupport.firePropertyChange (PROP_LINKS_PROPERTY, oldValue, links); resetStrings (); } --- 452,457 ---- { mLinks = links; ! mPropertySupport.firePropertyChange ( ! PROP_LINKS_PROPERTY, oldValue, links); resetStrings (); } *************** *** 473,477 **** old = getURL (); conn = getConnection (); ! if (((null == old) && (null != url)) || ((null != old) && !old.equals (url))) { try --- 481,486 ---- old = getURL (); conn = getConnection (); ! if (((null == old) && (null != url)) || ((null != old) ! && !old.equals (url))) { try *************** *** 481,486 **** else mParser.setURL (url); ! mPropertySupport.firePropertyChange (PROP_URL_PROPERTY, old, getURL ()); ! mPropertySupport.firePropertyChange (PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setStrings (); } --- 490,497 ---- else mParser.setURL (url); ! mPropertySupport.firePropertyChange ( ! PROP_URL_PROPERTY, old, getURL ()); ! mPropertySupport.firePropertyChange ( ! PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setStrings (); } *************** *** 495,500 **** * Get the current 'replace non breaking spaces' state. * @return <code>true</code> if non-breaking spaces (character '\u00a0', ! 
* numeric character reference &#160; or character entity reference &nbsp;) ! * are to be replaced with normal spaces (character '\u0020'). */ public boolean getReplaceNonBreakingSpaces () --- 506,512 ---- * Get the current 'replace non breaking spaces' state. * @return <code>true</code> if non-breaking spaces (character '\u00a0', ! * numeric character reference &#160; or character entity ! * reference &nbsp;) are to be replaced with normal ! * spaces (character '\u0020'). */ public boolean getReplaceNonBreakingSpaces () *************** *** 507,521 **** * If the setting is changed after the URL has been set, the text from the * URL will be reacquired, which is possibly expensive. ! * @param replace_space <code>true</code> if non-breaking spaces (character '\u00a0', ! * numeric character reference &#160; or character entity reference &nbsp;) ! * are to be replaced with normal spaces (character '\u0020'). */ ! public void setReplaceNonBreakingSpaces (boolean replace_space) { boolean oldValue = mReplaceSpace; ! if (oldValue != replace_space) { ! mReplaceSpace = replace_space; ! mPropertySupport.firePropertyChange (PROP_REPLACE_SPACE_PROPERTY, oldValue, replace_space); resetStrings (); } --- 519,535 ---- * If the setting is changed after the URL has been set, the text from the * URL will be reacquired, which is possibly expensive. ! * @param replace <code>true</code> if non-breaking spaces ! * (character '\u00a0', numeric character reference &#160; ! * or character entity reference &nbsp;) are to be replaced with normal ! * spaces (character '\u0020'). */ ! public void setReplaceNonBreakingSpaces (boolean replace) { boolean oldValue = mReplaceSpace; ! if (oldValue != replace) { ! mReplaceSpace = replace; ! mPropertySupport.firePropertyChange (PROP_REPLACE_SPACE_PROPERTY, ! oldValue, replace); resetStrings (); } *************** *** 525,530 **** * Get the current 'collapse whitespace' state. * If set to <code>true</code> this emulates the operation of browsers ! 
* in interpretting text where <quote>user agents should collapse input white ! * space sequences when producing output inter-word space</quote>. * See HTML specification section 9.1 White space * <a href="http://www.w3.org/TR/html4/struct/text.html#h-9.1"> --- 539,544 ---- * Get the current 'collapse whitespace' state. * If set to <code>true</code> this emulates the operation of browsers ! * in interpretting text where <quote>user agents should collapse input ! * white space sequences when producing output inter-word space</quote>. * See HTML specification section 9.1 White space * <a href="http://www.w3.org/TR/html4/struct/text.html#h-9.1"> *************** *** 532,536 **** * @return <code>true</code> if sequences of whitespace (space '\u0020', * tab '\u0009', form feed '\u000C', zero-width space '\u200B', ! * carriage-return '\r' and newline '\n') are to be replaced with a single * space. */ --- 546,550 ---- * @return <code>true</code> if sequences of whitespace (space '\u0020', * tab '\u0009', form feed '\u000C', zero-width space '\u200B', ! * carriage-return '\r' and NEWLINE '\n') are to be replaced with a single * space. */ *************** *** 544,557 **** * If the setting is changed after the URL has been set, the text from the * URL will be reacquired, which is possibly expensive. ! * @param collapse_whitespace If <code>true</code>, sequences of whitespace * will be reduced to a single space. */ ! public void setCollapse (boolean collapse_whitespace) { boolean oldValue = mCollapse; ! if (oldValue != collapse_whitespace) { ! mCollapse = collapse_whitespace; ! mPropertySupport.firePropertyChange (PROP_COLLAPSE_PROPERTY, oldValue, collapse_whitespace); resetStrings (); } --- 558,572 ---- * If the setting is changed after the URL has been set, the text from the * URL will be reacquired, which is possibly expensive. ! * @param collapse If <code>true</code>, sequences of whitespace * will be reduced to a single space. */ ! 
public void setCollapse (boolean collapse) { boolean oldValue = mCollapse; ! if (oldValue != collapse) { ! mCollapse = collapse; ! mPropertySupport.firePropertyChange ( ! PROP_COLLAPSE_PROPERTY, oldValue, collapse); resetStrings (); } *************** *** 581,585 **** url = getURL (); conn = getConnection (); ! if (((null == conn) && (null != connection)) || ((null != conn) && !conn.equals (connection))) { try --- 596,601 ---- url = getURL (); conn = getConnection (); ! if (((null == conn) && (null != connection)) ! || ((null != conn) && !conn.equals (connection))) { try *************** *** 589,594 **** else mParser.setConnection (connection); ! mPropertySupport.firePropertyChange (PROP_URL_PROPERTY, url, getURL ()); ! mPropertySupport.firePropertyChange (PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setStrings (); } --- 605,612 ---- else mParser.setConnection (connection); ! mPropertySupport.firePropertyChange ( ! PROP_URL_PROPERTY, url, getURL ()); ! mPropertySupport.firePropertyChange ( ! PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); setStrings (); } *************** *** 617,621 **** text = Translate.decode (text); if (getReplaceNonBreakingSpaces ()) ! text = text.replace ('\u00a0',' '); if (getCollapse ()) collapse (mBuffer, text); --- 635,639 ---- text = Translate.decode (text); if (getReplaceNonBreakingSpaces ()) ! text = text.replace ('\u00a0', ' '); if (getCollapse ()) collapse (mBuffer, text); *************** *** 629,633 **** /** ! * Appends a newline to the output if the tag breaks flow, and * possibly sets the state of the PRE and SCRIPT flags. * @param tag The tag to examine. --- 647,651 ---- /** ! * Appends a NEWLINE to the output if the tag breaks flow, and * possibly sets the state of the PRE and SCRIPT flags. * @param tag The tag to examine. *************** *** 652,656 **** mIsStyle = true; if (tag.breaksFlow ()) ! carriage_return (); } --- 670,674 ---- mIsStyle = true; if (tag.breaksFlow ()) ! 
carriageReturn (); } *************** *** 679,683 **** { if (0 >= args.length) ! System.out.println ("Usage: java -classpath htmlparser.jar org.htmlparser.beans.StringBean <http://whatever_url>"); else { --- 697,702 ---- { if (0 >= args.length) ! System.out.println ("Usage: java -classpath htmlparser.jar" ! + " org.htmlparser.beans.StringBean <http://whatever_url>"); else { Index: BeanyBaby.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/BeanyBaby.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** BeanyBaby.java 10 Apr 2005 23:20:42 -0000 1.24 --- BeanyBaby.java 15 May 2005 11:49:03 -0000 1.25 *************** *** 37,40 **** --- 37,41 ---- import java.awt.event.WindowEvent; import java.beans.PropertyChangeListener; + import java.util.Vector; import javax.swing.JCheckBoxMenuItem; *************** *** 57,66 **** * Bread crumb trail of visited URLs. */ ! java.util.Vector mTrail; /** * Current position on the bread crumb trail. */ ! int mCrumb; /** Creates new form BeanyBaby */ --- 58,67 ---- * Bread crumb trail of visited URLs. */ ! protected Vector mTrail; /** * Current position on the bread crumb trail. */ ! protected int mCrumb; /** Creates new form BeanyBaby */ *************** *** 68,77 **** { initComponents (); ! mTrail = new java.util.Vector (25); mCrumb = -1; // shenanigans to get the splitter bar at the midpoint setVisible (true); ! mSplitPane.setDividerLocation(0.5); setVisible (false); --- 69,78 ---- { initComponents (); ! mTrail = new Vector (); mCrumb = -1; // shenanigans to get the splitter bar at the midpoint setVisible (true); ! mSplitPane.setDividerLocation (0.5); // 50% for each pane setVisible (false); *************** *** 118,126 **** name = event.getPropertyName (); if (name.equals (StringBean.PROP_LINKS_PROPERTY)) ! 
mLinks.setSelected (((Boolean)event.getNewValue ()).booleanValue ()); else if (name.equals (StringBean.PROP_COLLAPSE_PROPERTY)) ! mCollapse.setSelected (((Boolean)event.getNewValue ()).booleanValue ()); else if (name.equals (StringBean.PROP_REPLACE_SPACE_PROPERTY)) ! mNobreak.setSelected (((Boolean)event.getNewValue ()).booleanValue ()); } } --- 119,130 ---- name = event.getPropertyName (); if (name.equals (StringBean.PROP_LINKS_PROPERTY)) ! mLinks.setSelected ( ! ((Boolean)event.getNewValue ()).booleanValue ()); else if (name.equals (StringBean.PROP_COLLAPSE_PROPERTY)) ! mCollapse.setSelected ( ! ((Boolean)event.getNewValue ()).booleanValue ()); else if (name.equals (StringBean.PROP_REPLACE_SPACE_PROPERTY)) ! mNobreak.setSelected ( ! ((Boolean)event.getNewValue ()).booleanValue ()); } } *************** *** 192,197 **** // MouseListener interface // /** ! * Invoked when the mouse button has been clicked (pressed and released) on a component. * @param event Details on the mouse event. */ --- 196,203 ---- // MouseListener interface // + /** ! * Invoked when the mouse button has been clicked on a component. ! * A mouse click is a press and release of a mouse button. * @param event Details on the mouse event. */ *************** *** 244,248 **** // API control // ! /** * Set the URL for the bean. --- 250,254 ---- // API control // ! /** * Set the URL for the bean. *************** *** 372,377 **** }//GEN-END:initComponents ! /** Exit the Application */ ! private void exitForm (WindowEvent evt) {//GEN-FIRST:event_exitForm System.exit (0); --- 378,384 ---- }//GEN-END:initComponents ! /** Exit the Application ! * @param event Details about the window event. */ ! private void exitForm (WindowEvent event) {//GEN-FIRST:event_exitForm System.exit (0); |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser Modified Files: Attribute.java NodeFactory.java NodeFilter.java Remark.java Tag.java Text.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Attribute.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** Attribute.java 12 Apr 2005 11:27:40 -0000 1.6 --- Attribute.java 15 May 2005 11:49:03 -0000 1.7 *************** *** 33,43 **** * Holds the name, assignment string, value and quote character. * <p> ! * This class was made deliberately simple. Except for {@link #setRawValue RawValue}, ! * the properties are completely orthogonal, that is: each property is independant ! * of the others. This means you have enough rope here to hang yourself, and ! * it's very easy to create malformed HTML. Where it's obvious, warnings and ! * notes have been provided in the setters javadocs, but it is up to you -- the ! * programmer -- to ensure that the contents of the four fields will yield ! * valid HTML (if that's what you want). * <p> * Be especially mindful of quotes and assignment strings. These are handled --- 33,44 ---- * Holds the name, assignment string, value and quote character. * <p> ! * This class was made deliberately simple. Except for ! * {@link #setRawValue RawValue}, the properties are completely orthogonal, ! * that is: each property is independant of the others. This means you have ! * enough rope here to hang yourself, and it's very easy to create ! * malformed HTML. Where it's obvious, warnings and notes have been provided ! * in the setters javadocs, but it is up to you -- the programmer -- ! * to ensure that the contents of the four fields will yield valid HTML ! * (if that's what you want). 
* <p> * Be especially mindful of quotes and assignment strings. These are handled *************** *** 142,153 **** * </table> * <br>In words: ! * <br>If Name is null, and Assignment is null, and Quote is zero, it is whitepace and Value has the whitespace text -- value ! * <br>If Name is not null, and both Assignment and Value are null it's a standalone attribute -- name ! * <br>If Name is not null, and Assignment is an equals sign, and Quote is zero it's an empty attribute -- name= ! * <br>If Name is not null, and Assignment is an equals sign, and Value is "" or null, and Quote is ' it's an empty single quoted attribute -- name='' ! * <br>If Name is not null, and Assignment is an equals sign, and Value is "" or null, and Quote is " it's an empty double quoted attribute -- name="" ! * <br>If Name is not null, and Assignment is an equals sign, and Value is something, and Quote is zero it's a naked attribute -- name=value ! * <br>If Name is not null, and Assignment is an equals sign, and Value is something, and Quote is ' it's a single quoted attribute -- name='value' ! * <br>If Name is not null, and Assignment is an equals sign, and Value is something, and Quote is " it's a double quoted attribute -- name="value" * <br>All other states are invalid HTML. * <p> --- 143,167 ---- * </table> * <br>In words: ! * <br>If Name is null, and Assignment is null, and Quote is zero, ! * it's whitepace and Value has the whitespace text -- value ! * <br>If Name is not null, and both Assignment and Value are null ! * it's a standalone attribute -- name ! * <br>If Name is not null, and Assignment is an equals sign, and Quote is zero ! * it's an empty attribute -- name= ! * <br>If Name is not null, and Assignment is an equals sign, ! * and Value is "" or null, and Quote is ' ! * it's an empty single quoted attribute -- name='' ! * <br>If Name is not null, and Assignment is an equals sign, ! * and Value is "" or null, and Quote is " ! * it's an empty double quoted attribute -- name="" ! 
* <br>If Name is not null, and Assignment is an equals sign, ! * and Value is something, and Quote is zero ! * it's a naked attribute -- name=value ! * <br>If Name is not null, and Assignment is an equals sign, ! * and Value is something, and Quote is ' ! * it's a single quoted attribute -- name='value' ! * <br>If Name is not null, and Assignment is an equals sign, ! * and Value is something, and Quote is " ! * it's a double quoted attribute -- name="value" * <br>All other states are invalid HTML. * <p> *************** *** 177,181 **** * vice versa. Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). ! * For doublequotes authors can also use the character entity reference &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters --- 191,196 ---- * vice versa. Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). ! * For doublequotes authors can also use the character entity reference ! * &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters *************** *** 186,192 **** * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each ! * attribute in the reference manual indicates whether its value is case-insensitive.<p> * All the attributes defined by this specification are listed in the ! * <a href="http://www.w3.org/TR/html4/index/attributes.html">attribute index</a>.<p> * </cite> * <p> --- 201,209 ---- * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each ! * attribute in the reference manual indicates whether its value is ! * case-insensitive.<p> * All the attributes defined by this specification are listed in the ! 
* <a href="http://www.w3.org/TR/html4/index/attributes.html">attribute ! * index</a>.<p> * </cite> * <p> *************** *** 226,230 **** /** ! * Create an attribute with the name, assignment string, value and quote given. * If the quote value is zero, assigns the value using {@link #setRawValue} * which sets the quote character to a proper value if necessary. --- 243,247 ---- /** ! * Create an attribute with the name, assignment, value and quote given. * If the quote value is zero, assigns the value using {@link #setRawValue} * which sets the quote character to a proper value if necessary. *************** *** 309,313 **** /** * Create an empty attribute. ! * This will provide "" from the {@link #toString} and * {@link #toString(StringBuffer)} methods. */ --- 326,330 ---- /** * Create an empty attribute. ! * This will provide "" from the {@link #toString} and * {@link #toString(StringBuffer)} methods. */ *************** *** 479,483 **** if (0 != quote) { ! buffer = new StringBuffer (); // todo: can we get the value length? buffer.append (quote); getValue (buffer); --- 496,500 ---- if (0 != quote) { ! buffer = new StringBuffer (); // todo: what is the value length? buffer.append (quote); getValue (buffer); *************** *** 532,541 **** if ((null != value) && (0 != value.trim ().length ())) { ! if (value.startsWith ("'") && value.endsWith ("'") && (2 <= value.length ())) { quote = '\''; value = value.substring (1, value.length () - 1); } ! else if (value.startsWith ("\"") && value.endsWith ("\"") && (2 <= value.length ())) { quote = '"'; --- 549,560 ---- if ((null != value) && (0 != value.trim ().length ())) { ! if (value.startsWith ("'") && value.endsWith ("'") ! && (2 <= value.length ())) { quote = '\''; value = value.substring (1, value.length () - 1); } ! else if (value.startsWith ("\"") && value.endsWith ("\"") ! && (2 <= value.length ())) { quote = '"'; *************** *** 562,566 **** needed = true; } ! else if (!('-' == ch) && !('.' 
== ch) && !('_' == ch) && !(':' == ch) && !Character.isLetterOrDigit (ch)) { --- 581,585 ---- needed = true; } ! else if (!('-' == ch) && !('.' == ch) && !('_' == ch) && !(':' == ch) && !Character.isLetterOrDigit (ch)) { *************** *** 583,587 **** ref = "&quot;"; // Translate.encode (quote); // JDK 1.4: value = value.replaceAll ("\"", ref); ! buffer = new StringBuffer (value.length() * 5); for (int i = 0; i < value.length (); i++) { --- 602,607 ---- ref = "&quot;"; // Translate.encode (quote); // JDK 1.4: value = value.replaceAll ("\"", ref); ! buffer = new StringBuffer ( ! value.length() * (ref.length () - 1)); for (int i = 0; i < value.length (); i++) { *************** *** 600,604 **** setQuote (quote); } ! /** * Predicate to determine if this attribute is whitespace. --- 620,624 ---- setQuote (quote); } ! /** * Predicate to determine if this attribute is whitespace. *************** *** 666,670 **** if (0 != quote) ret += 2; ! return (ret); } --- 686,690 ---- if (0 != quote) ret += 2; ! return (ret); } *************** *** 698,702 **** return (ret.toString ()); } ! /** * Get a text representation of this attribute. --- 718,722 ---- return (ret.toString ()); } ! /** * Get a text representation of this attribute. Index: Remark.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Remark.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Remark.java 10 Apr 2005 23:20:42 -0000 1.2 --- Remark.java 15 May 2005 11:49:03 -0000 1.3 *************** *** 27,32 **** package org.htmlparser; - import org.htmlparser.Node; - /** * This interface represents a comment in the HTML document. --- 27,30 ---- *************** *** 36,52 **** Node { - /** * Returns the text contents of the comment tag. * @return The contents of the text inside the comment delimiters. */ ! public String getText(); /** * Sets the string contents of the node. ! 
* If the text has the remark delimiters (<!-- -->), these are stripped off. * @param text The new text for the node. */ ! public void setText (String text); // --- 34,50 ---- Node { /** * Returns the text contents of the comment tag. * @return The contents of the text inside the comment delimiters. */ ! String getText(); /** * Sets the string contents of the node. ! * If the text has the remark delimiters (<!-- -->), ! * these are stripped off. * @param text The new text for the node. */ ! void setText (String text); // *************** *** 54,101 **** // ! // public void accept (org.htmlparser.visitors.NodeVisitor visitor) // { // } ! // ! // public void collectInto (org.htmlparser.util.NodeList collectionList, NodeFilter filter) // { // } ! // ! // public org.htmlparser.util.NodeList getChildren () // { // } ! // // public int getEndPosition () // { // } ! // // public Node getParent () // { // } ! // // public int getStartPosition () // { // } ! // ! // public void setChildren (org.htmlparser.util.NodeList children) // { // } ! // // public void setEndPosition (int position) // { // } ! // // public void setParent (Node node) // { // } ! // // public void setStartPosition (int position) // { // } ! // // public String toHtml () // { // } ! // // public String toPlainTextString () // { --- 52,99 ---- // ! // public void accept (NodeVisitor visitor) // { // } ! // ! // public void collectInto (NodeList collectionList, NodeFilter filter) // { // } ! // ! // public NodeList getChildren () // { // } ! // // public int getEndPosition () // { // } ! // // public Node getParent () // { // } ! // // public int getStartPosition () // { // } ! // ! // public void setChildren (NodeList children) // { // } ! // // public void setEndPosition (int position) // { // } ! // // public void setParent (Node node) // { // } ! // // public void setStartPosition (int position) // { // } ! // // public String toHtml () // { // } ! 
// // public String toPlainTextString () // { Index: Tag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Tag.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Tag.java 10 Apr 2005 23:20:42 -0000 1.5 --- Tag.java 15 May 2005 11:49:03 -0000 1.6 *************** *** 33,37 **** /** ! * This interface represents a tag such as <xxx yyy="zzz"> in the HTML document. * Adds capabilities to a Node that are specific to a tag. */ --- 33,37 ---- /** ! * This interface represents a tag (<xxx yyy="zzz">) in the HTML document. * Adds capabilities to a Node that are specific to a tag. */ *************** *** 44,48 **** * not exist, or is a stand-alone or */ ! public String getAttribute (String name); /** --- 44,48 ---- * not exist, or is a stand-alone or */ ! String getAttribute (String name); /** *************** *** 52,59 **** * @param value The value of the attribute. */ ! public void setAttribute (String key, String value); /** ! * Set attribute with given key, value pair where the value is quoted by quote. * @param key The name of the attribute. * @param value The value of the attribute. --- 52,59 ---- * @param value The value of the attribute. */ ! void setAttribute (String key, String value); /** ! * Set attribute with given key/value pair, the value is quoted by quote. * @param key The name of the attribute. * @param value The value of the attribute. *************** *** 61,65 **** * If zero, it is an unquoted value. */ ! public void setAttribute (String key, String value, char quote); /** --- 61,65 ---- * If zero, it is an unquoted value. */ ! void setAttribute (String key, String value, char quote); /** *************** *** 67,71 **** * @param key The name of the attribute. */ ! public void removeAttribute (String key); /** --- 67,71 ---- * @param key The name of the attribute. */ ! 
void removeAttribute (String key); /** *************** *** 75,79 **** * not exist. */ ! public Attribute getAttributeEx (String name); /** --- 75,79 ---- * not exist. */ ! Attribute getAttributeEx (String name); /** *************** *** 83,87 **** * @param attribute The attribute to set. */ ! public void setAttributeEx (Attribute attribute); /** --- 83,87 ---- * @param attribute The attribute to set. */ ! void setAttributeEx (Attribute attribute); /** *************** *** 89,93 **** * @return Returns the list of {@link Attribute Attributes} in the tag. */ ! public Vector getAttributesEx (); /** --- 89,93 ---- * @return Returns the list of {@link Attribute Attributes} in the tag. */ ! Vector getAttributesEx (); /** *************** *** 98,103 **** * @param attribs The attribute collection to set. */ ! public void setAttributesEx (Vector attribs); ! /** * Gets the attributes in the tag. --- 98,103 ---- * @param attribs The attribute collection to set. */ ! void setAttributesEx (Vector attribs); ! /** * Gets the attributes in the tag. *************** *** 107,119 **** * <code>String</code> objects available from this <code>Hashtable</code>. * @return Returns a list of name/value pairs representing the attributes. ! * These are not in order, the keys (names) are converted to uppercase and the values ! * are not quoted, even if they need to be. The table <em>will</em> return ! * <code>null</code> if there was no value for an attribute (no equals ! * sign or nothing to the right of the equals sign). A special entry with ! * a key of SpecialHashtable.TAGNAME ("$<TAGNAME>$") holds the tag name. * The conversion to uppercase is performed with an ENGLISH locale. * @deprecated Use getAttributesEx() instead. */ ! public Hashtable getAttributes (); /** --- 107,120 ---- * <code>String</code> objects available from this <code>Hashtable</code>. * @return Returns a list of name/value pairs representing the attributes. ! 
* These are not in order, the keys (names) are converted to uppercase ! * and the values are not quoted, even if they need to be. ! * The table <em>will</em> return <code>null</code> if there was no value ! * for an attribute (either no equals sign or nothing to the right of the ! * equals sign). A special entry with a key of ! * SpecialHashtable.TAGNAME ("$<TAGNAME>$") holds the tag name. * The conversion to uppercase is performed with an ENGLISH locale. * @deprecated Use getAttributesEx() instead. */ ! Hashtable getAttributes (); /** *************** *** 124,128 **** * @deprecated Use setAttributesEx() instead. */ ! public void setAttributes (Hashtable attributes); /** --- 125,129 ---- * @deprecated Use setAttributesEx() instead. */ ! void setAttributes (Hashtable attributes); /** *************** *** 137,141 **** * @return The tag name. */ ! public String getTagName (); /** --- 138,142 ---- * @return The tag name. */ ! String getTagName (); /** *************** *** 145,149 **** * @param name The tag name. */ ! public void setTagName (String name); /** --- 146,150 ---- * @param name The tag name. */ ! void setTagName (String name); /** *************** *** 152,156 **** * whitespace. */ ! public String getRawTagName (); /** --- 153,157 ---- * whitespace. */ ! String getRawTagName (); /** *************** *** 159,163 **** * <code>false</code> otherwise. */ ! public boolean breaksFlow (); /** --- 160,164 ---- * <code>false</code> otherwise. */ ! boolean breaksFlow (); /** *************** *** 165,177 **** * @return <code>true</code> if this tag is an end tag. */ ! public boolean isEndTag (); ! ! /** ! * Set this tag to be an end tag, or not. ! * Adds or removes the leading slash on the tag name. ! * @param endTag If true, this tag is made into an end tag. ! * Any attributes it may have had are dropped. ! */ ! // public void setEndTag (boolean endTag); /** --- 166,170 ---- * @return <code>true</code> if this tag is an end tag. */ ! 
boolean isEndTag (); /** *************** *** 179,183 **** * @return true if the last character of the last attribute is a '/'. */ ! public boolean isEmptyXmlTag (); /** --- 172,176 ---- * @return true if the last character of the last attribute is a '/'. */ ! boolean isEmptyXmlTag (); /** *************** *** 187,191 **** * i.e. <tag/>, otherwise removes it. */ ! public void setEmptyXmlTag (boolean emptyXmlTag); /** --- 180,184 ---- * i.e. <tag/>, otherwise removes it. */ ! void setEmptyXmlTag (boolean emptyXmlTag); /** *************** *** 194,198 **** * @return The names to be matched that create tags of this type. */ ! public String[] getIds (); /** --- 187,191 ---- * @return The names to be matched that create tags of this type. */ ! String[] getIds (); /** *************** *** 204,208 **** * @return The names of following tags that stop further scanning. */ ! public String[] getEnders (); /** --- 197,201 ---- * @return The names of following tags that stop further scanning. */ ! String[] getEnders (); /** *************** *** 214,218 **** * @return The names of following end tags that stop further scanning. */ ! public String[] getEndTagEnders (); /** --- 207,211 ---- * @return The names of following end tags that stop further scanning. */ ! String[] getEndTagEnders (); /** *************** *** 221,232 **** * @return The tag that terminates this composite tag, i.e. </HTML>. */ ! public Tag getEndTag (); /** * Set the end tag for this (composite) tag. * For a non-composite tag this is a no-op. ! * @param end The tag that terminates this composite tag, i.e. </HTML>. */ ! public void setEndTag (Tag end); /** --- 214,225 ---- * @return The tag that terminates this composite tag, i.e. </HTML>. */ ! Tag getEndTag (); /** * Set the end tag for this (composite) tag. * For a non-composite tag this is a no-op. ! * @param tag The tag that closes this composite tag, i.e. </HTML>. */ ! 
void setEndTag (Tag tag); /** *************** *** 234,238 **** * @return The scanner associated with this tag. */ ! public Scanner getThisScanner (); /** --- 227,231 ---- * @return The scanner associated with this tag. */ ! Scanner getThisScanner (); /** *************** *** 240,254 **** * @param scanner The scanner for this tag. */ ! public void setThisScanner (Scanner scanner); ! /** * Get the line number where this tag starts. * @return The (zero based) line number in the page where this tag starts. */ ! public int getStartingLineNumber (); /** * Get the line number where this tag ends. * @return The (zero based) line number in the page where this tag ends. */ ! public int getEndingLineNumber (); } --- 233,247 ---- * @param scanner The scanner for this tag. */ ! void setThisScanner (Scanner scanner); ! /** * Get the line number where this tag starts. * @return The (zero based) line number in the page where this tag starts. */ ! int getStartingLineNumber (); /** * Get the line number where this tag ends. * @return The (zero based) line number in the page where this tag ends. */ ! int getEndingLineNumber (); } Index: Text.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Text.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Text.java 10 Apr 2005 23:20:42 -0000 1.2 --- Text.java 15 May 2005 11:49:03 -0000 1.3 *************** *** 27,32 **** package org.htmlparser; - import org.htmlparser.Node; - /** * This interface represents a piece of the content of the HTML document. --- 27,30 ---- *************** *** 38,44 **** /** * Accesses the textual contents of the node. ! * Returns the text of the node. */ ! public String getText (); /** --- 36,42 ---- /** * Accesses the textual contents of the node. ! * @return The text of the node. */ ! String getText (); /** *************** *** 46,50 **** * @param text The new text for the node. */ ! 
public void setText (String text); // --- 44,48 ---- * @param text The new text for the node. */ ! void setText (String text); // *************** *** 52,111 **** // ! // public void accept (org.htmlparser.visitors.NodeVisitor visitor) // { // } ! // ! // public void collectInto (org.htmlparser.util.NodeList collectionList, NodeFilter filter) // { // } ! // ! // public void doSemanticAction () throws org.htmlparser.util.ParserException // { // } ! // ! // public org.htmlparser.util.NodeList getChildren () // { // } ! // // public int getEndPosition () // { // } ! // // public Node getParent () // { // } ! // // public int getStartPosition () // { // } ! // // public String getText () // { // } ! // ! // public void setChildren (org.htmlparser.util.NodeList children) // { // } ! // // public void setEndPosition (int position) // { // } ! // // public void setParent (Node node) // { // } ! // // public void setStartPosition (int position) // { // } ! // // public void setText (String text) // { // } ! // // public String toHtml () // { // } ! // // public String toPlainTextString () // { --- 50,109 ---- // ! // public void accept (NodeVisitor visitor) // { // } ! // ! // public void collectInto (.NodeList collectionList, NodeFilter filter) // { // } ! // ! // public void doSemanticAction () throws ParserException // { // } ! // ! // public NodeList getChildren () // { // } ! // // public int getEndPosition () // { // } ! // // public Node getParent () // { // } ! // // public int getStartPosition () // { // } ! // // public String getText () // { // } ! // ! // public void setChildren (NodeList children) // { // } ! // // public void setEndPosition (int position) // { // } ! // // public void setParent (Node node) // { // } ! // // public void setStartPosition (int position) // { // } ! // // public void setText (String text) // { // } ! // // public String toHtml () // { // } ! 
// // public String toPlainTextString () // { Index: NodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/NodeFactory.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** NodeFactory.java 10 Apr 2005 23:20:42 -0000 1.2 --- NodeFactory.java 15 May 2005 11:49:03 -0000 1.3 *************** *** 29,35 **** import java.util.Vector; - import org.htmlparser.Remark; - import org.htmlparser.Tag; - import org.htmlparser.Text; import org.htmlparser.lexer.Page; import org.htmlparser.util.ParserException; --- 29,32 ---- *************** *** 57,64 **** * @param start The beginning position of the string. * @param end The ending positiong of the string. ! * @throws ParserException If there is a problem encountered in creating the node. * @return A text node comprising the indicated characters from the page. */ ! public Text createStringNode (Page page, int start, int end) throws ParserException; --- 54,62 ---- * @param start The beginning position of the string. * @param end The ending positiong of the string. ! * @throws ParserException If there is a problem encountered ! * when creating the node. * @return A text node comprising the indicated characters from the page. */ ! Text createStringNode (Page page, int start, int end) throws ParserException; *************** *** 69,76 **** * @param start The beginning position of the remark. * @param end The ending positiong of the remark. ! * @throws ParserException If there is a problem encountered in creating the node. * @return A remark node comprising the indicated characters from the page. */ ! public Remark createRemarkNode (Page page, int start, int end) throws ParserException; --- 67,75 ---- * @param start The beginning position of the remark. * @param end The ending positiong of the remark. ! * @throws ParserException If there is a problem encountered ! * when creating the node. 
* @return A remark node comprising the indicated characters from the page. */ ! Remark createRemarkNode (Page page, int start, int end) throws ParserException; *************** *** 86,93 **** * @param end The ending positiong of the tag. * @param attributes The attributes contained in this tag. ! * @throws ParserException If there is a problem encountered in creating the node. * @return A tag node comprising the indicated characters from the page. */ ! public Tag createTagNode (Page page, int start, int end, Vector attributes) throws ParserException; --- 85,93 ---- * @param end The ending positiong of the tag. * @param attributes The attributes contained in this tag. ! * @throws ParserException If there is a problem encountered ! * when creating the node. * @return A tag node comprising the indicated characters from the page. */ ! Tag createTagNode (Page page, int start, int end, Vector attributes) throws ParserException; Index: NodeFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/NodeFilter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** NodeFilter.java 5 Apr 2005 00:48:10 -0000 1.3 --- NodeFilter.java 15 May 2005 11:49:03 -0000 1.4 *************** *** 33,39 **** */ public interface NodeFilter ! extends ! Serializable, ! Cloneable { /** --- 33,39 ---- */ public interface NodeFilter ! extends ! Serializable, ! Cloneable { /** |
From: Derrick O. <der...@us...> - 2005-05-15 11:49:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/scanners Modified Files: ScriptDecoder.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: ScriptDecoder.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptDecoder.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** ScriptDecoder.java 13 Mar 2005 14:51:45 -0000 1.3 --- ScriptDecoder.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 502,504 **** // System.exit (ret); // } ! } \ No newline at end of file --- 502,504 ---- // System.exit (ret); // } ! } |
From: Derrick O. <der...@us...> - 2005-05-13 10:44:24
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13512/docs Modified Files: contributors.html Log Message: Add parse(InputSource) suggested by Jamie McCrindle. Index: contributors.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** contributors.html 6 Apr 2005 10:20:21 -0000 1.15 --- contributors.html 13 May 2005 10:44:15 -0000 1.16 *************** *** 396,400 **** </tr> </table> ! <p>Thanks to John Derrick, David Andersen, Manuel Polo, Enrico Triolo, Gernot Fricke, Nick Burch, Stephen Harrington, Domenico Lordi, Kamen, John Zook, Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, --- 396,400 ---- </tr> </table> ! <p>Thanks to Jamie McCrindle, John Derrick, David Andersen, Manuel Polo, Enrico Triolo, Gernot Fricke, Nick Burch, Stephen Harrington, Domenico Lordi, Kamen, John Zook, Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, |
From: Derrick O. <der...@us...> - 2005-05-13 10:44:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13512/src/org/htmlparser/sax Modified Files: XMLReader.java Log Message: Add parse(InputSource) suggested by Jamie McCrindle. Index: XMLReader.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/sax/XMLReader.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** XMLReader.java 12 Apr 2005 11:27:43 -0000 1.2 --- XMLReader.java 13 May 2005 10:44:15 -0000 1.3 *************** *** 28,31 **** --- 28,33 ---- import java.io.IOException; + import org.htmlparser.lexer.Lexer; + import org.htmlparser.lexer.Page; import org.xml.sax.ContentHandler; *************** *** 507,513 **** throws IOException, SAXException { ! throw new SAXException ("parse (InputSource input) is not yet supported"); ! } /** --- 509,552 ---- throws IOException, SAXException { ! Locator locator; ! ParserFeedback feedback; + if (null != mContentHandler) + try + { + mParser = new Parser ( + new Lexer ( + new Page ( + input.getByteStream (), + input.getEncoding ()))); + locator = new Locator (mParser); + if (null != mErrorHandler) + feedback = new Feedback (mErrorHandler, locator); + else + feedback = new DefaultParserFeedback (0); + mParser.setFeedback (feedback); + mContentHandler.setDocumentLocator (locator); + try + { + mContentHandler.startDocument (); + for (NodeIterator iterator = mParser.elements (); + iterator.hasMoreNodes (); + doSAX (iterator.nextNode ())); + mContentHandler.endDocument (); + } + catch (SAXException se) + { + if (null != mErrorHandler) + mErrorHandler.fatalError (new SAXParseException ( + "contentHandler threw me", locator, se)); + } + } + catch (ParserException pe) + { + if (null != mErrorHandler) + mErrorHandler.fatalError (new SAXParseException ( + pe.getMessage (), "", "", 0, 0)); + } + } /** |
From: Derrick O. <der...@us...> - 2005-05-10 22:11:57
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6956 Modified Files: SelectTagTest.java Log Message: Remove Shamil's email address. Index: SelectTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/SelectTagTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** SelectTagTest.java 2 Jan 2004 16:24:57 -0000 1.40 --- SelectTagTest.java 10 May 2005 22:11:48 -0000 1.41 *************** *** 114,118 **** /** ! * Bug reproduction based on report by gu...@cc... */ public void testSelectTagWithComments() throws Exception { --- 114,118 ---- /** ! * Bug reproduction. */ public void testSelectTagWithComments() throws Exception { |
From: Derrick O. <der...@us...> - 2005-04-24 17:48:45
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5117/htmlparser/docs Modified Files: main.html Log Message: Documentation revamp part three. Reworked some JavaDoc descriptions. Added "HTML Parser for dummies" introductory text. Removed checkstyle.jar and fit.jar (and their cruft). Index: main.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/main.html,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** main.html 4 Jan 2004 03:23:08 -0000 1.8 --- main.html 24 Apr 2005 17:48:06 -0000 1.9 *************** *** 95,107 **** <li>conversion to XML, moving existing web pages to XML</li> </ul> ! During or after reading in a page, operations on the nodes can ! accomplish many transformation tasks "in place", which can then be output ! with the <a href="javadoc/org/htmlparser/Node.html#toHtml()">toHtml()</a> method. ! Depending on the purpose of your application, you will probably want to look ! into node decorators, ! <a href="javadoc/org/htmlparser/visitors/package-summary.html">visitors</a>, or <a href="javadoc/org/htmlparser/tags/package-summary.html">custom tags</a> in conjunction with the <a href="javadoc/org/htmlparser/PrototypicalNodeFactory.html">PrototypicalNodeFactory</a>. <p>The HTML Parser is an open source library released under <a href="http://www.opensource.org/licenses/lgpl-license.html">GNU Lesser General Public --- 95,107 ---- <li>conversion to XML, moving existing web pages to XML</li> </ul> ! Transformation can occur 'on the fly' when using <a href="javadoc/org/htmlparser/tags/package-summary.html">custom tags</a> in conjunction with the <a href="javadoc/org/htmlparser/PrototypicalNodeFactory.html">PrototypicalNodeFactory</a>. + Or transformation can occur on a list of nodes after extraction using one or + more <a href="javadoc/org/htmlparser/visitors/package-summary.html">visitors</a>.
+ In either case you will need to output the NodeList returned by the parse() + method with the <a href="javadoc/org/htmlparser/util/NodeList.html#toHtml()">toHtml()</a> + method. <p>The HTML Parser is an open source library released under <a href="http://www.opensource.org/licenses/lgpl-license.html">GNU Lesser General Public |
From: Derrick O. <der...@us...> - 2005-04-24 17:48:44
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5117/htmlparser Modified Files: build.xml Log Message: Documentation revamp part three. Reworked some JavaDoc descriptions. Added "HTML Parser for dummies" introductory text. Removed checkstyle.jar and fit.jar (and their cruft). Index: build.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/build.xml,v retrieving revision 1.77 retrieving revision 1.78 diff -C2 -d -r1.77 -r1.78 *** build.xml 6 Apr 2005 10:27:59 -0000 1.77 --- build.xml 24 Apr 2005 17:48:05 -0000 1.78 *************** *** 122,140 **** <property name="junit.jar" value="${lib}/junit.jar"/> <property name="sax2.jar" value="${lib}/sax2.jar"/> ! <taskdef resource="checkstyletask.properties" ! classpath="${lib}/checkstyle-all-3.1.jar"/> ! <target name="checkstyle" description="check source code adheres to coding standards"> <checkstyle config="${resources}/htmlparser_checks.xml"> ! <fileset dir="src" includes="**/*.java"/> </checkstyle> </target> <target name="JDK1.4"> <condition property="JDK1.4"> <or> <equals arg1="1.4" arg2="${ant.java.version}"/> ! <equals arg1="1.4" arg2="${ant.java.version}"/> </or> </condition> --- 122,175 ---- <property name="junit.jar" value="${lib}/junit.jar"/> <property name="sax2.jar" value="${lib}/sax2.jar"/> + <property name="check" value="*.java"/> + <available property="checkstyle_present" file="${lib}/checkstyle.jar"/> + <available property="fit_present" file="${lib}/fit.jar"/> ! <target name="fetch_checkstyle" description="gets the checkstyle jar file" ! unless="checkstyle_present"> ! <get dest="${lib}/checkstyle_distribution.zip" usetimestamp="true" ! ignoreerrors="true" verbose="true" ! src="http://heanet.dl.sourceforge.net/sourceforge/checkstyle/checkstyle-3.5.zip"/> ! <unzip src="${lib}/checkstyle_distribution.zip" dest="${lib}/"> ! <patternset> ! <include name="**/checkstyle-all*.jar"/> ! </patternset> !
</unzip> ! <delete file="${lib}/checkstyle_distribution.zip"/> ! <move tofile="${lib}/checkstyle.jar"> ! <fileset dir="${lib}" includes="**/checkstyle-all*.jar"/> ! </move> ! <delete includeEmptyDirs="true"> ! <fileset dir="${lib}" includes="**/checkstyle-*"/> ! </delete> ! </target> ! <target name="checkstyle" depends="fetch_checkstyle" ! description="check source code adheres to coding standards"> ! <taskdef resource="checkstyletask.properties" ! classpath="${lib}/checkstyle.jar"/> <checkstyle config="${resources}/htmlparser_checks.xml"> ! <fileset dir="src" includes="**/${check}"/> </checkstyle> </target> + <target name="fetch_fit" description="gets the fit jar file" + unless="fit_present"> + <get dest="${lib}/fit_distribution.zip" usetimestamp="true" + ignoreerrors="true" verbose="true" + src="http://puzzle.dl.sourceforge.net/sourceforge/fit/fit-java-1.1.zip"/> + <unzip src="${lib}/fit_distribution.zip" dest="${lib}/"> + <patternset> + <include name="**/fit.jar"/> + </patternset> + </unzip> + <delete file="${lib}/fit_distribution.zip"/> + </target> + <target name="JDK1.4"> <condition property="JDK1.4"> <or> <equals arg1="1.4" arg2="${ant.java.version}"/> ! <equals arg1="1.5" arg2="${ant.java.version}"/> </or> </condition> *************** *** 334,338 **** <mkdir dir="${lib}"/> <mkdir dir="${classes}"/> ! <javac compiler="javac1.4" srcdir="${src}" destdir="${classes}" debug="on" classpath="${classes}:${lib}/htmllexer.jar" source="1.3"> <include name="org/htmlparser/lexerapplications/thumbelina/**/*.java"/> </javac> --- 369,373 ---- <mkdir dir="${lib}"/> <mkdir dir="${classes}"/> ! <javac srcdir="${src}" destdir="${classes}" debug="on" classpath="${classes}:${lib}/htmllexer.jar" source="1.3"> <include name="org/htmlparser/lexerapplications/thumbelina/**/*.java"/> </javac> *************** *** 353,357 **** <mkdir dir="${lib}"/> <mkdir dir="${classes}"/> ! 
<javac compiler="javac1.4" srcdir="${src}" destdir="${classes}" debug="on" classpath="${classes}:${lib}/htmlparser.jar" source="1.3"> <include name="org/htmlparser/parserapplications/filterbuilder/**/*.java"/> </javac> --- 388,392 ---- <mkdir dir="${lib}"/> <mkdir dir="${classes}"/> ! <javac srcdir="${src}" destdir="${classes}" debug="on" classpath="${classes}:${lib}/htmlparser.jar" source="1.3"> <include name="org/htmlparser/parserapplications/filterbuilder/**/*.java"/> </javac> *************** *** 482,487 **** </target> ! <!-- Perform the htmlparser integration --> ! <target name="htmlparser" depends="init,release,sources" description="create distribution zip file"> <mkdir dir="${distribution}"/> --- 517,522 ---- </target> ! <!-- Perform the htmlparser integration --><!--,release,sources" --> ! <target name="htmlparser" depends="init" description="create distribution zip file"> <mkdir dir="${distribution}"/> *************** *** 491,495 **** <zipfileset dir="${bin}" prefix="htmlparser${versionQualifier}/${bin}" includes="*" excludes="*.bat" filemode="755"/> <zipfileset dir="${docs}" prefix="htmlparser${versionQualifier}/${docs}" excludes="samples/**"/> ! <zipfileset dir="${lib}" prefix="htmlparser${versionQualifier}/${lib}"/> <zipfileset dir="." prefix="htmlparser${versionQualifier}/" includes="src.zip"/> <!-- Copy the release notes as readme.txt in the base release directory --> --- 526,530 ---- <zipfileset dir="${bin}" prefix="htmlparser${versionQualifier}/${bin}" includes="*" excludes="*.bat" filemode="755"/> <zipfileset dir="${docs}" prefix="htmlparser${versionQualifier}/${docs}" excludes="samples/**"/> ! <zipfileset dir="${lib}" prefix="htmlparser${versionQualifier}/${lib}" includes="*.jar" excludes="checkstyle.jar,fit.jar"/> <zipfileset dir="." prefix="htmlparser${versionQualifier}/" includes="src.zip"/> <!-- Copy the release notes as readme.txt in the base release directory --> |
From: Derrick O. <der...@us...> - 2005-04-24 17:48:38
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5117/htmlparser/src/org/htmlparser/visitors Modified Files: NodeVisitor.java Log Message: Documentation revamp part three. Reworked some JavaDoc descriptions. Added "HTML Parser for dummies" introductory text. Removed checkstyle.jar and fit.jar (and their cruft). Index: NodeVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** NodeVisitor.java 24 May 2004 16:18:36 -0000 1.38 --- NodeVisitor.java 24 Apr 2005 17:48:27 -0000 1.39 *************** *** 43,68 **** * <pre> * import org.htmlparser.Parser; ! * import org.htmlparser.tags.LinkTag; * import org.htmlparser.util.ParserException; * import org.htmlparser.visitors.NodeVisitor; * ! * public class Visitor extends NodeVisitor * { ! * public Visitor () * { * } * public void visitTag (Tag tag) * { ! * if (tag instanceof LinkTag) ! * System.out.println (tag); * } * public static void main (String[] args) throws ParserException * { * Parser parser = new Parser ("http://cbc.ca"); ! * Visitor visitor = new Visitor (); * parser.visitAllNodesWith (visitor); * } * } * </pre> */ public abstract class NodeVisitor --- 43,107 ---- * <pre> * import org.htmlparser.Parser; ! * import org.htmlparser.Tag; ! * import org.htmlparser.Text; * import org.htmlparser.util.ParserException; * import org.htmlparser.visitors.NodeVisitor; * ! * public class MyVisitor extends NodeVisitor * { ! * public MyVisitor () * { * } + * * public void visitTag (Tag tag) * { ! * System.out.println ("\n" + tag.getTagName () + tag.getStartPosition ()); ! * } ! * ! * public void visitStringNode (Text string) ! * { !
* System.out.println (string); * } + * * public static void main (String[] args) throws ParserException * { * Parser parser = new Parser ("http://cbc.ca"); ! * Visitor visitor = new MyVisitor (); * parser.visitAllNodesWith (visitor); * } * } * </pre> + * If you want to handle more than one tag type with the same visitor + * you will need to check the tag type in the visitTag method. You can + * do that by either checking the tag name: + * <pre> + * public void visitTag (Tag tag) + * { + * if (tag.getName ().equals ("BODY")) + * ... do something with the BODY tag + * else if (tag.getName ().equals ("FRAME")) + * ... do something with the FRAME tag + * } + * </pre> + * or you can use <code>instanceof</code> if all the tags you want to handle + * have a {@link org.htmlparser.PrototypicalNodeFactory#registerTag registered} + * tag (i.e. they are generated by the NodeFactory): + * <pre> + * public void visitTag (Tag tag) + * { + * if (tag instanceof BodyTag) + * { + * BodyTag body = (BodyTag)tag; + * ... do something with body + * } + * else if (tag instanceof FrameTag) + * { + * FrameTag frame = (FrameTag)tag; + * ... do something with frame + * } + * else // other specific tags and generic TagNode objects + * { + * } + * } */ public abstract class NodeVisitor |
From: Derrick O. <der...@us...> - 2005-04-24 17:48:37
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5117/htmlparser/src/org/htmlparser Modified Files: Node.java Parser.java PrototypicalNodeFactory.java Log Message: Documentation revamp part three. Reworked some JavaDoc descriptions. Added "HTML Parser for dummies" introductory text. Removed checkstyle.jar and fit.jar (and their cruft). Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.104 retrieving revision 1.105 diff -C2 -d -r1.104 -r1.105 *** Parser.java 5 Apr 2005 00:48:10 -0000 1.104 --- Parser.java 24 Apr 2005 17:48:27 -0000 1.105 *************** *** 117,121 **** * The floating point version number ({@value}). */ ! public final static double VERSION_NUMBER = 1.5 ; --- 117,121 ---- * The floating point version number ({@value}). */ ! public static final double VERSION_NUMBER = 1.5 ; *************** *** 124,128 **** * The type of version ({@value}). */ ! public final static String VERSION_TYPE = "Integration Build" ; --- 124,128 ---- * The type of version ({@value}). */ ! public static final String VERSION_TYPE = "Integration Build" ; *************** *** 131,146 **** * The date of the version ({@value}). */ ! public final static String VERSION_DATE = "Mar 13, 2005" ; /** * The display version ({@value}). */ ! public final static String ! VERSION_STRING = "" + VERSION_NUMBER + " (" + VERSION_TYPE + " " + VERSION_DATE + ")" ! ; ! ! // End of formatting /** --- 131,146 ---- * The date of the version ({@value}). */ ! public static final String VERSION_DATE = "Mar 13, 2005" ; + // End of formatting + /** * The display version ({@value}). */ ! public static final String VERSION_STRING = ! "" + VERSION_NUMBER ! + " (" + VERSION_TYPE + " " + VERSION_DATE + ")"; /** *************** *** 158,162 **** * Use this for no feedback. */ !
public static ParserFeedback noFeedback = new DefaultParserFeedback (DefaultParserFeedback.QUIET); /** --- 158,163 ---- * Use this for no feedback. */ ! public static final ParserFeedback DEVNULL = ! new DefaultParserFeedback (DefaultParserFeedback.QUIET); /** *************** *** 164,168 **** * Use this for output on <code>System.out</code>. */ ! public static ParserFeedback stdout = new DefaultParserFeedback (); // --- 165,169 ---- * Use this for output on <code>System.out</code>. */ ! public static final ParserFeedback STDOUT = new DefaultParserFeedback (); // *************** *** 243,247 **** public Parser () { ! this (new Lexer (new Page ("")), noFeedback); } --- 244,248 ---- public Parser () { ! this (new Lexer (new Page ("")), DEVNULL); } *************** *** 272,276 **** * method will be called so it need not be connected yet. * @param fb The object to use for message communication. ! * @throws ParserException If the creation of the underlying Lexer cannot be performed. */ public Parser (URLConnection connection, ParserFeedback fb) --- 273,278 ---- * method will be called so it need not be connected yet. * @param fb The object to use for message communication. ! * @throws ParserException If the creation of the underlying Lexer ! * cannot be performed. */ public Parser (URLConnection connection, ParserFeedback fb) *************** *** 283,287 **** /** * Creates a Parser object with the location of the resource (URL or file) ! * You would typically create a DefaultHTMLParserFeedback object and pass it in. * @see #Parser(URLConnection,ParserFeedback) * @param resourceLocn Either the URL or the filename (autodetects). --- 285,290 ---- /** * Creates a Parser object with the location of the resource (URL or file) ! * You would typically create a DefaultHTMLParserFeedback object and pass ! * it in. * @see #Parser(URLConnection,ParserFeedback) * @param resourceLocn Either the URL or the filename (autodetects). 
*************** *** 292,296 **** * @throws ParserException If the URL is invalid. */ ! public Parser (String resourceLocn, ParserFeedback feedback) throws ParserException { this (getConnectionManager ().openConnection (resourceLocn), feedback); --- 295,301 ---- * @throws ParserException If the URL is invalid. */ ! public Parser (String resourceLocn, ParserFeedback feedback) ! throws ! ParserException { this (getConnectionManager ().openConnection (resourceLocn), feedback); *************** *** 301,314 **** * A DefaultHTMLParserFeedback object is used for feedback. * @param resourceLocn Either the URL or the filename (autodetects). ! * @throws ParserException If the resourceLocn argument does not resolve to a valid page or file. */ public Parser (String resourceLocn) throws ParserException { ! this (resourceLocn, stdout); } /** * Construct a parser using the provided lexer. ! * A feedback object printing to {@link #stdout System.out} is used. * This would be used to create a parser for special cases where the * normal creation of a lexer on a URLConnection needs to be customized. --- 306,320 ---- * A DefaultHTMLParserFeedback object is used for feedback. * @param resourceLocn Either the URL or the filename (autodetects). ! * @throws ParserException If the resourceLocn argument does not resolve ! * to a valid page or file. */ public Parser (String resourceLocn) throws ParserException { ! this (resourceLocn, STDOUT); } /** * Construct a parser using the provided lexer. ! * A feedback object printing to {@link #STDOUT System.out} is used. * This would be used to create a parser for special cases where the * normal creation of a lexer on a URLConnection needs to be customized. *************** *** 317,321 **** public Parser (Lexer lexer) { ! this (lexer, stdout); } --- 323,327 ---- public Parser (Lexer lexer) { ! 
this (lexer, STDOUT); } *************** *** 325,337 **** * a special setup or negotiation conditioning beyond what is available * from the {@link #getConnectionManager ConnectionManager}. ! * A feedback object printing to {@link #stdout System.out} is used. * @see #Parser(URLConnection,ParserFeedback) * @param connection A fully conditioned connection. The connect() * method will be called so it need not be connected yet. ! * @throws ParserException If the creation of the underlying Lexer cannot be performed. */ public Parser (URLConnection connection) throws ParserException { ! this (connection, stdout); } --- 331,344 ---- * a special setup or negotiation conditioning beyond what is available * from the {@link #getConnectionManager ConnectionManager}. ! * A feedback object printing to {@link #STDOUT System.out} is used. * @see #Parser(URLConnection,ParserFeedback) * @param connection A fully conditioned connection. The connect() * method will be called so it need not be connected yet. ! * @throws ParserException If the creation of the underlying Lexer ! * cannot be performed. */ public Parser (URLConnection connection) throws ParserException { ! this (connection, STDOUT); } *************** *** 412,416 **** getLexer ().getPage ().setEncoding (encoding); } ! /** * Get the encoding for the page this parser is reading from. --- 419,423 ---- getLexer ().getPage ().setEncoding (encoding); } ! /** * Get the encoding for the page this parser is reading from. *************** *** 488,496 **** * Sets the feedback object used in scanning. * @param fb The new feedback object to use. If this is null a ! * {@link #noFeedback silent feedback object} is used. */ public void setFeedback (ParserFeedback fb) { ! mFeedback = (null == fb) ? noFeedback : fb; } --- 495,506 ---- * Sets the feedback object used in scanning. * @param fb The new feedback object to use. If this is null a ! * {@link #DEVNULL silent feedback object} is used. */ public void setFeedback (ParserFeedback fb) { ! 
if (null == fb) ! mFeedback = DEVNULL; ! else ! mFeedback = fb; } *************** *** 512,515 **** --- 522,534 ---- * This assumes support for a reset from the underlying * {@link org.htmlparser.lexer.Source} object. + * <p>This is cheaper (in terms of time) than resetting the URL, i.e. + * <pre> + * parser.setURL (parser.getURL ()); + * </pre> + * because the page is not refetched from the internet. + * <em>Note: the nodes returned on the second parse are new + * nodes and not the same nodes returned on the first parse. If you + * want the same nodes for re-use, collect them in a NodeList with + * {@link #parse(NodeFilter) parse(null)} and operate on the NodeList.</em> */ public void reset () *************** *** 552,562 **** * // ... * // process recursively (nodes within nodes) via getChildren() ! * NodeList list = tag.getChildren (); ! * if (null != list) ! * for (NodeIterator i = list.elements (); i.hasMoreElements (); ) * processMyNodes (i.nextNode ()); * } * } ! * * Parser parser = new Parser ("http://www.yahoo.com"); * for (NodeIterator i = parser.elements (); i.hasMoreElements (); ) --- 571,581 ---- * // ... * // process recursively (nodes within nodes) via getChildren() ! * NodeList nl = tag.getChildren (); ! * if (null != nl) ! * for (NodeIterator i = nl.elements (); i.hasMoreElements (); ) * processMyNodes (i.nextNode ()); * } * } ! * * Parser parser = new Parser ("http://www.yahoo.com"); * for (NodeIterator i = parser.elements (); i.hasMoreElements (); ) *************** *** 574,577 **** --- 593,620 ---- /** * Parse the given resource, using the filter provided. + * This can be used to extract information from specific nodes. + * When used with a <code>null</code> filter it returns an + * entire page which can then be modified and converted back to HTML + * (Note: the synthesis use-case is not handled very well; the parser + * is more often used to extract information from a web page). 
+ * <p>For example, to replace the entire contents of the HEAD with a + * single TITLE tag you could do this: + * <pre> + * NodeList nl = parser.parse (null); // here is your two node list + * NodeList heads = nl.extractAllNodesThatMatch (new TagNameFilter ("HEAD")) + * if (heads.size () > 0) // there may not be a HEAD tag + * { + * Head head = heads.elementAt (0); // there should be only one + * head.removeAll (); // clean out the contents + * Tag title = new TitleTag (); + * title.setTagName ("title"); + * title.setChildren (new NodeList (new TextNode ("The New Title"))); + * Tag title_end = new TitleTag (); + * title_end.setTagName ("/title"); + * title.setEndTag (title_end); + * head.add (title); + * } + * System.out.println (nl.toHtml ()); // output the modified HTML + * </pre> * @return The list of matching nodes (for a <code>null</code> * filter this is all the top level nodes). *************** *** 595,599 **** ret.add (node); } ! return (ret); } --- 638,642 ---- ret.add (node); } ! return (ret); } *************** *** 606,615 **** * page and <code>finishedParsing()</code> is called after the processing. * @param visitor The visitor to visit all nodes with. ! * @throws ParserException If a parse error occurs while traversing the page with the visitor. */ ! public void visitAllNodesWith (NodeVisitor visitor) throws ParserException { Node node; visitor.beginParsing(); ! for (NodeIterator e = elements();e.hasMoreNodes();) { node = e.nextNode(); node.accept(visitor); --- 649,661 ---- * page and <code>finishedParsing()</code> is called after the processing. * @param visitor The visitor to visit all nodes with. ! * @throws ParserException If a parse error occurs while traversing ! * the page with the visitor. */ ! public void visitAllNodesWith (NodeVisitor visitor) throws ParserException ! { Node node; visitor.beginParsing(); ! for (NodeIterator e = elements(); e.hasMoreNodes(); ) ! 
{ node = e.nextNode(); node.accept(visitor); *************** *** 621,625 **** * Initializes the parser with the given input HTML String. * @param inputHTML the input HTML that is to be parsed. ! * @throws ParserException If a error occurs in setting up the underlying Lexer. */ public void setInputHTML (String inputHTML) --- 667,672 ---- * Initializes the parser with the given input HTML String. * @param inputHTML the input HTML that is to be parsed. ! * @throws ParserException If a error occurs in setting up the ! * underlying Lexer. */ public void setInputHTML (String inputHTML) *************** *** 642,650 **** * returned <code>true</code>. */ ! public NodeList extractAllNodesThatMatch (NodeFilter filter) throws ParserException { NodeIterator e; NodeList ret; ! ret = new NodeList (); for (e = elements (); e.hasMoreNodes (); ) --- 689,699 ---- * returned <code>true</code>. */ ! public NodeList extractAllNodesThatMatch (NodeFilter filter) ! throws ! ParserException { NodeIterator e; NodeList ret; ! ret = new NodeList (); for (e = elements (); e.hasMoreNodes (); ) *************** *** 656,664 **** /** * Convenience method to extract all nodes of a given class type. ! * Equivalent to <code>extractAllNodesThatMatch (new NodeClassFilter (nodeType))</code>. * @param nodeType The class of the nodes to collect. * @throws ParserException If a parse error occurs. * @return A list of nodes which have the class specified. ! * @deprecated Use extractAllNodesThatMatch (new NodeClassFilter (nodeType)). * @see #extractAllNodesThatAre */ --- 705,714 ---- /** * Convenience method to extract all nodes of a given class type. ! * Equivalent to ! * <code>extractAllNodesThatMatch (new NodeClassFilter (nodeType))</code>. * @param nodeType The class of the nodes to collect. * @throws ParserException If a parse error occurs. * @return A list of nodes which have the class specified. ! * @deprecated Use extractAllNodesThatMatch (new NodeClassFilter (cls)). 
* @see #extractAllNodesThatAre */ *************** *** 669,673 **** NodeList ret; ! ret = extractAllNodesThatMatch (new NodeClassFilter (nodeType)); return (ret.toNodeArray ()); --- 719,723 ---- NodeList ret; ! ret = extractAllNodesThatMatch (new NodeClassFilter (nodeType)); return (ret.toNodeArray ()); *************** *** 690,695 **** ParserException { ! if (null != getFeedback ()) ! getFeedback ().info (ConnectionManager.getRequestHeader (connection)); } --- 740,744 ---- ParserException { ! getFeedback ().info (ConnectionManager.getRequestHeader (connection)); } *************** *** 706,711 **** ParserException { ! if (null != getFeedback ()) ! getFeedback ().info (ConnectionManager.getResponseHeader (connection)); } --- 755,759 ---- ParserException { ! getFeedback ().info (ConnectionManager.getResponseHeader (connection)); } *************** *** 724,738 **** System.out.println ("HTML Parser v" + VERSION_STRING + "\n"); System.out.println (); ! System.out.println ("Syntax : java -jar htmlparser.jar <resourceLocn/website> [node_type]"); ! System.out.println (" <resourceLocn/website> the URL or file to be parsed"); ! System.out.println (" node_type an optional node name, for example:"); ! System.out.println (" A - Show only the link tags extracted from the document"); ! System.out.println (" IMG - Show only the image tags extracted from the document"); ! System.out.println (" TITLE - Extract the title from the document"); ! System.out.println (); ! System.out.println ("Example : java -jar htmlparser.jar http://www.yahoo.com"); System.out.println (); ! System.out.println ("For support, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page..."); ! System.out.println ("HTML Parser home page : http://htmlparser.org"); System.out.println (); } --- 772,785 ---- System.out.println ("HTML Parser v" + VERSION_STRING + "\n"); System.out.println (); ! System.out.println ("Syntax : java -jar htmlparser.jar" ! + " <file/page> [type]"); ! 
System.out.println (" <file/page> the URL or file to be parsed"); ! System.out.println (" type the node type, for example:"); ! System.out.println (" A - Show only the link tags"); ! System.out.println (" IMG - Show only the image tags"); ! System.out.println (" TITLE - Show only the title tag"); System.out.println (); ! System.out.println ("Example : java -jar htmlparser.jar" ! + " http://www.yahoo.com"); System.out.println (); } *************** *** 746,750 **** { // for a simple dump, use more verbose settings filter = null; ! parser.setFeedback (Parser.stdout); getConnectionManager ().setMonitor (parser); } --- 793,797 ---- { // for a simple dump, use more verbose settings filter = null; ! parser.setFeedback (Parser.STDOUT); getConnectionManager ().setMonitor (parser); } Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** Node.java 10 Apr 2005 23:20:42 -0000 1.52 --- Node.java 24 Apr 2005 17:48:27 -0000 1.53 *************** *** 37,41 **** * to define your own nodes to be returned by the * {@link org.htmlparser.lexer.Lexer} or {@link Parser}, but each of the types ! * must support this interface. * More specific interface requirements for each of the node types are specified * by the {@link Text}, {@link Remark} and {@link Tag} interfaces. --- 37,41 ---- * to define your own nodes to be returned by the * {@link org.htmlparser.lexer.Lexer} or {@link Parser}, but each of the types ! * must support this interface. * More specific interface requirements for each of the node types are specified * by the {@link Text}, {@link Remark} and {@link Tag} interfaces. *************** *** 65,69 **** * @return The text of this node including it's children. */ ! public abstract String toPlainTextString (); /** --- 65,69 ---- * @return The text of this node including it's children. */ ! 
String toPlainTextString (); /** *************** *** 71,75 **** * This should be the exact sequence of characters that were encountered by * the parser that caused this node to be created. Where this breaks down is ! * where broken nodes (tags and remarks) have been encountered and fixed. * Applications reproducing html can use this method on nodes which are to * be used or transferred as they were received or created. --- 71,75 ---- * This should be the exact sequence of characters that were encountered by * the parser that caused this node to be created. Where this breaks down is ! * where broken nodes (tags and remarks) have been encountered and fixed. * Applications reproducing html can use this method on nodes which are to * be used or transferred as they were received or created. *************** *** 77,86 **** * to be returned by the parser or lexer. */ ! public abstract String toHtml (); /** * Return the string representation of the node. * The return value may not be the entire contents of the node, and non- ! * printable characters may be translated in order to make them visible. * This is typically to be used in * the manner<br> --- 77,86 ---- * to be returned by the parser or lexer. */ ! String toHtml (); /** * Return the string representation of the node. * The return value may not be the entire contents of the node, and non- ! * printable characters may be translated in order to make them visible. * This is typically to be used in * the manner<br> *************** *** 92,110 **** * that isn't too large. */ ! public abstract String toString (); /** ! * Collect this node and its child nodes (if applicable) into a list, provided the node ! * satisfies the filtering criteria.<P> ! * ! * This mechanism allows powerful filtering code to be written very easily, ! * without bothering about collection of embedded tags separately. * e.g. 
when we try to get all the links on a page, it is not possible to * get it at the top-level, as many tags (like form tags), can contain * links embedded in them. We could get the links out by checking if the * current node is a {@link org.htmlparser.tags.CompositeTag}, and going ! * through its children. So this method provides a convenient way to do this.<P> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to * extract all links from a page would look like: * <pre> --- 92,109 ---- * that isn't too large. */ ! String toString (); /** ! * Collect this node and its child nodes into a list, provided the node ! * satisfies the filtering criteria. ! * <p>This mechanism allows powerful filtering code to be written very ! * easily, without bothering about collection of embedded tags separately. * e.g. when we try to get all the links on a page, it is not possible to * get it at the top-level, as many tags (like form tags), can contain * links embedded in them. We could get the links out by checking if the * current node is a {@link org.htmlparser.tags.CompositeTag}, and going ! * through its children. So this method provides a convenient way to do ! * this.</p> ! * <p>Using collectInto(), programs get a lot shorter. Now, the code to * extract all links from a page would look like: * <pre> *************** *** 115,121 **** * </pre> * Thus, <code>list</code> will hold all the link nodes, irrespective of how ! * deep the links are embedded.<P> ! * ! * Another way to accomplish the same objective is: * <pre> * NodeList list = new NodeList (); --- 114,119 ---- * </pre> * Thus, <code>list</code> will hold all the link nodes, irrespective of how ! * deep the links are embedded.</p> ! * <p>Another way to accomplish the same objective is: * <pre> * NodeList list = new NodeList (); *************** *** 128,134 **** * @param list The list to collect nodes into. * @param filter The criteria to use when deciding if a node should ! * be added to the list. */ ! 
public abstract void collectInto (NodeList list, NodeFilter filter); /** --- 126,132 ---- * @param list The list to collect nodes into. * @param filter The criteria to use when deciding if a node should ! * be added to the list.</p> */ ! void collectInto (NodeList list, NodeFilter filter); /** *************** *** 137,141 **** * @return The start position. */ ! public abstract int getStartPosition (); /** --- 135,139 ---- * @return The start position. */ ! int getStartPosition (); /** *************** *** 143,147 **** * @param position The new start position. */ ! public abstract void setStartPosition (int position); /** --- 141,145 ---- * @param position The new start position. */ ! void setStartPosition (int position); /** *************** *** 151,155 **** * @return The end position. */ ! public abstract int getEndPosition (); /** --- 149,153 ---- * @return The end position. */ ! int getEndPosition (); /** *************** *** 157,161 **** * @param position The new end position. */ ! public abstract void setEndPosition (int position); /** --- 155,159 ---- * @param position The new end position. */ ! void setEndPosition (int position); /** *************** *** 163,167 **** * @return The page that supplied this node. */ ! public Page getPage (); /** --- 161,165 ---- * @return The page that supplied this node. */ ! Page getPage (); /** *************** *** 169,178 **** * @param page The page that supplied this node. */ ! public void setPage (Page page); /** * Apply the visitor to this node. * @param visitor The visitor to this node. */ ! public abstract void accept (NodeVisitor visitor); /** --- 167,177 ---- * @param page The page that supplied this node. */ ! void setPage (Page page); ! /** * Apply the visitor to this node. * @param visitor The visitor to this node. */ ! void accept (NodeVisitor visitor); /** *************** *** 186,190 **** * otherwise. */ ! public abstract Node getParent (); /** --- 185,189 ---- * otherwise. */ ! 
Node getParent (); /** *************** *** 192,196 **** * @param node The node that contains this node. */ ! public abstract void setParent (Node node); /** --- 191,195 ---- * @param node The node that contains this node. */ ! void setParent (Node node); /** *************** *** 199,203 **** * <code>null</code> otherwise. */ ! public abstract NodeList getChildren (); /** --- 198,202 ---- * <code>null</code> otherwise. */ ! NodeList getChildren (); /** *************** *** 205,209 **** * @param children The new list of children this node contains. */ ! public abstract void setChildren (NodeList children); /** --- 204,208 ---- * @param children The new list of children this node contains. */ ! void setChildren (NodeList children); /** *************** *** 212,216 **** * a tag, the contents of the tag less the enclosing angle brackets. */ ! public String getText (); /** --- 211,215 ---- * a tag, the contents of the tag less the enclosing angle brackets. */ ! String getText (); /** *************** *** 218,222 **** * @param text The new text for the node. */ ! public void setText (String text); /** --- 217,221 ---- * @param text The new text for the node. */ ! void setText (String text); /** *************** *** 231,236 **** * composite nodes (those that contain other nodes), the children will have * already been parsed and will be available via {@link #getChildren}. */ ! public void doSemanticAction () throws ParserException; --- 230,237 ---- * composite nodes (those that contain other nodes), the children will have * already been parsed and will be available via {@link #getChildren}. + * @exception ParserException If a problem is encountered performing the + * semantic action. */ ! void doSemanticAction () throws ParserException; *************** *** 242,247 **** /** * Allow cloning of nodes. ! * Creates and returns a copy of this object. The precise meaning ! * of "copy" may depend on the class of the object. 
The general * intent is that, for any object <tt>x</tt>, the expression: * <blockquote> --- 243,248 ---- /** * Allow cloning of nodes. ! * Creates and returns a copy of this object. The precise meaning ! * of "copy" may depend on the class of the object. The general * intent is that, for any object <tt>x</tt>, the expression: * <blockquote> *************** *** 252,261 **** * <pre> * x.clone().getClass() == x.getClass()</pre></blockquote> ! * will be <tt>true</tt>, but these are not absolute requirements. * While it is typically the case that: * <blockquote> * <pre> * x.clone().equals(x)</pre></blockquote> ! * will be <tt>true</tt>, this is not an absolute requirement. * <p> * By convention, the returned object should be obtained by calling --- 253,262 ---- * <pre> * x.clone().getClass() == x.getClass()</pre></blockquote> ! * will be <tt>true</tt>, but these are not absolute requirements. * While it is typically the case that: * <blockquote> * <pre> * x.clone().equals(x)</pre></blockquote> ! * will be <tt>true</tt>, this is not an absolute requirement. * <p> * By convention, the returned object should be obtained by calling *************** *** 275,291 **** * need to be modified. * <p> ! * The method <tt>clone</tt> for class <tt>Object</tt> performs a ! * specific cloning operation. First, if the class of this object does ! * not implement the interface <tt>Cloneable</tt>, then a ! * <tt>CloneNotSupportedException</tt> is thrown. Note that all arrays ! * are considered to implement the interface <tt>Cloneable</tt>. ! * Otherwise, this method creates a new instance of the class of this ! * object and initializes all its fields with exactly the contents of * the corresponding fields of this object, as if by assignment; the ! * contents of the fields are not themselves cloned. Thus, this method * performs a "shallow copy" of this object, not a "deep copy" operation. * <p> ! * The class <tt>Object</tt> does not itself implement the interface ! 
* <tt>Cloneable</tt>, so calling the <tt>clone</tt> method on an object * whose class is <tt>Object</tt> will result in throwing an * exception at run time. --- 276,292 ---- * need to be modified. * <p> ! * The method <tt>clone</tt> for class <tt>Object</tt> performs a ! * specific cloning operation. First, if the class of this object does ! * not implement the interface <tt>Cloneable</tt>, then a ! * <tt>CloneNotSupportedException</tt> is thrown. Note that all arrays ! * are considered to implement the interface <tt>Cloneable</tt>. ! * Otherwise, this method creates a new instance of the class of this ! * object and initializes all its fields with exactly the contents of * the corresponding fields of this object, as if by assignment; the ! * contents of the fields are not themselves cloned. Thus, this method * performs a "shallow copy" of this object, not a "deep copy" operation. * <p> ! * The class <tt>Object</tt> does not itself implement the interface ! * <tt>Cloneable</tt>, so calling the <tt>clone</tt> method on an object * whose class is <tt>Object</tt> will result in throwing an * exception at run time. *************** *** 299,303 **** * @see java.lang.Cloneable */ ! public Object clone () throws CloneNotSupportedException; --- 300,304 ---- * @see java.lang.Cloneable */ ! 
Object clone () throws CloneNotSupportedException; Index: PrototypicalNodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/PrototypicalNodeFactory.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** PrototypicalNodeFactory.java 10 Apr 2005 23:20:42 -0000 1.14 --- PrototypicalNodeFactory.java 24 Apr 2005 17:48:27 -0000 1.15 *************** *** 34,42 **** import java.util.Vector; - import org.htmlparser.Attribute; - import org.htmlparser.NodeFactory; - import org.htmlparser.Remark; - import org.htmlparser.Tag; - import org.htmlparser.Text; import org.htmlparser.lexer.Page; import org.htmlparser.nodes.TextNode; --- 34,37 ---- *************** *** 73,77 **** import org.htmlparser.tags.TextareaTag; import org.htmlparser.tags.TitleTag; - import org.htmlparser.util.ParserException; /** --- 68,71 ---- *************** *** 98,102 **** * explicitly.</p> * <p>Here is an example of how to override all text issued from ! * {@link org.htmlparser.nodes.TextNode#toPlainTextString() Text.toPlainTextString()}, * in this case decoding (converting character references), * which illustrates the use of setting the text prototype: --- 92,97 ---- * explicitly.</p> * <p>Here is an example of how to override all text issued from ! * {@link org.htmlparser.nodes.TextNode#toPlainTextString() ! * Text.toPlainTextString()}, * in this case decoding (converting character references), * which illustrates the use of setting the text prototype: *************** *** 108,112 **** * public String toPlainTextString() * { ! * return (org.htmlparser.util.Translate.decode (super.toPlainTextString ())); * } * }); --- 103,108 ---- * public String toPlainTextString() * { ! * String original = super.toPlainTextString (); ! * return (org.htmlparser.util.Translate.decode (original)); * } * }); *************** *** 208,211 **** --- 204,209 ---- * Adds a tag to the registry. 
* @param id The name under which to register the tag. + * <strong>For proper operation, the id should be uppercase so it + * will be matched by a Map lookup.</strong> * @param tag The tag to be returned from a {@link #createTagNode} call. * @return The tag previously registered with that id if any, *************** *** 259,272 **** * Register a tag. * Registers the given tag under every {@link Tag#getIds() id} that the ! * tag has. * @param tag The tag to register. */ public void registerTag (Tag tag) { ! String ids[]; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! put (ids[i], tag); } --- 257,272 ---- * Register a tag. * Registers the given tag under every {@link Tag#getIds() id} that the ! * tag has (i.e. all names returned by {@link Tag#getIds() tag.getIds()}. ! * <p><strong>For proper operation, the ids are converted to uppercase so ! * they will be matched by a Map lookup.</strong> * @param tag The tag to register. */ public void registerTag (Tag tag) { ! String[] ids; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! put (ids[i].toUpperCase (Locale.ENGLISH), tag); } *************** *** 274,286 **** * Unregister a tag. * Unregisters the given tag from every {@link Tag#getIds() id} the tag has. * @param tag The tag to unregister. */ public void unregisterTag (Tag tag) { ! String ids[]; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! remove (ids[i]); } --- 274,288 ---- * Unregister a tag. * Unregisters the given tag from every {@link Tag#getIds() id} the tag has. + * <p><strong>The ids are converted to uppercase to undo the operation + * of registerTag.</strong> * @param tag The tag to unregister. */ public void unregisterTag (Tag tag) { ! String[] ids; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! remove (ids[i].toUpperCase (Locale.ENGLISH)); } *************** *** 323,327 **** registerTag (new HeadTag ()); registerTag (new Html ()); ! 
return (this); } --- 325,329 ---- registerTag (new HeadTag ()); registerTag (new Html ()); ! return (this); } *************** *** 440,444 **** { Remark ret; ! try { --- 442,446 ---- { Remark ret; ! try { *************** *** 503,507 **** catch (CloneNotSupportedException cnse) { ! // default to creating a new one } } --- 505,509 ---- catch (CloneNotSupportedException cnse) { ! // default to creating a generic one } } |