Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Page.java,1.43,1.44 Lexer.java,1.32,1.33
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-09-02 02:28:54
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29769/src/org/htmlparser/lexer Modified Files: Page.java Lexer.java Log Message: Implemented: RFE #1017249 HTML Client Doesn't Support Cookies but will follow redirect; RFE #1010586 Add support for password protected URL; and RFE #1000739 Add support for proxy scenario. A new http package is added, the primary class being ConnectionManager which handles proxies, passwords and cookies. Some testing still needed. Also removed some line separator cruft. Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** Page.java 25 Aug 2004 03:36:01 -0000 1.43 --- Page.java 2 Sep 2004 02:28:14 -0000 1.44 *************** *** 42,45 **** --- 42,46 ---- import java.util.zip.InflaterInputStream; + import org.htmlparser.http.ConnectionManager; import org.htmlparser.util.ParserException; *************** *** 95,113 **** /** ! * Messages for page not there (404). */ ! static private final String[] mFourOhFour = ! { ! "The web site you seek cannot be located, but countless more exist", ! "You step in the stream, but the water has moved on. This page is not here.", ! "Yesterday the page existed. Today it does not. The internet is like that.", ! "That page was so big. It might have been very useful. But now it is gone.", ! "Three things are certain: death, taxes and broken links. Guess which has occured.", ! "Chaos reigns within. Reflect, repent and enter the correct URL. Order shall return.", ! "Stay the patient course. Of little worth is your ire. The page is not found.", ! "A non-existant URL reduces your expensive computer to a simple stone.", ! "Many people have visited that page. Today, you are not one of the lucky ones.", ! "Cutting the wind with a knife. Bookmarking a URL. 
Both are ephemeral.", ! }; /** --- 96,102 ---- /** ! * Connection control (proxy, cookies, authorization). */ ! public static ConnectionManager mConnectionManager = new ConnectionManager (); /** *************** *** 192,195 **** --- 181,206 ---- // + // static methods + // + + /** + * Get the connection manager all Parsers use. + * @return The connection manager. + */ + public static ConnectionManager getConnectionManager () + { + return (mConnectionManager); + } + + /** + * Set the connection manager to use. + * @return The connection manager. + */ + public static void setConnectionManager (ConnectionManager manager) + { + mConnectionManager = manager; + } + + // // Serialization support // *************** *** 351,372 **** try { - try - { - getConnection ().setRequestProperty ("Accept-Encoding", "gzip, deflate"); - } - catch (IllegalStateException ise) // already connected - { - // assume all request properties have already been set - } getConnection ().connect (); } catch (UnknownHostException uhe) { ! int message = (int)(Math.random () * mFourOhFour.length); ! throw new ParserException (mFourOhFour[message], uhe); } catch (IOException ioe) { ! throw new ParserException (ioe.getMessage (), ioe); } type = getContentType (); --- 362,374 ---- try { getConnection ().connect (); } catch (UnknownHostException uhe) { ! throw new ParserException ("Connect to " + mConnection.getURL ().toExternalForm () + " failed.", uhe); } catch (IOException ioe) { ! throw new ParserException ("Exception connecting to " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe); } type = getContentType (); *************** *** 409,413 **** catch (IOException ioe) { ! throw new ParserException (ioe.getMessage (), ioe); } mUrl = connection.getURL ().toExternalForm (); --- 411,415 ---- catch (IOException ioe) { ! 
throw new ParserException ("Exception getting input stream from " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe); } mUrl = connection.getURL ().toExternalForm (); Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** Lexer.java 1 Aug 2004 02:16:04 -0000 1.32 --- Lexer.java 2 Sep 2004 02:28:14 -0000 1.33 *************** *** 30,34 **** import java.io.Serializable; import java.net.MalformedURLException; - import java.net.URL; import java.net.URLConnection; import java.util.Vector; --- 30,33 ---- *************** *** 39,42 **** --- 38,42 ---- import org.htmlparser.Text; import org.htmlparser.Tag; + import org.htmlparser.http.ConnectionManager; import org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TextNode; *************** *** 1105,1109 **** ParserException { - URL url; Lexer lexer; Node node; --- 1105,1108 ---- *************** *** 1113,1120 **** else { - url = new URL (args[0]); try { ! lexer = new Lexer (url.openConnection ()); while (null != (node = lexer.nextNode ())) System.out.println (node.toString ()); --- 1112,1119 ---- else { try { ! ConnectionManager manager = Page.getConnectionManager (); ! lexer = new Lexer (manager.openConnection (args[0])); while (null != (node = lexer.nextNode ())) System.out.println (node.toString ()); |