[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Lexer.java,1.2,1.3 Page.java,1.6,1.7
Brought to you by:
derrickoswald
|
From: <der...@us...> - 2003-08-23 01:33:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1:/tmp/cvs-serv23027/src/org/htmlparser/lexer
Modified Files:
Lexer.java Page.java
Log Message:
Fifth drop for new i/o subsystem.
There is now a mainline for the lexer.
Try:
java -jar lexer.jar http://whatever
or use the new lexer execution script in the integration build:
bin/lexer http://whatever
Index: Lexer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** Lexer.java 21 Aug 2003 01:52:23 -0000 1.2
--- Lexer.java 23 Aug 2003 01:33:06 -0000 1.3
***************
*** 26,33 ****
--- 26,39 ----
// CA 94708, USA
// Website : http://www.industriallogic.com
+ //
+ // This class was contributed by
+ // Derrick Oswald
+ //
package org.htmlparser.lexer;
+ import java.io.IOException;
import java.io.UnsupportedEncodingException;
+ import java.net.URL;
import java.net.URLConnection;
import java.util.Vector;
***************
*** 546,548 ****
--- 552,580 ----
}
+ /**
+ * Mainline for command line operation
+ */
+ public static void main (String[] args) throws IOException, ParserException
+ {
+ URL url;
+ Lexer lexer;
+ Node node;
+
+ if (0 >= args.length)
+ System.out.println ("usage: java -jar htmllexer.jar <url>");
+ else
+ {
+ url = new URL (args[0]);
+ try
+ {
+ lexer = new Lexer (url.openConnection ());
+ while (null != (node = lexer.nextNode ()))
+ System.out.println (node.toString ());
+ }
+ catch (ParserException pe)
+ {
+ System.out.println (pe.getMessage ());
+ }
+ }
+ }
}
Index: Page.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** Page.java 21 Aug 2003 01:52:23 -0000 1.6
--- Page.java 23 Aug 2003 01:33:06 -0000 1.7
***************
*** 26,29 ****
--- 26,33 ----
// CA 94708, USA
// Website : http://www.industriallogic.com
+ //
+ // This class was contributed by
+ // Derrick Oswald
+ //
package org.htmlparser.lexer;
***************
*** 39,45 ****
import java.net.URLConnection;
import java.net.UnknownHostException;
!
! import org.apache.commons.logging.Log;
! import org.apache.commons.logging.LogFactory;
import org.htmlparser.util.ParserException;
--- 43,47 ----
import java.net.URLConnection;
import java.net.UnknownHostException;
! import java.util.Random;
import org.htmlparser.util.ParserException;
***************
*** 61,69 ****
/**
- * The logging object.
- */
- protected static Log mLog = null;
-
- /**
* The source of characters.
*/
--- 63,66 ----
***************
*** 113,117 ****
catch (UnknownHostException uhe)
{
! throw new ParserException ("the host (" + connection.getURL ().getHost () + ") was not found", uhe);
}
catch (IOException ioe)
--- 110,116 ----
catch (UnknownHostException uhe)
{
! Random number = new Random ();
! int message = number.nextInt (mFourOhFour.length);
! throw new ParserException (mFourOhFour[message], uhe);
}
catch (IOException ioe)
***************
*** 348,352 ****
if (!ret.equalsIgnoreCase (content))
{
! getLog ().info (
"detected charset \""
+ content
--- 347,351 ----
if (!ret.equalsIgnoreCase (content))
{
! System.out.println (
"detected charset \""
+ content
***************
*** 408,417 ****
// return the default
ret = _default;
! getLog ().debug (
"unable to determine cannonical charset name for "
+ name
+ " - using "
! + _default,
! ita);
}
--- 407,415 ----
// return the default
ret = _default;
! System.out.println (
"unable to determine cannonical charset name for "
+ name
+ " - using "
! + _default);
}
***************
*** 506,523 ****
getText (buffer, 0, mSource.mOffset);
}
-
- //
- // Bean patterns
- //
-
- public Log getLog ()
- {
- if (null == mLog)
- mLog = LogFactory.getLog (this.getClass ());
- // String name = this.getClass ().getName ();
- // java.util.logging.Logger logger = java.util.logging.Logger.getLogger (name);
- // logger.setLevel (java.util.logging.Level.FINEST);
- return (mLog);
- }
-
}
--- 504,506 ----
|