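[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.51,1.52 SiteCapturer.java,1.8,1.9 StringExtractor.java,1.47,1.48 WikiCapturer.java,1.2,1.3 package.html,1.19,1.20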

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23432/htmlparser/src/org/htmlparser/parserapplications

Modified Files:
	LinkExtractor.java SiteCapturer.java StringExtractor.java 
	WikiCapturer.java package.html 
Log Message:
Documentation revamp part two.

Index: package.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/package.html,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** package.html	2 Jan 2004 16:24:54 -0000	1.19
--- package.html	12 Apr 2005 11:27:42 -0000	1.20
***************
*** 30,44 ****
  </head>
  <body bgcolor="white">
! Developers and users alike should try out the applications in this package. The code of these applications will give
! a good idea about the capabilities of the HTML Parser, and its intended usage. The binary releases of html parser would
! typically contain these applications in runnable form.

! <h2>Related Documentation</h2>

! For overviews, tutorials, examples, guides, and tool documentation, please see:
! <ul>
!   <li><a href="http://htmlparser.sourceforge.net">HTML Parser Home Page</a>
! </ul>

--- 30,93 ----
  </head>
  <body bgcolor="white">
! Example applications.
! <p>
! <table width="94%" border="0">
!   <tr> 
!     <td valign="top">
!     <strong>Link Extractor</strong><br>
!     </td>
!     <td>
!     <i>Extract links/mail addresses from a web page.</i><br>

!     <a href="javadoc/org/htmlparser/parserapplications/LinkExtractor.html" target="_parent">org.htmlparser.parserapplications.LinkExtractor</a>
!     <pre>
!     <code>bin/linkextractor http://website_url [-maillinks]</code>
!     the optional -maillinks argument causes mailto: links to be printed
!     </pre>
!     </td>
!   </tr>
!   <tr> 
!     <td valign="top">

!     <strong>String Extractor</strong><br>
!     </td>
!     <td>
!     <i>Extract text from a web page.</i><br>
!     <a href="javadoc/org/htmlparser/parserapplications/StringExtractor.html" target="_parent">org.htmlparser.parserapplications.StringExtractor</a>
!     <pre>
!     <code>bin/stringextractor http://website_url [-links]</code>

+     the optional -links argument causes hyperlinks to be shown within the text
+     </pre>
+     </td>
+   </tr>
+   <tr> 
+     <td valign="top">
+     <strong>Site Capturer</strong><br>
+     </td>
+     <td>
+ 
+     <i>Save a web site locally.</i><br>
+     <a href="javadoc/org/htmlparser/parserapplications/SiteCapturer.html" target="_parent">org.htmlparser.parserapplications.SiteCapturer</a>
+     <pre>
+     <code>bin/sitecapturer http://source_website /target_directory/ [true|false]</code>
+ 
+     the optional boolean argument determines whether resources such as images,
+     audio and video are to be captured
+     </pre>
+     </td>
+   </tr>
+   <tr> 
+     <td valign="top">
+     <strong>Wiki Capturer</strong><br>
+     </td>
+     <td>
+ 
+     <i>Save a wiki locally.</i><br>
+     <a href="javadoc/org/htmlparser/parserapplications/WikiCapturer.html" target="_parent">org.htmlparser.parserapplications.WikiCapturer</a>
+     Subclass of SiteCapturer (see above) that eliminates specific Wiki pages.
+     </td>
+   </tr>
+ </table>
  <!-- Put @see and @since tags down here. -->

Index: WikiCapturer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/WikiCapturer.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** WikiCapturer.java	30 May 2004 01:43:54 -0000	1.2
--- WikiCapturer.java	12 Apr 2005 11:27:42 -0000	1.3
***************
*** 123,126 ****
--- 123,128 ----
       * such as images and video are to be captured as well.
       * These are requested via dialog boxes if not supplied.
+      * @exception MalformedURLException If the supplied URL is invalid.
+      * @exception IOException If an error occurs reading the pages or resources.
       */
      public static void main (String[] args)

Index: SiteCapturer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/SiteCapturer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** SiteCapturer.java	5 Apr 2005 00:48:12 -0000	1.8
--- SiteCapturer.java	12 Apr 2005 11:27:41 -0000	1.9
***************
*** 267,272 ****
--- 267,274 ----
      /**
       * Returns <code>true</code> if the link contains text/html content.
+      * @param link The URL to check for content type.
       * @return <code>true</code> if the HTTP header indicates the type is
       * "text/html".
+      * @exception ParserException If the supplied URL can't be read from.
       */
      protected boolean isHtml (String link)
***************
*** 469,472 ****
--- 471,476 ----
      /**
       * Process a single page.
+      * @param filter The filter to apply to the collected nodes.
+      * @exception ParserException If a parse error occurs.
       */
      protected void process (NodeFilter filter)
***************
*** 748,751 ****
--- 752,757 ----
       * such as images and video are to be captured as well.
       * These are requested via dialog boxes if not supplied.
+      * @exception MalformedURLException If the supplied URL is invalid.
+      * @exception IOException If an error occurs reading the page or resources.
       */
      public static void main (String[] args)

Index: StringExtractor.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/StringExtractor.java,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** StringExtractor.java	29 Feb 2004 15:09:56 -0000	1.47
--- StringExtractor.java	12 Apr 2005 11:27:42 -0000	1.48
***************
*** 51,56 ****
      /**
       * Extract the text from a page.
-      * @param links if <code>true</code> include hyperlinks in output.
       * @return The textual contents of the page.
       */
      public String extractStrings (boolean links)
--- 51,57 ----
      /**
       * Extract the text from a page.
       * @return The textual contents of the page.
+      * @param links if <code>true</code> include hyperlinks in output.
+      * @exception ParserException If a parse error occurs.
       */
      public String extractStrings (boolean links)

Index: LinkExtractor.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v
retrieving revision 1.51
retrieving revision 1.52
diff -C2 -d -r1.51 -r1.52
*** LinkExtractor.java	4 Jan 2004 03:23:09 -0000	1.51
--- LinkExtractor.java	12 Apr 2005 11:27:41 -0000	1.52
***************
*** 44,47 ****
--- 44,50 ----
  {
      /**
+      * Run the link extractor.
+      * @param args [0] Optional url to extract links from.
+      * An input dialog is displayed if it is not supplied.
       */
      public static void main (String[] args)

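[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.51,1.52 SiteCapturer.java,1.8,1.9 StringExtractor.java,1.47,1.48 WikiCapturer.java,1.2,1.3 package.html,1.19,1.20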

[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.51,1.52 SiteCapturer.java,1.8,1.9 StringExtractor.java,1.47,1.48 WikiCapturer.java,1.2,1.3 package.html,1.19,1.20