[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.49,1.50 MailRipper.java,1.50,NONE Robot.java,1.52,NONE

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications
In directory sc8-pr-cvs1:/tmp/cvs-serv23579/src/org/htmlparser/parserapplications

Modified Files:
	LinkExtractor.java 
Removed Files:
	MailRipper.java Robot.java 
Log Message:
Add filter support to NodeList.
Rework LinkExtractor and remove MailRipper and Robot example programs.
Clean out docs directory.


Index: LinkExtractor.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v
retrieving revision 1.49
retrieving revision 1.50
diff -C2 -d -r1.49 -r1.50
*** LinkExtractor.java	8 Dec 2003 01:31:52 -0000	1.49
--- LinkExtractor.java	31 Dec 2003 02:50:50 -0000	1.50
***************
*** 1,4 ****
! // HTMLParser Library v1_4_20031207 - A java-based parser for HTML
! // Copyright (C) Dec 31, 2000 Somik Raha
  //
  // This library is free software; you can redistribute it and/or
--- 1,12 ----
! // HTMLParser Library $Name$ - A java-based parser for HTML
! // http://sourceforge.org/projects/htmlparser
! // Copyright (C) 2003 Derrick Oswald
! //
! // Revision Control Information
! //
! // $Source$
! // $Author$
! // $Date$
! // $Revision$
  //
  // This library is free software; you can redistribute it and/or
***************
*** 9,35 ****
  // This library is distributed in the hope that it will be useful,
  // but WITHOUT ANY WARRANTY; without even the implied warranty of
! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  // Lesser General Public License for more details.
  //
  // You should have received a copy of the GNU Lesser General Public
  // License along with this library; if not, write to the Free Software
! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
! //
! // For any questions or suggestions, you can write to me at :
! // Email :so...@in...
  //
- // Postal Address :
- // Somik Raha
- // Extreme Programmer & Coach
- // Industrial Logic Corporation
- // 2583 Cedar Street, Berkeley,
- // CA 94708, USA
- // Website : http://www.industriallogic.com
  
  package org.htmlparser.parserapplications;
  
  import org.htmlparser.Node;
  import org.htmlparser.Parser;
  import org.htmlparser.tags.LinkTag;
  import org.htmlparser.util.ParserException;
  
--- 17,38 ----
  // This library is distributed in the hope that it will be useful,
  // but WITHOUT ANY WARRANTY; without even the implied warranty of
! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  // Lesser General Public License for more details.
  //
  // You should have received a copy of the GNU Lesser General Public
  // License along with this library; if not, write to the Free Software
! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  //
  
  package org.htmlparser.parserapplications;
  
+ import javax.swing.JOptionPane;
  import org.htmlparser.Node;
+ import org.htmlparser.NodeFilter;
  import org.htmlparser.Parser;
+ import org.htmlparser.filters.AndFilter;
+ import org.htmlparser.filters.NodeClassFilter;
  import org.htmlparser.tags.LinkTag;
+ import org.htmlparser.util.NodeList;
  import org.htmlparser.util.ParserException;
  
***************
*** 38,77 ****
   * and prints them on standard output.
   */
! public class LinkExtractor {
!     private String location;
!     private Parser parser;
!     public LinkExtractor(String location) {
!         this.location = location;
!         try {
!             this.parser   = new Parser(location); // Create the parser object
!         }
!         catch (ParserException e) {
!             e.printStackTrace();
!         }
! 
!     }
!     public void extractLinks() throws ParserException {
!         System.out.println("Parsing "+location+" for links...");
!         Node [] links = parser.extractAllNodesThatAre(LinkTag.class);
!         for (int i = 0;i < links.length;i++) {
!             LinkTag linkTag = (LinkTag)links[i];
!             // To extract only mail addresses, uncomment the following line
!             // if (linkTag.isMailLink())
!             System.out.println(linkTag.getLink());
!         }
!     }
  
!     public static void main(String[] args) {
!         if (args.length != 1) {
!             System.err.println("Syntax Error : Please provide the location(URL or file) to parse");
!             System.exit(-1);
          }
!         LinkExtractor linkExtractor = new LinkExtractor(args[0]);
!         try {
!             linkExtractor.extractLinks();
          }
!         catch (ParserException e) {
!             e.printStackTrace();
          }
      }
  }
--- 41,94 ----
   * and prints them on standard output.
   */
! public class LinkExtractor
! {
!     /**
!      */
!     public static void main (String[] args)
!     {
!         String url;
!         Parser parser;
!         NodeFilter filter;
!         NodeList list;
  
!         if (0 >= args.length)
!         {
!             url = (String)JOptionPane.showInputDialog (
!                 null,
!                 "Enter the URL to extract links from:",
!                 "Web Site",
!                 JOptionPane.PLAIN_MESSAGE,
!                 null,
!                 null,
!                 "http://htmlparser.sourceforge.net/docs/");
!             if (null == url)
!                 System.exit (1);
          }
!         else
!             url = args[0];
!         filter = new NodeClassFilter (LinkTag.class);
!         if ((1 < args.length) && args[1].equalsIgnoreCase ("-maillinks"))
!             filter = new AndFilter (
!                 filter,
!                 new NodeFilter ()
!                 {
!                     public boolean accept (Node node)
!                     {
!                         return (((LinkTag)node).isMailLink ());
!                     }
!                 }
!             );
!         try
!         {
!             parser = new Parser (url);
!             list = parser.extractAllNodesThatMatch (filter);
!             for (int i = 0; i < list.size (); i++)
!                 System.out.println (list.elementAt (i).toHtml ());
          }
!         catch (ParserException e)
!         {
!             e.printStackTrace ();
          }
+         System.exit (0);
      }
  }

--- MailRipper.java DELETED ---

--- Robot.java DELETED ---

[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.49,1.50 MailRipper.java,1.50,NONE Robot.java,1.52,NONE

[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.49,1.50 MailRipper.java,1.50,NONE Robot.java,1.52,NONE