[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.49,1.50 MailRipper.java (removed) Robot.java (removed)
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-12-31 02:50:53
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications
In directory sc8-pr-cvs1:/tmp/cvs-serv23579/src/org/htmlparser/parserapplications

Modified Files:
    LinkExtractor.java
Removed Files:
    MailRipper.java Robot.java
Log Message:
Add filter support to NodeList.
Rework LinkExtractor and remove MailRipper and Robot example programs.
Clean out docs directory.

Index: LinkExtractor.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v
retrieving revision 1.49
retrieving revision 1.50
diff -C2 -d -r1.49 -r1.50
*** LinkExtractor.java 8 Dec 2003 01:31:52 -0000 1.49
--- LinkExtractor.java 31 Dec 2003 02:50:50 -0000 1.50
***************
*** 1,4 ****
! // HTMLParser Library v1_4_20031207 - A java-based parser for HTML
! // Copyright (C) Dec 31, 2000 Somik Raha
  //
  // This library is free software; you can redistribute it and/or
--- 1,12 ----
! // HTMLParser Library $Name$ - A java-based parser for HTML
! // http://sourceforge.org/projects/htmlparser
! // Copyright (C) 2003 Derrick Oswald
! //
! // Revision Control Information
! //
! // $Source$
! // $Author$
! // $Date$
! // $Revision$
  //
  // This library is free software; you can redistribute it and/or
***************
*** 9,35 ****
  // This library is distributed in the hope that it will be useful,
  // but WITHOUT ANY WARRANTY; without even the implied warranty of
! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  // Lesser General Public License for more details.
  //
  // You should have received a copy of the GNU Lesser General Public
  // License along with this library; if not, write to the Free Software
! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! //
! // For any questions or suggestions, you can write to me at :
! // Email :so...@in...
  //
- // Postal Address :
- // Somik Raha
- // Extreme Programmer & Coach
- // Industrial Logic Corporation
- // 2583 Cedar Street, Berkeley,
- // CA 94708, USA
- // Website : http://www.industriallogic.com

  package org.htmlparser.parserapplications;

  import org.htmlparser.Node;
  import org.htmlparser.Parser;
  import org.htmlparser.tags.LinkTag;
  import org.htmlparser.util.ParserException;
--- 17,38 ----
  // This library is distributed in the hope that it will be useful,
  // but WITHOUT ANY WARRANTY; without even the implied warranty of
! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  // Lesser General Public License for more details.
  //
  // You should have received a copy of the GNU Lesser General Public
  // License along with this library; if not, write to the Free Software
! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  //

  package org.htmlparser.parserapplications;

+ import javax.swing.JOptionPane;
  import org.htmlparser.Node;
+ import org.htmlparser.NodeFilter;
  import org.htmlparser.Parser;
+ import org.htmlparser.filters.AndFilter;
+ import org.htmlparser.filters.NodeClassFilter;
  import org.htmlparser.tags.LinkTag;
+ import org.htmlparser.util.NodeList;
  import org.htmlparser.util.ParserException;

***************
*** 38,77 ****
   * and prints them on standard output.
   */
! public class LinkExtractor {
!     private String location;
!     private Parser parser;
!     public LinkExtractor(String location) {
!         this.location = location;
!         try {
!             this.parser = new Parser(location); // Create the parser object
!         }
!         catch (ParserException e) {
!             e.printStackTrace();
!         }
!
!     }
!     public void extractLinks() throws ParserException {
!         System.out.println("Parsing "+location+" for links...");
!         Node [] links = parser.extractAllNodesThatAre(LinkTag.class);
!         for (int i = 0;i < links.length;i++) {
!             LinkTag linkTag = (LinkTag)links[i];
!             // To extract only mail addresses, uncomment the following line
!             // if (linkTag.isMailLink())
!             System.out.println(linkTag.getLink());
!         }
!     }
!     public static void main(String[] args) {
!         if (args.length != 1) {
!             System.err.println("Syntax Error : Please provide the location(URL or file) to parse");
!             System.exit(-1);
          }
!         LinkExtractor linkExtractor = new LinkExtractor(args[0]);
!         try {
!             linkExtractor.extractLinks();
          }
!         catch (ParserException e) {
!             e.printStackTrace();
          }
      }
  }
--- 41,94 ----
   * and prints them on standard output.
   */
! public class LinkExtractor
! {
!     /**
!      */
!     public static void main (String[] args)
!     {
!         String url;
!         Parser parser;
!         NodeFilter filter;
!         NodeList list;
!         if (0 >= args.length)
!         {
!             url = (String)JOptionPane.showInputDialog (
!                 null,
!                 "Enter the URL to extract links from:",
!                 "Web Site",
!                 JOptionPane.PLAIN_MESSAGE,
!                 null,
!                 null,
!                 "http://htmlparser.sourceforge.net/docs/");
!             if (null == url)
!                 System.exit (1);
          }
!         else
!             url = args[0];
!         filter = new NodeClassFilter (LinkTag.class);
!         if ((1 < args.length) && args[1].equalsIgnoreCase ("-maillinks"))
!             filter = new AndFilter (
!                 filter,
!                 new NodeFilter ()
!                 {
!                     public boolean accept (Node node)
!                     {
!                         return (((LinkTag)node).isMailLink ());
!                     }
!                 }
!             );
!         try
!         {
!             parser = new Parser (url);
!             list = parser.extractAllNodesThatMatch (filter);
!             for (int i = 0; i < list.size (); i++)
!                 System.out.println (list.elementAt (i).toHtml ());
          }
!         catch (ParserException e)
!         {
!             e.printStackTrace ();
          }
+         System.exit (0);
      }
  }

--- MailRipper.java DELETED ---

--- Robot.java DELETED ---
|