[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications LinkExtractor.java,1.41,1.42
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-01 20:24:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications In directory sc8-pr-cvs1:/tmp/cvs-serv25734 Modified Files: LinkExtractor.java Log Message: Fix bug #786869 LinkExtractor Sample not working. Index: LinkExtractor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** LinkExtractor.java 24 Aug 2003 21:59:42 -0000 1.41 --- LinkExtractor.java 1 Sep 2003 20:24:04 -0000 1.42 *************** *** 27,31 **** // Website : http://www.industriallogic.com - package org.htmlparser.parserapplications; --- 27,30 ---- *************** *** 40,82 **** */ public class LinkExtractor { ! private String location; ! private Parser parser; ! public LinkExtractor(String location) { ! this.location = location; ! try { ! this.parser = new Parser(location); // Create the parser object ! parser.registerScanners(); // Register standard scanners (Very Important) ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! ! } ! public void extractLinks() throws ParserException { ! System.out.println("Parsing "+location+" for links..."); ! Node [] links = parser.extractAllNodesThatAre(LinkTag.class); ! for (int i = 0;i < links.length;i++) { ! LinkTag linkTag = (LinkTag)links[i]; ! // Print it ! // System.out.println(linkTag.toString()); ! System.out.println(linkTag.getLink()); ! // To extract only mail addresses, uncomment the following line ! // if (linkTag.isMailLink()) System.out.println(linkTag.getLink()); ! } ! } ! public static void main(String[] args) { ! if (args.length != 1) { ! System.err.println("Syntax Error : Please provide the location(URL or file) to parse"); ! System.exit(-1); ! } ! LinkExtractor linkExtractor = new LinkExtractor(args[0]); ! try { ! linkExtractor.extractLinks(); ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } } --- 39,79 ---- */ public class LinkExtractor { ! private String location; ! private Parser parser; ! public LinkExtractor(String location) { ! this.location = location; ! try { ! this.parser = new Parser(location); // Create the parser object ! parser.registerScanners(); // Register standard scanners (Very Important) ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! ! } ! public void extractLinks() throws ParserException { ! System.out.println("Parsing "+location+" for links..."); ! Node [] links = parser.extractAllNodesThatAre(LinkTag.class); ! for (int i = 0;i < links.length;i++) { ! LinkTag linkTag = (LinkTag)links[i]; ! // To extract only mail addresses, uncomment the following line ! // if (linkTag.isMailLink()) ! System.out.println(linkTag.getLink()); ! } ! } ! public static void main(String[] args) { ! if (args.length != 1) { ! System.err.println("Syntax Error : Please provide the location(URL or file) to parse"); ! System.exit(-1); ! } ! LinkExtractor linkExtractor = new LinkExtractor(args[0]); ! try { ! linkExtractor.extractLinks(); ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } } |