htmlparser-cvs Mailing List for HTML Parser (Page 43)

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications
In directory sc8-pr-cvs1:/tmp/cvs-serv31228/parserapplications

Modified Files:
	MailRipper.java Robot.java 
Log Message:
Change tabs to spaces in all source files.


Index: MailRipper.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/MailRipper.java,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** MailRipper.java	24 Aug 2003 21:59:42 -0000	1.42
--- MailRipper.java	3 Sep 2003 23:36:19 -0000	1.43
***************
*** 46,113 ****
  public class MailRipper {
    private org.htmlparser.Parser parser;
! 	/**
! 	 * MailRipper c'tor takes the url to be ripped
! 	 * @param resourceLocation url to be ripped
! 	 */
! 	public MailRipper(String resourceLocation) {
! 		try {
! 		  parser = new Parser(resourceLocation,new DefaultParserFeedback());
! 		  parser.registerScanners();
! 		}
! 		catch (ParserException e) {
! 			System.err.println("Could not create parser object");
! 			e.printStackTrace();
! 		}
! 	}
! 	public static void main(String[] args) {
! 	  System.out.println("Mail Ripper v" + Parser.getVersion ());
! 	  if (args.length<1 || args[0].equals("-help"))
! 	  {
! 	    System.out.println();
! 	    System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>");
! 	    System.out.println();
! 	    System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
! 	    System.out.println("                  if not in current directory)");
! 	    System.out.println("   -help This screen");
! 	    System.out.println();
! 	    System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
! 	    System.out.println();
! 	    System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net");
! 	    System.out.println();
! 	    System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
! 	    System.exit(-1);
! 	  }		
! 	  String resourceLocation = "http://htmlparser.sourceforge.net";
! 	  if (args.length!=0) resourceLocation = args[0];
! 	
! 	  MailRipper ripper = new MailRipper(resourceLocation);	
! 	  System.out.println("Ripping Site "+resourceLocation);
! 	  try {
! 		  for (Enumeration e=ripper.rip();e.hasMoreElements();) {
! 		    LinkTag tag = (LinkTag)e.nextElement();
! 		    System.out.println("Ripped mail address : "+tag.getLink());
! 		  }
! 	  }
! 	  catch (ParserException e) {
! 	  	e.printStackTrace();
! 	  }
! 	}
! 	/**
! 	 * Rip all mail addresses from the given url, and return an enumeration of such mail addresses.
! 	 * @return Enumeration of mail addresses (a vector of LinkTag)
! 	 */
! 	public Enumeration rip() throws ParserException {
! 	  Node node;
! 	  Vector mailAddresses = new Vector();
! 	  for (NodeIterator e = parser.elements();e.hasMoreNodes();)
! 	  {
! 	    node = e.nextNode();
! 	    if (node instanceof LinkTag)
! 	    {
! 	      LinkTag linkTag = (LinkTag)node;
! 	      if (linkTag.isMailLink()) mailAddresses.addElement(linkTag);
! 	    }
! 	  }
! 	  return mailAddresses.elements();	
! 	}
  }
--- 46,113 ----
  public class MailRipper {
    private org.htmlparser.Parser parser;
!     /**
!      * MailRipper c'tor takes the url to be ripped
!      * @param resourceLocation url to be ripped
!      */
!     public MailRipper(String resourceLocation) {
!         try {
!           parser = new Parser(resourceLocation,new DefaultParserFeedback());
!           parser.registerScanners();
!         }
!         catch (ParserException e) {
!             System.err.println("Could not create parser object");
!             e.printStackTrace();
!         }
!     }
!     public static void main(String[] args) {
!       System.out.println("Mail Ripper v" + Parser.getVersion ());
!       if (args.length<1 || args[0].equals("-help"))
!       {
!         System.out.println();
!         System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>");
!         System.out.println();
!         System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
!         System.out.println("                  if not in current directory)");
!         System.out.println("   -help This screen");
!         System.out.println();
!         System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
!         System.out.println();
!         System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net");
!         System.out.println();
!         System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
!         System.exit(-1);
!       }     
!       String resourceLocation = "http://htmlparser.sourceforge.net";
!       if (args.length!=0) resourceLocation = args[0];
!     
!       MailRipper ripper = new MailRipper(resourceLocation); 
!       System.out.println("Ripping Site "+resourceLocation);
!       try {
!           for (Enumeration e=ripper.rip();e.hasMoreElements();) {
!             LinkTag tag = (LinkTag)e.nextElement();
!             System.out.println("Ripped mail address : "+tag.getLink());
!           }
!       }
!       catch (ParserException e) {
!         e.printStackTrace();
!       }
!     }
!     /**
!      * Rip all mail addresses from the given url, and return an enumeration of such mail addresses.
!      * @return Enumeration of mail addresses (a vector of LinkTag)
!      */
!     public Enumeration rip() throws ParserException {
!       Node node;
!       Vector mailAddresses = new Vector();
!       for (NodeIterator e = parser.elements();e.hasMoreNodes();)
!       {
!         node = e.nextNode();
!         if (node instanceof LinkTag)
!         {
!           LinkTag linkTag = (LinkTag)node;
!           if (linkTag.isMailLink()) mailAddresses.addElement(linkTag);
!         }
!       }
!       return mailAddresses.elements();  
!     }
  }

Index: Robot.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/Robot.java,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -d -r1.44 -r1.45
*** Robot.java	24 Aug 2003 21:59:42 -0000	1.44
--- Robot.java	3 Sep 2003 23:36:19 -0000	1.45
***************
*** 40,138 ****
  public class Robot {
    private org.htmlparser.Parser parser;
! 	/**
! 	 * Robot crawler - Provide the starting url 
! 	 */
! 	public Robot(String resourceLocation) {
! 		try {
! 		  parser = new Parser(resourceLocation,new DefaultParserFeedback());
! 		  parser.registerScanners();
! 		}
! 		catch (ParserException e) {
! 			System.err.println("Error, could not create parser object");
! 			e.printStackTrace();
! 		}
! 	}
! 	/**
! 	 * Crawl using a given crawl depth.
! 	 * @param crawlDepth Depth of crawling
! 	 */
! 	public void crawl(int crawlDepth) throws ParserException
! 	{
! 		try {
! 		  crawl(parser,crawlDepth);
! 		}
! 		catch (ParserException e) {
! 			throw new ParserException("HTMLParserException at crawl("+crawlDepth+")",e);
! 		}
! 	}
! 	/**
! 	 * Crawl using a given parser object, and a given crawl depth.
! 	 * @param parser Parser object
! 	 * @param crawlDepth Depth of crawling
! 	 */
! 	public void crawl(Parser parser,int crawlDepth) throws ParserException {
! 	  System.out.println(" crawlDepth = "+crawlDepth);
! 	  for (NodeIterator e = parser.elements();e.hasMoreNodes();)
! 	  {
! 	    Node node = e.nextNode();
! 	    if (node instanceof LinkTag)
! 	    {
! 	      LinkTag linkTag = (LinkTag)node;
! 	      {
! 	        if (!linkTag.isMailLink())
! 	        {
! 	          if (linkTag.getLink().toUpperCase().indexOf("HTM")!=-1 || 
! 	            linkTag.getLink().toUpperCase().indexOf("COM")!=-1 ||
! 	            linkTag.getLink().toUpperCase().indexOf("ORG")!=-1)
! 	          {
! 	            if (crawlDepth>0)
! 	            {
! 	              Parser newParser = new Parser(linkTag.getLink(),new DefaultParserFeedback());
! 	              newParser.registerScanners();
! 	              System.out.print("Crawling to "+linkTag.getLink());
! 	              crawl(newParser,crawlDepth-1);
! 	            }
! 	            else System.out.println(linkTag.getLink());
! 	          }
! 	        }
! 	      }
! 	    }
! 	  }
! 	}
  
! 	public static void main(String[] args) 
! 	{
! 	  System.out.println("Robot Crawler v" + Parser.getVersion ());
! 	  if (args.length<2 || args[0].equals("-help"))
! 	  {
! 	    System.out.println();
! 	    System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>");
! 	    System.out.println();
! 	    System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
! 	    System.out.println("                  if not in current directory)");
! 	    System.out.println("   <depth> No of links to be followed from each link");
! 	    System.out.println("   -help This screen");
! 	    System.out.println();
! 	    System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
! 	    System.out.println();
! 	    System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.Robot http://www.google.com 3");
! 	    System.out.println();
! 	    System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
! 	    System.exit(-1);
! 	  }	
! 	  String resourceLocation="";
! 	  int crawlDepth = 1;
! 	  if (args.length!=0) resourceLocation = args[0];
! 	  if (args.length==2) crawlDepth=Integer.valueOf(args[1]).intValue();
! 	  
! 	    
! 	  Robot robot = new Robot(resourceLocation);	
! 	  System.out.println("Crawling Site "+resourceLocation);
! 	  try {
! 		  robot.crawl(crawlDepth);
! 	  }
! 	  catch (ParserException e) {
! 	  	e.printStackTrace();
! 	  }
! 	}
  }
--- 40,138 ----
  public class Robot {
    private org.htmlparser.Parser parser;
!     /**
!      * Robot crawler - Provide the starting url 
!      */
!     public Robot(String resourceLocation) {
!         try {
!           parser = new Parser(resourceLocation,new DefaultParserFeedback());
!           parser.registerScanners();
!         }
!         catch (ParserException e) {
!             System.err.println("Error, could not create parser object");
!             e.printStackTrace();
!         }
!     }
!     /**
!      * Crawl using a given crawl depth.
!      * @param crawlDepth Depth of crawling
!      */
!     public void crawl(int crawlDepth) throws ParserException
!     {
!         try {
!           crawl(parser,crawlDepth);
!         }
!         catch (ParserException e) {
!             throw new ParserException("HTMLParserException at crawl("+crawlDepth+")",e);
!         }
!     }
!     /**
!      * Crawl using a given parser object, and a given crawl depth.
!      * @param parser Parser object
!      * @param crawlDepth Depth of crawling
!      */
!     public void crawl(Parser parser,int crawlDepth) throws ParserException {
!       System.out.println(" crawlDepth = "+crawlDepth);
!       for (NodeIterator e = parser.elements();e.hasMoreNodes();)
!       {
!         Node node = e.nextNode();
!         if (node instanceof LinkTag)
!         {
!           LinkTag linkTag = (LinkTag)node;
!           {
!             if (!linkTag.isMailLink())
!             {
!               if (linkTag.getLink().toUpperCase().indexOf("HTM")!=-1 || 
!                 linkTag.getLink().toUpperCase().indexOf("COM")!=-1 ||
!                 linkTag.getLink().toUpperCase().indexOf("ORG")!=-1)
!               {
!                 if (crawlDepth>0)
!                 {
!                   Parser newParser = new Parser(linkTag.getLink(),new DefaultParserFeedback());
!                   newParser.registerScanners();
!                   System.out.print("Crawling to "+linkTag.getLink());
!                   crawl(newParser,crawlDepth-1);
!                 }
!                 else System.out.println(linkTag.getLink());
!               }
!             }
!           }
!         }
!       }
!     }
  
!     public static void main(String[] args) 
!     {
!       System.out.println("Robot Crawler v" + Parser.getVersion ());
!       if (args.length<2 || args[0].equals("-help"))
!       {
!         System.out.println();
!         System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>");
!         System.out.println();
!         System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
!         System.out.println("                  if not in current directory)");
!         System.out.println("   <depth> No of links to be followed from each link");
!         System.out.println("   -help This screen");
!         System.out.println();
!         System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
!         System.out.println();
!         System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.Robot http://www.google.com 3");
!         System.out.println();
!         System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
!         System.exit(-1);
!       } 
!       String resourceLocation="";
!       int crawlDepth = 1;
!       if (args.length!=0) resourceLocation = args[0];
!       if (args.length==2) crawlDepth=Integer.valueOf(args[1]).intValue();
!       
!         
!       Robot robot = new Robot(resourceLocation);    
!       System.out.println("Crawling Site "+resourceLocation);
!       try {
!           robot.crawl(crawlDepth);
!       }
!       catch (ParserException e) {
!         e.printStackTrace();
!       }
!     }
  }

2003	Jan	Feb	Mar	Apr	May (141)	Jun (108)	Jul (66)	Aug (127)	Sep (155)	Oct (149)	Nov (72)	Dec (72)
2004	Jan (100)	Feb (36)	Mar (21)	Apr (3)	May (87)	Jun (28)	Jul (84)	Aug (5)	Sep (14)	Oct	Nov	Dec
2005	Jan (1)	Feb (39)	Mar (26)	Apr (38)	May (14)	Jun (10)	Jul	Aug	Sep (13)	Oct (8)	Nov (10)	Dec
2006	Jan	Feb (1)	Mar (17)	Apr (20)	May (28)	Jun (24)	Jul	Aug	Sep	Oct	Nov	Dec
2015	Jan	Feb	Mar (1)	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec

htmlparser-cvs Mailing List for HTML Parser (Page 43)

htmlparser-cvs — syncmail email notification of CVS commits