[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications MailRipper.java,1.42,1.43 Robot.java,1.44,1.45

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications
In directory sc8-pr-cvs1:/tmp/cvs-serv31228/parserapplications

Modified Files:
	MailRipper.java Robot.java 
Log Message:
Change tabs to spaces in all source files.


Index: MailRipper.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/MailRipper.java,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** MailRipper.java	24 Aug 2003 21:59:42 -0000	1.42
--- MailRipper.java	3 Sep 2003 23:36:19 -0000	1.43
***************
*** 46,113 ****
  public class MailRipper {
    private org.htmlparser.Parser parser;
! 	/**
! 	 * MailRipper c'tor takes the url to be ripped
! 	 * @param resourceLocation url to be ripped
! 	 */
! 	public MailRipper(String resourceLocation) {
! 		try {
! 		  parser = new Parser(resourceLocation,new DefaultParserFeedback());
! 		  parser.registerScanners();
! 		}
! 		catch (ParserException e) {
! 			System.err.println("Could not create parser object");
! 			e.printStackTrace();
! 		}
! 	}
! 	public static void main(String[] args) {
! 	  System.out.println("Mail Ripper v" + Parser.getVersion ());
! 	  if (args.length<1 || args[0].equals("-help"))
! 	  {
! 	    System.out.println();
! 	    System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>");
! 	    System.out.println();
! 	    System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
! 	    System.out.println("                  if not in current directory)");
! 	    System.out.println("   -help This screen");
! 	    System.out.println();
! 	    System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
! 	    System.out.println();
! 	    System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net");
! 	    System.out.println();
! 	    System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
! 	    System.exit(-1);
! 	  }		
! 	  String resourceLocation = "http://htmlparser.sourceforge.net";
! 	  if (args.length!=0) resourceLocation = args[0];
! 	
! 	  MailRipper ripper = new MailRipper(resourceLocation);	
! 	  System.out.println("Ripping Site "+resourceLocation);
! 	  try {
! 		  for (Enumeration e=ripper.rip();e.hasMoreElements();) {
! 		    LinkTag tag = (LinkTag)e.nextElement();
! 		    System.out.println("Ripped mail address : "+tag.getLink());
! 		  }
! 	  }
! 	  catch (ParserException e) {
! 	  	e.printStackTrace();
! 	  }
! 	}
! 	/**
! 	 * Rip all mail addresses from the given url, and return an enumeration of such mail addresses.
! 	 * @return Enumeration of mail addresses (a vector of LinkTag)
! 	 */
! 	public Enumeration rip() throws ParserException {
! 	  Node node;
! 	  Vector mailAddresses = new Vector();
! 	  for (NodeIterator e = parser.elements();e.hasMoreNodes();)
! 	  {
! 	    node = e.nextNode();
! 	    if (node instanceof LinkTag)
! 	    {
! 	      LinkTag linkTag = (LinkTag)node;
! 	      if (linkTag.isMailLink()) mailAddresses.addElement(linkTag);
! 	    }
! 	  }
! 	  return mailAddresses.elements();	
! 	}
  }
--- 46,113 ----
  public class MailRipper {
    private org.htmlparser.Parser parser;
!     /**
!      * MailRipper c'tor takes the url to be ripped
!      * @param resourceLocation url to be ripped
!      */
!     public MailRipper(String resourceLocation) {
!         try {
!           parser = new Parser(resourceLocation,new DefaultParserFeedback());
!           parser.registerScanners();
!         }
!         catch (ParserException e) {
!             System.err.println("Could not create parser object");
!             e.printStackTrace();
!         }
!     }
!     public static void main(String[] args) {
!       System.out.println("Mail Ripper v" + Parser.getVersion ());
!       if (args.length<1 || args[0].equals("-help"))
!       {
!         System.out.println();
!         System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>");
!         System.out.println();
!         System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
!         System.out.println("                  if not in current directory)");
!         System.out.println("   -help This screen");
!         System.out.println();
!         System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
!         System.out.println();
!         System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net");
!         System.out.println();
!         System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
!         System.exit(-1);
!       }     
!       String resourceLocation = "http://htmlparser.sourceforge.net";
!       if (args.length!=0) resourceLocation = args[0];
!     
!       MailRipper ripper = new MailRipper(resourceLocation); 
!       System.out.println("Ripping Site "+resourceLocation);
!       try {
!           for (Enumeration e=ripper.rip();e.hasMoreElements();) {
!             LinkTag tag = (LinkTag)e.nextElement();
!             System.out.println("Ripped mail address : "+tag.getLink());
!           }
!       }
!       catch (ParserException e) {
!         e.printStackTrace();
!       }
!     }
!     /**
!      * Rip all mail addresses from the given url, and return an enumeration of such mail addresses.
!      * @return Enumeration of mail addresses (a vector of LinkTag)
!      */
!     public Enumeration rip() throws ParserException {
!       Node node;
!       Vector mailAddresses = new Vector();
!       for (NodeIterator e = parser.elements();e.hasMoreNodes();)
!       {
!         node = e.nextNode();
!         if (node instanceof LinkTag)
!         {
!           LinkTag linkTag = (LinkTag)node;
!           if (linkTag.isMailLink()) mailAddresses.addElement(linkTag);
!         }
!       }
!       return mailAddresses.elements();  
!     }
  }

Index: Robot.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/Robot.java,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -d -r1.44 -r1.45
*** Robot.java	24 Aug 2003 21:59:42 -0000	1.44
--- Robot.java	3 Sep 2003 23:36:19 -0000	1.45
***************
*** 40,138 ****
  public class Robot {
    private org.htmlparser.Parser parser;
! 	/**
! 	 * Robot crawler - Provide the starting url 
! 	 */
! 	public Robot(String resourceLocation) {
! 		try {
! 		  parser = new Parser(resourceLocation,new DefaultParserFeedback());
! 		  parser.registerScanners();
! 		}
! 		catch (ParserException e) {
! 			System.err.println("Error, could not create parser object");
! 			e.printStackTrace();
! 		}
! 	}
! 	/**
! 	 * Crawl using a given crawl depth.
! 	 * @param crawlDepth Depth of crawling
! 	 */
! 	public void crawl(int crawlDepth) throws ParserException
! 	{
! 		try {
! 		  crawl(parser,crawlDepth);
! 		}
! 		catch (ParserException e) {
! 			throw new ParserException("HTMLParserException at crawl("+crawlDepth+")",e);
! 		}
! 	}
! 	/**
! 	 * Crawl using a given parser object, and a given crawl depth.
! 	 * @param parser Parser object
! 	 * @param crawlDepth Depth of crawling
! 	 */
! 	public void crawl(Parser parser,int crawlDepth) throws ParserException {
! 	  System.out.println(" crawlDepth = "+crawlDepth);
! 	  for (NodeIterator e = parser.elements();e.hasMoreNodes();)
! 	  {
! 	    Node node = e.nextNode();
! 	    if (node instanceof LinkTag)
! 	    {
! 	      LinkTag linkTag = (LinkTag)node;
! 	      {
! 	        if (!linkTag.isMailLink())
! 	        {
! 	          if (linkTag.getLink().toUpperCase().indexOf("HTM")!=-1 || 
! 	            linkTag.getLink().toUpperCase().indexOf("COM")!=-1 ||
! 	            linkTag.getLink().toUpperCase().indexOf("ORG")!=-1)
! 	          {
! 	            if (crawlDepth>0)
! 	            {
! 	              Parser newParser = new Parser(linkTag.getLink(),new DefaultParserFeedback());
! 	              newParser.registerScanners();
! 	              System.out.print("Crawling to "+linkTag.getLink());
! 	              crawl(newParser,crawlDepth-1);
! 	            }
! 	            else System.out.println(linkTag.getLink());
! 	          }
! 	        }
! 	      }
! 	    }
! 	  }
! 	}
  
! 	public static void main(String[] args) 
! 	{
! 	  System.out.println("Robot Crawler v" + Parser.getVersion ());
! 	  if (args.length<2 || args[0].equals("-help"))
! 	  {
! 	    System.out.println();
! 	    System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>");
! 	    System.out.println();
! 	    System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
! 	    System.out.println("                  if not in current directory)");
! 	    System.out.println("   <depth> No of links to be followed from each link");
! 	    System.out.println("   -help This screen");
! 	    System.out.println();
! 	    System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
! 	    System.out.println();
! 	    System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.Robot http://www.google.com 3");
! 	    System.out.println();
! 	    System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
! 	    System.exit(-1);
! 	  }	
! 	  String resourceLocation="";
! 	  int crawlDepth = 1;
! 	  if (args.length!=0) resourceLocation = args[0];
! 	  if (args.length==2) crawlDepth=Integer.valueOf(args[1]).intValue();
! 	  
! 	    
! 	  Robot robot = new Robot(resourceLocation);	
! 	  System.out.println("Crawling Site "+resourceLocation);
! 	  try {
! 		  robot.crawl(crawlDepth);
! 	  }
! 	  catch (ParserException e) {
! 	  	e.printStackTrace();
! 	  }
! 	}
  }
--- 40,138 ----
  public class Robot {
    private org.htmlparser.Parser parser;
!     /**
!      * Robot crawler - Provide the starting url 
!      */
!     public Robot(String resourceLocation) {
!         try {
!           parser = new Parser(resourceLocation,new DefaultParserFeedback());
!           parser.registerScanners();
!         }
!         catch (ParserException e) {
!             System.err.println("Error, could not create parser object");
!             e.printStackTrace();
!         }
!     }
!     /**
!      * Crawl using a given crawl depth.
!      * @param crawlDepth Depth of crawling
!      */
!     public void crawl(int crawlDepth) throws ParserException
!     {
!         try {
!           crawl(parser,crawlDepth);
!         }
!         catch (ParserException e) {
!             throw new ParserException("HTMLParserException at crawl("+crawlDepth+")",e);
!         }
!     }
!     /**
!      * Crawl using a given parser object, and a given crawl depth.
!      * @param parser Parser object
!      * @param crawlDepth Depth of crawling
!      */
!     public void crawl(Parser parser,int crawlDepth) throws ParserException {
!       System.out.println(" crawlDepth = "+crawlDepth);
!       for (NodeIterator e = parser.elements();e.hasMoreNodes();)
!       {
!         Node node = e.nextNode();
!         if (node instanceof LinkTag)
!         {
!           LinkTag linkTag = (LinkTag)node;
!           {
!             if (!linkTag.isMailLink())
!             {
!               if (linkTag.getLink().toUpperCase().indexOf("HTM")!=-1 || 
!                 linkTag.getLink().toUpperCase().indexOf("COM")!=-1 ||
!                 linkTag.getLink().toUpperCase().indexOf("ORG")!=-1)
!               {
!                 if (crawlDepth>0)
!                 {
!                   Parser newParser = new Parser(linkTag.getLink(),new DefaultParserFeedback());
!                   newParser.registerScanners();
!                   System.out.print("Crawling to "+linkTag.getLink());
!                   crawl(newParser,crawlDepth-1);
!                 }
!                 else System.out.println(linkTag.getLink());
!               }
!             }
!           }
!         }
!       }
!     }
  
!     public static void main(String[] args) 
!     {
!       System.out.println("Robot Crawler v" + Parser.getVersion ());
!       if (args.length<2 || args[0].equals("-help"))
!       {
!         System.out.println();
!         System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>");
!         System.out.println();
!         System.out.println("   <resourceLocn> the name of the file to be parsed (with complete path ");
!         System.out.println("                  if not in current directory)");
!         System.out.println("   <depth> No of links to be followed from each link");
!         System.out.println("   -help This screen");
!         System.out.println();
!         System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net");
!         System.out.println();
!         System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.Robot http://www.google.com 3");
!         System.out.println();
!         System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
!         System.exit(-1);
!       } 
!       String resourceLocation="";
!       int crawlDepth = 1;
!       if (args.length!=0) resourceLocation = args[0];
!       if (args.length==2) crawlDepth=Integer.valueOf(args[1]).intValue();
!       
!         
!       Robot robot = new Robot(resourceLocation);    
!       System.out.println("Crawling Site "+resourceLocation);
!       try {
!           robot.crawl(crawlDepth);
!       }
!       catch (ParserException e) {
!         e.printStackTrace();
!       }
!     }
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications MailRipper.java,1.42,1.43 Robot.java,1.44,1.45

[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications MailRipper.java,1.42,1.43 Robot.java,1.44,1.45