[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications SiteCapturer.java,1.3,1.4
From: <der...@us...> - 2004-01-14 02:53:51
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications
In directory sc8-pr-cvs1:/tmp/cvs-serv28098/src/org/htmlparser/parserapplications

Modified Files:
	SiteCapturer.java 
Log Message:

Index: SiteCapturer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/SiteCapturer.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** SiteCapturer.java	10 Jan 2004 00:06:03 -0000	1.3
--- SiteCapturer.java	14 Jan 2004 02:53:46 -0000	1.4
***************
*** 41,57 ****
  import javax.swing.JOptionPane;
  
- import org.htmlparser.Node;
- import org.htmlparser.NodeFilter;
  import org.htmlparser.Parser;
  import org.htmlparser.PrototypicalNodeFactory;
- import org.htmlparser.filters.AndFilter;
- import org.htmlparser.filters.HasAttributeFilter;
- import org.htmlparser.filters.NodeClassFilter;
- import org.htmlparser.lexer.nodes.Attribute;
  import org.htmlparser.tags.BaseHrefTag;
  import org.htmlparser.tags.FrameTag;
  import org.htmlparser.tags.ImageTag;
  import org.htmlparser.tags.LinkTag;
- import org.htmlparser.tags.MetaTag;
  import org.htmlparser.util.NodeIterator;
  import org.htmlparser.util.NodeList;
--- 41,50 ----
***************
*** 60,64 ****
  /**
   * Save a web site locally.
!  * Illustrative program to save a web site contents locally.
   * It was created to demonstrate URL rewriting in it's simplest form.
   * It uses customized tags in the NodeFactory to alter the URLs.
--- 53,57 ----
  /**
   * Save a web site locally.
!  * Illustrative prgram to save a web site contents locally.
   * It was created to demonstrate URL rewriting in it's simplest form.
   * It uses customized tags in the NodeFactory to alter the URLs.
***************
*** 131,139 ****
  
      /**
-      * The filter to apply to the nodes retrieved.
-      */
-     protected NodeFilter mFilter;
- 
-     /**
       * Copy buffer size.
       * Resources are moved to disk in chunks this size or less.
--- 124,127 ----
***************
*** 148,153 ****
          PrototypicalNodeFactory factory;
  
-         mSource = null;
-         mTarget = null;
          mPages = new ArrayList ();
          mFinished = new HashSet ();
--- 136,139 ----
***************
*** 161,166 ****
          factory.registerTag (new LocalImageTag ());
          mParser.setNodeFactory (factory);
-         mCaptureResources = true;
-         mFilter = null;
      }
  
--- 147,150 ----
***************
*** 229,251 ****
  
      }
- 
-     /** Getter for property filter.
-      * @return Value of property filter.
-      *
-      */
-     public NodeFilter getFilter ()
-     {
-         return (mFilter);
-     }
- 
-     /** Setter for property filter.
-      * @param filter New value of property filter.
-      *
-      */
-     public void setFilter (NodeFilter filter)
-     {
-         mFilter = filter;
-     }
- 
      /**
       * Returns <code>true</code> if the link is one we are interested in.
--- 213,216 ----
***************
*** 316,320 ****
          String ret;
  
!         if (link.equals (getSource ()) || (!getSource ().endsWith ("/") && link.equals (getSource () + "/")))
              ret = "index.html"; // handle the root page specially
          else if (link.startsWith (getSource ())
--- 281,285 ----
          String ret;
  
!         if (link.equals (getSource ()))
              ret = "index.html"; // handle the root page specially
          else if (link.startsWith (getSource ())
***************
*** 417,430 ****
       * Process a single page.
       */
!     protected void process (NodeFilter filter)
          throws ParserException
      {
          String url;
-         int bookmark;
          NodeList list;
-         NodeList robots;
-         MetaTag robot;
-         String content;
          File file;
          File dir;
  
--- 382,391 ----
       * Process a single page.
       */
!     protected void process ()
          throws ParserException
      {
          String url;
          NodeList list;
          File file;
          File dir;
  
***************
*** 437,443 ****
          try
!         {
!             bookmark = mPages.size ();
! 
              // fetch the page and gather the list of nodes
              mParser.setURL (url);
              list = new NodeList ();
--- 398,402 ----
          try
!         {
              // fetch the page and gather the list of nodes
              mParser.setURL (url);
              list = new NodeList ();
***************
*** 445,470 ****
                  list.add (e.nextNode ()); // URL conversion occurs in the tags
  
-             // handle robots meta tag according to http://www.robotstxt.org/wc/meta-user.html
-             // <meta name="robots" content="index,follow" />
-             // <meta name="robots" content="noindex,nofollow" />
-             robots = list.extractAllNodesThatMatch (
-                 new AndFilter (
-                     new NodeClassFilter (MetaTag.class),
-                     new HasAttributeFilter ("name", "robots")), true);
-             if (0 != robots.size ())
-             {
-                 robot = (MetaTag)robots.elementAt (0);
-                 content = robot.getAttribute ("content").toLowerCase ();
-                 if ((-1 != content.indexOf ("none")) || (-1 != content.indexOf ("nofollow")))
-                     // reset mPages
-                     for (int i = bookmark; i < mPages.size (); i++)
-                         mPages.remove (i);
-                 if ((-1 != content.indexOf ("none")) || (-1 != content.indexOf ("noindex")))
-                     return;
-             }
- 
-             if (null != filter)
-                 list.keepAllNodesThatMatch (filter, true);
- 
              // save the page locally
              file = new File (getTarget (), makeLocalLink (url, ""));
--- 404,407 ----
***************
*** 472,483 ****
              if (!dir.exists ())
                  dir.mkdirs ();
-             else if (!dir.isDirectory ())
-             {
-                 dir = new File (dir.getParentFile (), dir.getName () + ".content");
-                 if (!dir.exists ())
-                     dir.mkdirs ();
-                 file = new File (dir, file.getName ());
-             }
- 
              try
              {
--- 409,412 ----
***************
*** 652,656 ****
          try
          {
!             process (getFilter ());
              while (0 != mImages.size ())
                  copy ();
--- 581,585 ----
          try
          {
!             process ();
              while (0 != mImages.size ())
                  copy ();
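For readers unfamiliar with the URL rewriting technique the class javadoc mentions, the pattern looks roughly like the sketch below (not code from this commit): a tag subclass overrides doSemanticAction () to rewrite its own URL, and registering an instance with a PrototypicalNodeFactory makes the Parser instantiate that subclass for matching tags, so conversion happens while the page is parsed. The RewritingLinkTag class, the example.com URL, and the rewrite rule are illustrative stand-ins; SiteCapturer itself registers customized tags such as the LocalImageTag visible in the constructor excerpt above.

    import org.htmlparser.Parser;
    import org.htmlparser.PrototypicalNodeFactory;
    import org.htmlparser.tags.LinkTag;
    import org.htmlparser.util.NodeIterator;
    import org.htmlparser.util.ParserException;

    public class RewriteSketch
    {
        // hypothetical subclass, analogous in spirit to SiteCapturer's
        // customized tags: the parser creates one of these for each <A> tag,
        // and doSemanticAction () rewrites the URL in place during parsing
        static class RewritingLinkTag extends LinkTag
        {
            public void doSemanticAction ()
                throws ParserException
            {
                String link = getLink ();
                // illustrative rewrite rule only: strip the site prefix
                if (link.startsWith ("http://example.com/"))
                    setLink (link.substring ("http://example.com/".length ()));
            }
        }

        public static void main (String[] args)
            throws ParserException
        {
            Parser parser = new Parser ("http://example.com/index.html");
            // replace the prototype for link tags with the rewriting subclass
            PrototypicalNodeFactory factory = new PrototypicalNodeFactory ();
            factory.registerTag (new RewritingLinkTag ());
            parser.setNodeFactory (factory);
            // URL conversion occurs in the tags as they are parsed
            for (NodeIterator e = parser.elements (); e.hasMoreNodes (); )
                System.out.println (e.nextNode ().toHtml ());
        }
    }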