[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners CompositeTagScanner.java,1.75,1.76 FormScanner.java,1.50,1.51 ScriptScanner.java,1.45,1.46 TagScanner.java,1.45,1.46

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1:/tmp/cvs-serv5437/scanners

Modified Files:
	CompositeTagScanner.java FormScanner.java ScriptScanner.java 
	TagScanner.java 
Log Message:
Remove TagScanner cruft.


Index: CompositeTagScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v
retrieving revision 1.75
retrieving revision 1.76
diff -C2 -d -r1.75 -r1.76
*** CompositeTagScanner.java	28 Oct 2003 10:31:02 -0000	1.75
--- CompositeTagScanner.java	28 Oct 2003 12:54:21 -0000	1.76
***************
*** 238,242 ****
                              scanner = parser.getScanner (name);
                              if ((null != scanner) && scanner.evaluate (next, this))
!                                 node = scanner.createScannedNode (next, lexer.getPage ().getUrl (), lexer);
                          }
                      }
--- 238,242 ----
                              scanner = parser.getScanner (name);
                              if ((null != scanner) && scanner.evaluate (next, this))
!                                 node = scanner.scan (next, lexer.getPage ().getUrl (), lexer);
                          }
                      }
***************
*** 252,255 ****
--- 252,256 ----
  
          composite = (CompositeTag)createTag (lexer.getPage (), tag.elementBegin (), endTag.elementEnd (), tag.getAttributesEx (), tag, endTag, nodeList);
+         composite.setThisScanner (this);
          for (int i = 0; i < composite.getChildCount (); i++)
              composite.childAt (i).setParent (composite);

Index: FormScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v
retrieving revision 1.50
retrieving revision 1.51
diff -C2 -d -r1.50 -r1.51
*** FormScanner.java	28 Oct 2003 10:31:02 -0000	1.50
--- FormScanner.java	28 Oct 2003 12:54:21 -0000	1.51
***************
*** 45,50 ****
  {
      private static final String [] MATCH_ID = { "FORM" };
-     public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this..";
-     private boolean linkScannerAlreadyOpen=false;
      private static final String [] formTagEnders = {"FORM","HTML","BODY"};
      
--- 45,48 ----
***************
*** 110,133 ****
      {
          return MATCH_ID;
-     }
- 
-     public boolean evaluate(Tag tag, TagScanner previousOpenScanner)
-     {
-         if (previousOpenScanner instanceof LinkScanner)
-         {
-             linkScannerAlreadyOpen = true;
-             StringBuffer msg= new StringBuffer();
-                 msg.append(tag.toHtml ());
-                 msg.append(PREVIOUS_DIRTY_LINK_MESSAGE);
-                 feedback.warning(msg.toString());
-                 // This is dirty HTML. Assume the current tag is
-                 // not a new link tag - but an end tag. This is actually a really wild bug -
-                 // Internet Explorer actually parses such tags.
-                 // So - we shall then proceed to fool the scanner into sending an endtag of type </A>
-                 // For this - set the dirty flag to true and return
-         }
-         else
-             linkScannerAlreadyOpen = false;
-         return super.evaluate(tag, previousOpenScanner);
      }
  
--- 108,111 ----

Index: ScriptScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v
retrieving revision 1.45
retrieving revision 1.46
diff -C2 -d -r1.45 -r1.46
*** ScriptScanner.java	28 Oct 2003 03:04:18 -0000	1.45
--- ScriptScanner.java	28 Oct 2003 12:54:21 -0000	1.46
***************
*** 169,172 ****
--- 169,173 ----
  //TODO: use the factory:
              ret = createTag (lexer.getPage (), tag.elementBegin(), end.elementEnd(), tag.getAttributesEx (), tag, end, new NodeList (last));
+             ret.setThisScanner (this);
          }
          finally

Index: TagScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v
retrieving revision 1.45
retrieving revision 1.46
diff -C2 -d -r1.45 -r1.46
*** TagScanner.java	27 Oct 2003 02:18:04 -0000	1.45
--- TagScanner.java	28 Oct 2003 12:54:21 -0000	1.46
***************
*** 70,138 ****
          Serializable
  {
!       /**
!        * A filter which is used to associate this tag. The filter contains a string
!        * that is used to match which tags are to be allowed to pass through. This can
!        * be useful when one wishes to dynamically filter out all tags except one type
!        * which may be programmed later than the parser. Is also useful for command line
!        * implementations of the parser.
!        */
!       protected String filter;
! 
!       /**
!        * HTMLParserFeedback object automatically initialized
!        */
!       protected ParserFeedback feedback;
!       /**
!        * Default Constructor, automatically registers the scanner into a static array of
!        * scanners inside Tag
!        */
!       public TagScanner()
!       {
!         this.filter="";
!       }
!       /**
!        * This constructor automatically registers the scanner, and sets the filter for this
!        * tag.
!        * @param filter The filter which will allow this tag to pass through.
!        */
!       public TagScanner(String filter)
!       {
!         this.filter=filter;
!       }
!         /**
!      * Insert the method's description here.
!      * Creation date: (6/4/2001 11:44:09 AM)
!      * @return java.lang.String
!      * @param c char
       */
!     public String absorb(String s,char c) {
!       int index = s.indexOf(c);
!       if (index!=-1)    s=s.substring(index+1,s.length());
!       return s;
      }
  
      /**
!      * Remove whitespace from the front of the given string.
!      * @param s The string to trim.
!      * @return Either the same string or a string with whitespace chopped off.
       */
!     public static String absorbLeadingBlanks (String s)
      {
!         int length;
!         int i;
!         String ret;
! 
!         i = 0;
!         length = s.length ();
!         while (i < length && Character.isWhitespace (s.charAt (i)))
!             i++;
!         if (0 == i)
!             ret = s;
!         else if (length == i)
!             ret = "";
!         else
!             ret = s.substring (i);
! 
!         return (ret);
      }
  
--- 70,99 ----
          Serializable
  {
!     /**
!      * A filter which is used to associate this tag. The filter contains a string
!      * that is used to match which tags are to be allowed to pass through. This can
!      * be useful when one wishes to dynamically filter out all tags except one type
!      * which may be programmed later than the parser. Is also useful for command line
!      * implementations of the parser.
       */
!     protected String filter;
!     
!     /**
!      * Default Constructor, automatically registers the scanner into a static array of
!      * scanners inside Tag
!      */
!     public TagScanner ()
!     {
!         this ("");
      }
  
      /**
!      * This constructor automatically registers the scanner, and sets the filter for this
!      * tag.
!      * @param filter The filter which will allow this tag to pass through.
       */
!     public TagScanner (String filter)
      {
!         this.filter=filter;
      }
  
***************
*** 153,356 ****
      }
      
!     /**
!      * Pull the text between two matching capitalized 'XML' tags.
!      * @deprecated This reads ahead on your iterator and doesn't put them back if it's not an XML tag.
!      */
!     public static String extractXMLData (Node node, String tagName, NodeIterator iterator)
!     throws
!     ParserException
      {
-         try
-         {
-             String xmlData = "";
-             
-             boolean xmlTagFound = isXMLTagFound (node, tagName);
-             if (xmlTagFound)
-             {
-                 try
-                 {
-                     do
-                     {
-                         node = iterator.nextNode ();
-                         if (node!=null)
-                         {
-                             if (node instanceof StringNode)
-                             {
-                                 StringNode stringNode = (StringNode)node;
-                                 if (xmlData.length ()>0)
-                                     xmlData+=" ";
-                                 xmlData += stringNode.getText ();
-                             }
-                             else
-                                 if (!(node instanceof Tag && ((Tag)node).isEndTag ()))
-                                     xmlTagFound = false;
-                         }
-                     }
-                     while (node instanceof StringNode);
-                     
-                 }
-                 
-                 catch (Exception e)
-                 {
-                     throw new ParserException ("TagScanner.extractXMLData() : error while trying to find xml tag",e);
-                 }
-             }
-             // check end tag matches start tag
-             if (xmlTagFound)
-             {
-                 if (node!=null)
-                 {
-                     if (node instanceof Tag && ((Tag)node).isEndTag ())
-                     {
-                         Tag endTag = (Tag)node;
-                         if (!endTag.getTagName ().equals (tagName))
-                             xmlTagFound = false;
-                     }
-                     
-                 }
-                 
-             }
-             if (xmlTagFound)
-                 return xmlData;
-             else
-                 return null;
-         }
-         catch (Exception e)
-         {
-             throw new ParserException ("TagScanner.extractXMLData() : Error occurred while trying to extract xml tag",e);
-         }
-     }
- 
-     public String getFilter() {
          return filter;
      }
  
-     public static boolean isXMLTagFound(Node node, String tagName) {
-         boolean xmlTagFound=false;
-         if (node instanceof Tag) {
-             Tag tag = (Tag)node;
-             if (tag.getText().toUpperCase().indexOf(tagName)==0) {
-               xmlTagFound=true;
-             }
-         }
-         return xmlTagFound;
-     }
- 
-     public final Tag createScannedNode(Tag tag,String url,Lexer lexer) throws ParserException {
-         Tag thisTag = scan(tag,url,lexer);
-         thisTag.setThisScanner(this);
-         thisTag.setAttributesEx(tag.getAttributesEx());
-         return thisTag;
-     }
- 
-     /**
-      * Override this method to create your own tag type
-      * @param tagData
-      * @param tag
-      * @param url
-      * @return Tag
-      * @throws ParserException
-      */
-     protected abstract Tag createTag(Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException;
- 
      /**
       * Scan the tag and extract the information related to this type. The url of the
       * initiating scan has to be provided in case relative links are found. The initial
       * url is then prepended to it to give an absolute link.
!      * The NodeReader is provided in order to do a lookahead operation. We assume that
       * the identification has already been performed using the evaluate() method.
!      * @param tag HTML Tag to be scanned for identification
!      * @param url The initiating url of the scan (Where the html page lies)
!      * @param reader The reader object responsible for reading the html page
       */
!     public Tag scan(Tag tag,String url,Lexer lexer) throws ParserException
!     {
!         return (createTag(lexer.getPage (), tag.elementBegin(), tag.elementEnd(), tag.getAttributesEx (), tag, url));
!     }
! 
!     public String removeChars(String s,String occur)  {
!         StringBuffer newString = new StringBuffer();
!         int index;
!         do {
!             index = s.indexOf(occur);
!             if (index!=-1) {
!                 newString.append(s.substring(0,index));
!                 s=s.substring(index+occur.length());
!             }
!         }
!         while (index!=-1);
!         newString.append(s);
!         return newString.toString();
!     }
! 
!     public abstract String [] getID();
! 
!     public final void setFeedback(ParserFeedback feedback) {
!         this.feedback = feedback;
!     }
! 
!     public static Map adjustScanners(Parser parser)
      {
!         Map ret;
! 
!         ret = parser.getScanners();
!         // Remove all existing scanners
!         parser.flushScanners();
  
          return (ret);
      }
  
-     public static void restoreScanners(Parser parser, Hashtable tempScanners)
-     {
-         // Flush the scanners
-         parser.setScanners(tempScanners);
-     }
- 
      /**
!      * Insert an EndTag in the currentLine, just before the occurence of the provided tag
       */
!     public String insertEndTagBeforeNode(AbstractNode node, String currentLine) {
!         String newLine = currentLine.substring(0,node.elementBegin());
!         newLine += "</A>";
!         newLine += currentLine.substring(node.elementBegin(),currentLine.length());
!         return newLine;
!     }
! 
! //    protected Tag getReplacedEndTag(Tag tag, NodeReader reader, String currentLine) {
! //        // Replace tag - it was a <A> tag - replace with </a>
! //        String newLine = replaceFaultyTagWithEndTag(tag, currentLine);
! //        reader.changeLine(newLine);
! //        return new EndTag(
! //            new TagData(
! //                tag.elementBegin(),
! //                tag.elementBegin()+3,
! //                tag.getTagName(),
! //                currentLine
! //            )
! //        );
! //    }
! 
!     public String replaceFaultyTagWithEndTag(Tag tag, String currentLine) {
!         String newLine = currentLine.substring(0,tag.elementBegin());
!         newLine+="</"+tag.getTagName()+">";
!         newLine+=currentLine.substring(tag.elementEnd()+1,currentLine.length());
! 
!         return newLine;
!     }
! 
! //    protected Tag getInsertedEndTag(Tag tag, String currentLine) {
! //        // Insert end tag
! //        String newLine = insertEndTagBeforeNode(tag, currentLine);
! //        reader.changeLine(newLine);
! //        return new EndTag(
! //            new TagData(
! //                tag.elementBegin(),
! //                tag.elementBegin()+3,
! //                tag.getTagName(),
! //                currentLine
! //            )
! //        );
! //    }
! 
  
  }
--- 114,154 ----
      }
      
!     public String getFilter()
      {
          return filter;
      }
  
      /**
       * Scan the tag and extract the information related to this type. The url of the
       * initiating scan has to be provided in case relative links are found. The initial
       * url is then prepended to it to give an absolute link.
!      * The Lexer is provided in order to do a lookahead operation. We assume that
       * the identification has already been performed using the evaluate() method.
!      * @param tag HTML Tag to be scanned for identification.
!      * @param url The initiating url of the scan (Where the html page lies).
!      * @param lexer Provides html page access.
!      * @return The resultant tag (may be unchanged).
       */
!     public Tag scan (Tag tag, String url, Lexer lexer) throws ParserException
      {
!         Tag ret;
!         
!         ret = createTag(lexer.getPage (), tag.elementBegin(), tag.elementEnd(), tag.getAttributesEx (), tag, url);
!         ret.setThisScanner(this);
  
          return (ret);
      }
  
      /**
!      * Create a tag.
!      * Override this method to create your own tag type.
!      * @param tagData
!      * @param tag
!      * @param url
!      * @return Tag
!      * @throws ParserException
       */
!     protected abstract Tag createTag(Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException;
  
+     public abstract String [] getID();
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners CompositeTagScanner.java,1.75,1.76 FormScann

[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners CompositeTagScanner.java,1.75,1.76 FormScanner.java,1.50,1.51 ScriptScanner.java,1.45,1.46 TagScanner.java,1.45,1.46