[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners FormScanner.java,1.47,1.48 LinkScanner.java,1.56,1.57 TagScanner.java,1.44,1.45 TitleScanner.java,1.32,1.33

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1:/tmp/cvs-serv25308/scanners

Modified Files:
	FormScanner.java LinkScanner.java TagScanner.java 
	TitleScanner.java 
Log Message:
Some speed improvements; passing tags to evaluate, creating strings without string buffers, etc.


Index: FormScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** FormScanner.java	26 Oct 2003 19:46:19 -0000	1.47
--- FormScanner.java	27 Oct 2003 02:18:04 -0000	1.48
***************
*** 115,126 ****
      }
  
!     public boolean evaluate(String s, TagScanner previousOpenScanner)
      {
!         if (previousOpenScanner instanceof LinkScanner) {
              linkScannerAlreadyOpen = true;
              StringBuffer msg= new StringBuffer();
!                 msg.append("<");
!                 msg.append(s);
!                 msg.append(">");
                  msg.append(PREVIOUS_DIRTY_LINK_MESSAGE);
                  feedback.warning(msg.toString());
--- 115,125 ----
      }
  
!     public boolean evaluate(Tag tag, TagScanner previousOpenScanner)
      {
!         if (previousOpenScanner instanceof LinkScanner)
!         {
              linkScannerAlreadyOpen = true;
              StringBuffer msg= new StringBuffer();
!                 msg.append(tag.toHtml ());
                  msg.append(PREVIOUS_DIRTY_LINK_MESSAGE);
                  feedback.warning(msg.toString());
***************
*** 133,137 ****
          else
              linkScannerAlreadyOpen = false;
!         return super.evaluate(s, previousOpenScanner);
      }
  
--- 132,136 ----
          else
              linkScannerAlreadyOpen = false;
!         return super.evaluate(tag, previousOpenScanner);
      }
  

Index: LinkScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v
retrieving revision 1.56
retrieving revision 1.57
diff -C2 -d -r1.56 -r1.57
*** LinkScanner.java	26 Oct 2003 19:46:20 -0000	1.56
--- LinkScanner.java	27 Oct 2003 02:18:04 -0000	1.57
***************
*** 89,117 ****
  
      /**
!      * Template Method, used to decide if this scanner can handle the Link tag type. If
!      * the evaluation returns true, the calling side makes a call to scan().
!      * @param s The complete text contents of the Tag.
!      * @param previousOpenScanner Indicates any previous scanner which hasnt completed, before the current
!      * scan has begun, and hence allows us to write scanners that can work with dirty html
       */
!     public boolean evaluate (String s, TagScanner previousOpenScanner)
      {
!         char ch;
!         boolean ret;
! 
!         // eat up leading blanks
!         s = absorbLeadingBlanks (s);
!         if (5 > s.length ())
!             ret = false;
!         else
!         {
!             ch = s.charAt (0);
!             if ((ch=='a' || ch=='A') && Character.isWhitespace (s.charAt (1)))
!                 ret = -1 != s.toUpperCase().indexOf ("HREF");
!             else
!                 ret = false;
!         }
! 
!         return (ret);
      }
  
--- 89,100 ----
  
      /**
!      * Check if we can handle this tag.
!      * @param tag The generic tag with the name A.
!      * @param previousOpenScanner Indicates any previous scanner which hasn't
!      * completed, before the current scan has begun.
       */
!     public boolean evaluate (Tag tag, TagScanner previousOpenScanner)
      {
!         return (null != tag.getAttributeEx ("HREF"));
      }
  

Index: TagScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -d -r1.44 -r1.45
*** TagScanner.java	26 Oct 2003 19:46:21 -0000	1.44
--- TagScanner.java	27 Oct 2003 02:18:04 -0000	1.45
***************
*** 57,61 ****
   * <br>
   * If you wish to write your own scanner, then you must implement scan().
!  * You MAY implement evaluate() as well, if your evaluation logic is not based on a simple text match.
   * You MUST implement getID() - which identifies your scanner uniquely in the hashtable of scanners.
   *
--- 57,62 ----
   * <br>
   * If you wish to write your own scanner, then you must implement scan().
!  * You MAY implement evaluate() as well, if your evaluation logic is not based
!  * on a match of the tag name.
   * You MUST implement getID() - which identifies your scanner uniquely in the hashtable of scanners.
   *
***************
*** 136,221 ****
      }
  
!   /**
!    * This method is used to decide if this scanner can handle this tag type. If the
!    * evaluation returns true, the calling side makes a call to scan().
!    * <strong>This method has to be implemented meaningfully only if a first-word match with
!    * the scanner id does not imply a match (or extra processing needs to be done).
!    * Default returns true</strong>
!    * @param tagContents The complete text contents of the Tag.
!    * @param previousOpenScanner Indicates any previous scanner which hasnt completed, before the current
!    * scan has begun, and hence allows us to write scanners that can work with dirty html
!    */
!   public boolean evaluate(String tagContents,TagScanner previousOpenScanner) {
!     return true;
!   }
! 
!   /**
!    * Pull the text between two matching capitalized 'XML' tags.
!    * @deprecated This reads ahead on your iterator and doesn't put them back if it's not an XML tag.
!    */
!   public static String extractXMLData (Node node, String tagName, NodeIterator iterator)
      throws
!         ParserException
!   {
!       try
!       {
!           String xmlData = "";
!           
!           boolean xmlTagFound = isXMLTagFound (node, tagName);
!           if (xmlTagFound)
!           {
!               try
!               {
!                   do
!                   {
!                       node = iterator.nextNode ();
!                       if (node!=null)
!                       {
!                           if (node instanceof StringNode)
!                           {
!                               StringNode stringNode = (StringNode)node;
!                               if (xmlData.length ()>0)
!                                 xmlData+=" ";
!                               xmlData += stringNode.getText ();
!                           }
!                           else
!                               if (!(node instanceof Tag && ((Tag)node).isEndTag ()))
!                                 xmlTagFound = false;
!                       }
!                   }
!                   while (node instanceof StringNode);
!                   
!               }
!               
!               catch (Exception e)
!               {
!                   throw new ParserException ("TagScanner.extractXMLData() : error while trying to find xml tag",e);
!               }
!           }
!           // check end tag matches start tag
!           if (xmlTagFound)
!           {
!               if (node!=null)
!               {
!                   if (node instanceof Tag && ((Tag)node).isEndTag ())
!                   {
!                       Tag endTag = (Tag)node;
!                       if (!endTag.getTagName ().equals (tagName))
!                           xmlTagFound = false;
!                   }
!                   
!               }
!               
!           }
!           if (xmlTagFound)
!              return xmlData;
!           else
!               return null;
!       }
!       catch (Exception e)
!       {
!           throw new ParserException ("TagScanner.extractXMLData() : Error occurred while trying to extract xml tag",e);
!       }
!   }
  
      public String getFilter() {
--- 137,224 ----
      }
  
!     /**
!      * This method is used to decide if this scanner can handle this tag type. If the
!      * evaluation returns true, the calling side makes a call to scan().
!      * <strong>This method has to be implemented meaningfully only if a first-word match with
!      * the scanner id does not imply a match (or extra processing needs to be done).
!      * Default returns true</strong>
!      * @param tag The tag with a name that matches a value from {@link #getID}.
!      * @param previousOpenScanner Indicates any previous scanner which hasn't
!      * completed, before the current scan has begun, and hence allows us to
!      * write scanners that can work with dirty html.
!      */
!     public boolean evaluate (Tag tag, TagScanner previousOpenScanner)
!     {
!         return (true);
!     }
!     
!     /**
!      * Pull the text between two matching capitalized 'XML' tags.
!      * @deprecated This reads ahead on your iterator and doesn't put them back if it's not an XML tag.
!      */
!     public static String extractXMLData (Node node, String tagName, NodeIterator iterator)
      throws
!     ParserException
!     {
!         try
!         {
!             String xmlData = "";
!             
!             boolean xmlTagFound = isXMLTagFound (node, tagName);
!             if (xmlTagFound)
!             {
!                 try
!                 {
!                     do
!                     {
!                         node = iterator.nextNode ();
!                         if (node!=null)
!                         {
!                             if (node instanceof StringNode)
!                             {
!                                 StringNode stringNode = (StringNode)node;
!                                 if (xmlData.length ()>0)
!                                     xmlData+=" ";
!                                 xmlData += stringNode.getText ();
!                             }
!                             else
!                                 if (!(node instanceof Tag && ((Tag)node).isEndTag ()))
!                                     xmlTagFound = false;
!                         }
!                     }
!                     while (node instanceof StringNode);
!                     
!                 }
!                 
!                 catch (Exception e)
!                 {
!                     throw new ParserException ("TagScanner.extractXMLData() : error while trying to find xml tag",e);
!                 }
!             }
!             // check end tag matches start tag
!             if (xmlTagFound)
!             {
!                 if (node!=null)
!                 {
!                     if (node instanceof Tag && ((Tag)node).isEndTag ())
!                     {
!                         Tag endTag = (Tag)node;
!                         if (!endTag.getTagName ().equals (tagName))
!                             xmlTagFound = false;
!                     }
!                     
!                 }
!                 
!             }
!             if (xmlTagFound)
!                 return xmlData;
!             else
!                 return null;
!         }
!         catch (Exception e)
!         {
!             throw new ParserException ("TagScanner.extractXMLData() : Error occurred while trying to extract xml tag",e);
!         }
!     }
  
      public String getFilter() {

Index: TitleScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TitleScanner.java,v
retrieving revision 1.32
retrieving revision 1.33
diff -C2 -d -r1.32 -r1.33
*** TitleScanner.java	26 Oct 2003 19:46:21 -0000	1.32
--- TitleScanner.java	27 Oct 2003 02:18:04 -0000	1.33
***************
*** 53,60 ****
      }
  
!     public boolean evaluate(String tagNameBeingChecked, TagScanner previousOpenScanner) {
!         absorbLeadingBlanks(tagNameBeingChecked);
!         return (tagNameBeingChecked.toUpperCase ().startsWith (MATCH_NAME[0]) && ((null == previousOpenScanner)
!         || !previousOpenScanner.getID ()[0].equals  ("TITLE")));
      }
  
--- 53,59 ----
      }
  
!     public boolean evaluate(Tag tag, TagScanner previousOpenScanner)
!     {
!         return ((null == previousOpenScanner) || !previousOpenScanner.getID ()[0].equals  ("TITLE"));
      }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners FormScanner.java,1.47,1.48 LinkScanner.java,1.56,1.57 TagScanner.java,1.44,1.45 TitleScanner.java,1.32,1.33

[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners FormScanner.java,1.47,1.48 LinkScanner.java,1.56,1.57 TagScanner.java,1.44,1.45 TitleScanner.java,1.32,1.33