htmlparser-cvs Mailing List for HTML Parser (Page 21)

Brought to you by: derrickoswald

htmlparser-cvs — syncmail email notification of CVS commits

You can subscribe to this list here.

2003	_Jan	_Feb	_Mar	_Apr	_May (141)	_Jun (108)	_Jul (66)	_Aug (127)	_Sep (155)	_Oct (149)	_Nov (72)	_Dec (72)
2004	_Jan (100)	_Feb (36)	_Mar (21)	_Apr (3)	_May (87)	_Jun (28)	_Jul (84)	_Aug (5)	_Sep (14)	_Oct	_Nov	_Dec
2005	_Jan (1)	_Feb (39)	_Mar (26)	_Apr (38)	_May (14)	_Jun (10)	_Jul	_Aug	_Sep (13)	_Oct (8)	_Nov (10)	_Dec
2006	_Jan	_Feb (1)	_Mar (17)	_Apr (20)	_May (28)	_Jun (24)	_Jul	_Aug	_Sep	_Oct	_Nov	_Dec
2015	_Jan	_Feb	_Mar (1)	_Apr	_May	_Jun	_Jul	_Aug	_Sep	_Oct	_Nov	_Dec

Flat | Threaded

<< < 1 .. 19 20 21 22 23 .. 61 > >> (Page 21 of 61)

[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners StyleScanner.java,1.32,1.33

From: <der...@us...> - 2004-02-29 14:34:35

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8741/scanners

Added Files:
	StyleScanner.java 
Log Message:
Fix bug #900125 Style Tag Children not grouped
Added StyleScanner, a near copy of ScriptScanner.
Added testStyleChildren() in StyleTagTest to check its operation.

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tags StyleTag.java,1.34,1.35

From: <der...@us...> - 2004-02-29 14:34:35

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8741/tags

Modified Files:
	StyleTag.java 
Log Message:
Fix bug #900125 Style Tag Children not grouped
Added StyleScanner, a near copy of ScriptScanner.
Added testStyleChildren() in StyleTagTest to check its operation.



Index: StyleTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/StyleTag.java,v
retrieving revision 1.34
retrieving revision 1.35
diff -C2 -d -r1.34 -r1.35
*** StyleTag.java	2 Jan 2004 16:24:55 -0000	1.34
--- StyleTag.java	29 Feb 2004 14:16:27 -0000	1.35
***************
*** 27,30 ****
--- 27,32 ----
  package org.htmlparser.tags;
  
+ import org.htmlparser.scanners.StyleScanner;
+ 
  /**
   * A StyleTag represents a &lt;style&gt; tag.
***************
*** 38,45 ****
--- 40,53 ----
  
      /**
+      * The set of end tag names that indicate the end of this tag.
+      */
+     private static final String[] mEndTagEnders = new String[] {"BODY", "HTML"};
+ 
+     /**
       * Create a new style tag.
       */
      public StyleTag ()
      {
+         setThisScanner (new StyleScanner ());
      }
  
***************
*** 54,79 ****
  
      /**
       * Get the style data in this tag.
       * @return The HTML of the children of this tag.
       */
!     public String getStyleCode()
      {
!         return getChildrenHTML();
      }
  
      /**
       * Print the contents of the style node.
       */
      public String toString()
      {
!         String guts = toHtml();
!         guts = guts.substring (1, guts.length () - 2);
!         StringBuffer sb = new StringBuffer();
!         sb.append("Style Node : \n");
!         sb.append("\n");
!         sb.append("Code\n");
!         sb.append("****\n");
!         sb.append(guts+"\n");
!         return sb.toString();
      }
  }
--- 62,100 ----
  
      /**
+      * Return the set of end tag names that cause this tag to finish.
+      * @return The names of following end tags that stop further scanning.
+      */
+     public String[] getEndTagEnders ()
+     {
+         return (mEndTagEnders);
+     }
+ 
+     /**
       * Get the style data in this tag.
       * @return The HTML of the children of this tag.
       */
!     public String getStyleCode ()
      {
!         return (getChildrenHTML ());
      }
  
      /**
       * Print the contents of the style node.
+      * @return A string suitable for debugging or a printout.
       */
      public String toString()
      {
!         String guts;
!         StringBuffer ret;
!         
!         ret = new StringBuffer ();
! 
!         guts = toHtml ();
!         guts = guts.substring (1, guts.length () - 1);
!         ret.append ("Style node :\n");
!         ret.append (guts);
!         ret.append ("\n");
! 
!         return (ret.toString ());
      }
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/tagTests StyleTagTest.java,1.35,1.36

From: <der...@us...> - 2004-02-29 14:34:35

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8741/tests/tagTests

Modified Files:
	StyleTagTest.java 
Log Message:
Fix bug #900125 Style Tag Children not grouped
Added StyleScanner, a near copy of ScriptScanner.
Added testStyleChildren() in StyleTagTest to check its operation.



Index: StyleTagTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/StyleTagTest.java,v
retrieving revision 1.35
retrieving revision 1.36
diff -C2 -d -r1.35 -r1.36
*** StyleTagTest.java	2 Jan 2004 16:24:57 -0000	1.35
--- StyleTagTest.java	29 Feb 2004 14:16:27 -0000	1.36
***************
*** 28,31 ****
--- 28,32 ----
  
  import org.htmlparser.Parser;
+ import org.htmlparser.StringNode;
  import org.htmlparser.tags.HeadTag;
  import org.htmlparser.tags.Html;
***************
*** 65,69 ****
          "</STYLE>";
          createParser(style);
-         Parser.setLineSeparator("\r\n");
          parseAndAssertNodeCount(1);
          assertTrue(node[0] instanceof StyleTag);
--- 66,69 ----
***************
*** 130,132 ****
--- 130,163 ----
          assertStringEquals("Expected Style Code",expectedCode,styleTag.getStyleCode());
      }
+     
+     /**
+      * See bug #900125 Style Tag Children not grouped
+      */
+     public void testStyleChildren () throws ParserException
+     {
+         String style =
+             "\nbody {color:white}\n" +
+             "<!--\n" +
+             ".teliabox {\n" +
+             "color: #A9014E;\n" +
+             "text-align: center;\n" +
+             "background-image:url(hallo.gif);\n" +
+             "}\n" +
+             "-->";
+         String html =
+             "<style type=\"text/css\" media=\"screen\">" +
+             style +
+             "</style>";
+         StyleTag tag;
+         StringNode string;
+ 
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertTrue ("Node should be a STYLE tag", node[0] instanceof StyleTag);
+         tag = (StyleTag)node[0];
+         assertTrue ("STYLE tag should have one child", 1 == tag.getChildCount ());
+         assertTrue ("Child should be a StringNode", tag.getChild (0) instanceof StringNode);
+         string = (StringNode)tag.getChild (0);
+         assertStringEquals ("Style text incorrect", style, string.toHtml ());
+     }
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests ParserTest.java,1.56,1.57

From: <der...@us...> - 2004-02-29 13:10:26

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25994/tests

Modified Files:
	ParserTest.java 
Log Message:
Fix bug #900128 RemarkNode.setText() does not set Text
Add override setText() to StringNode and RemarkNode.
Add unit tests to exercise the new code.
Remove remaining XX_FILTER constants.



Index: ParserTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v
retrieving revision 1.56
retrieving revision 1.57
diff -C2 -d -r1.56 -r1.57
*** ParserTest.java	25 Jan 2004 21:33:12 -0000	1.56
--- ParserTest.java	29 Feb 2004 12:52:21 -0000	1.57
***************
*** 42,45 ****
--- 42,46 ----
  import org.htmlparser.Parser;
  import org.htmlparser.PrototypicalNodeFactory;
+ import org.htmlparser.RemarkNode;
  import org.htmlparser.StringNode;
  import org.htmlparser.filters.NodeClassFilter;
***************
*** 884,886 ****
--- 885,958 ----
          }
      }
+     
+     /**
+      * See bug #900128 RemarkNode.setText() does not set Text
+      */
+     public void testSetStringText () throws Exception
+     {
+         String text;
+         String html;
+         String newtext;
+         String newhtml;
+         Node txt;
+ 
+         text = "This is just text.";
+         html = "<body>" + text + "</body>";
+         newtext = "This is different text.";
+         newhtml = "<body>" + newtext + "</body>";
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertStringEquals ("html wrong", html, node[0].toHtml ());
+         assertTrue ("wrong number of children", 1 == node[0].getChildren ().size ());
+         assertTrue ("string node expected", node[0].getChildren ().elementAt (0) instanceof StringNode);
+         txt = node[0].getChildren ().elementAt (0);
+         assertStringEquals ("string html wrong", text, txt.toHtml ());
+         assertStringEquals ("string contents wrong", text, txt.getText ());
+         assertTrue ("toString wrong", txt.toString ().endsWith (text));
+         txt.setText (newtext);
+         assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
+         assertStringEquals ("new string html wrong", newtext, txt.toHtml ());
+         assertStringEquals ("new string contents wrong", newtext, txt.getText ());
+         assertTrue ("toString wrong", txt.toString ().endsWith (newtext));
+     }
+ 
+     /**
+      * See bug #900128 RemarkNode.setText() does not set Text
+      */
+     public void testSetRemarkText () throws Exception
+     {
+         String text;
+         String remark;
+         String html;
+         String newtext;
+         String newremark;
+         String newhtml;
+         Node rem;
+ 
+         text = " This is a remark. ";
+         remark = "<!--" + text + "-->";
+         html = "<body>" + remark + "</body>";
+         newtext = " This is a different remark. ";
+         newremark = "<!--" + newtext + "-->";
+         newhtml = "<body>" + newremark + "</body>";
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertStringEquals ("html wrong", html, node[0].toHtml ());
+         assertTrue ("wrong number of children", 1 == node[0].getChildren ().size ());
+         assertTrue ("remark node expected", node[0].getChildren ().elementAt (0) instanceof RemarkNode);
+         rem = node[0].getChildren ().elementAt (0);
+         assertStringEquals ("remark html wrong", remark, rem.toHtml ());
+         assertStringEquals ("remark contents wrong", text, rem.getText ());
+         assertTrue ("toString wrong", rem.toString ().endsWith (text));
+         rem.setText (newtext);
+         assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
+         assertStringEquals ("new remark html wrong", newremark, rem.toHtml ());
+         assertStringEquals ("new remark contents wrong", newtext, rem.getText ());
+         assertTrue ("toString wrong", rem.toString ().endsWith (newtext));
+         rem.setText (newremark);
+         assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
+         assertStringEquals ("new remark html wrong", newremark, rem.toHtml ());
+         assertStringEquals ("new remark contents wrong", newtext, rem.getText ());
+         assertTrue ("toString wrong", rem.toString ().endsWith (newtext));
+     }
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tags ImageTag.java,1.42,1.43 LinkTag.java,1.47,1.48

From: <der...@us...> - 2004-02-29 13:10:26

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25994/tags

Modified Files:
	ImageTag.java LinkTag.java 
Log Message:
Fix bug #900128 RemarkNode.setText() does not set Text
Add override setText() to StringNode and RemarkNode.
Add unit tests to exercise the new code.
Remove remaining XX_FILTER constants.



Index: ImageTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** ImageTag.java	25 Jan 2004 21:33:12 -0000	1.42
--- ImageTag.java	29 Feb 2004 12:52:21 -0000	1.43
***************
*** 39,44 ****
  public class ImageTag extends Tag
  {
-     public static final String IMAGE_TAG_FILTER="-i";
- 
      /**
       * The set of names handled by this tag.
--- 39,42 ----

Index: LinkTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** LinkTag.java	2 Jan 2004 16:24:55 -0000	1.47
--- LinkTag.java	29 Feb 2004 12:52:21 -0000	1.48
***************
*** 37,42 ****
  public class LinkTag extends CompositeTag
  {
-     public static final String LINK_TAG_FILTER="-l";
- 
      /**
       * The set of names handled by this tag.
--- 37,40 ----

[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes RemarkNode.java,1.16,1.17 StringNode.java,1.17,1.18

From: <der...@us...> - 2004-02-29 13:10:25

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25994/lexer/nodes

Modified Files:
	RemarkNode.java StringNode.java 
Log Message:
Fix bug #900128 RemarkNode.setText() does not set Text
Add override setText() to StringNode and RemarkNode.
Add unit tests to exercise the new code.
Remove remaining XX_FILTER constants.



Index: RemarkNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** RemarkNode.java	2 Jan 2004 16:24:53 -0000	1.16
--- RemarkNode.java	29 Feb 2004 12:52:20 -0000	1.17
***************
*** 38,56 ****
  public class RemarkNode extends AbstractNode
  {
!     public final static String REMARK_NODE_FILTER="-r";
  
      /**
!      * Constructor takes in the text string, beginning and ending posns.
!      * @param page The page this string is on.
!      * @param start The beginning position of the string.
!      * @param end The ending positiong of the string.
       */
      public RemarkNode (Page page, int start, int end)
      {
          super (page, start, end);
      }
  
      /**
       * Returns the text contents of the comment tag.
       */
      public String getText()
--- 38,72 ----
  public class RemarkNode extends AbstractNode
  {
!     /**
!      * The contents of the remark node, or override text.
!      */
!     protected String mText;
  
      /**
!      * Constructor takes in the text string.
!      * @param text The string node text. For correct generation of HTML, this
!      * should not contain representations of tags (unless they are balanced).
!      */
!     public RemarkNode (String text)
!     {
!         super (null, 0, 0);
!         setText (text);
!     }
! 
!     /**
!      * Constructor takes in the page and beginning and ending posns.
!      * @param page The page this remark is on.
!      * @param start The beginning position of the remark.
!      * @param end The ending positiong of the remark.
       */
      public RemarkNode (Page page, int start, int end)
      {
          super (page, start, end);
+         mText = null;
      }
  
      /**
       * Returns the text contents of the comment tag.
+      * @return The contents of the text inside the comment delimiters.
       */
      public String getText()
***************
*** 60,73 ****
          String ret;
  
!         start = getStartPosition () + 4;
!         end = getEndPosition () - 3;
!         if (start >= end)
!             ret = "";
          else
!             ret = mPage.getText (start, end);
  
          return (ret);
      }
  
      public String toPlainTextString()
      {
--- 76,108 ----
          String ret;
  
!         if (null == mText)
!         {
!             start = getStartPosition () + 4; // <!--
!             end = getEndPosition () - 3; // -->
!             if (start >= end)
!                 ret = "";
!             else
!                 ret = mPage.getText (start, end);
!         }
          else
!             ret = mText;
  
          return (ret);
      }
  
+     /**
+      * Sets the string contents of the node.
+      * If the text has the remark delimiters (&lt;!-- --&gt;), these are stripped off.
+      * @param text The new text for the node.
+      */
+     public void setText (String text)
+     {
+         mText = text;
+         if (text.startsWith ("<!--") && text.endsWith ("-->"))
+             mText = text.substring (4, text.length () - 3);
+         nodeBegin = 0;
+         nodeEnd = mText.length ();
+     }
+ 
      public String toPlainTextString()
      {
***************
*** 77,85 ****
      public String toHtml()
      {
!         return (mPage.getText (getStartPosition (), getEndPosition ()));
      }
  
      /**
       * Print the contents of the remark tag.
       */
      public String toString()
--- 112,138 ----
      public String toHtml()
      {
!         StringBuffer buffer;
!         String ret;
!         
!         if (null == mText)
!             ret = mPage.getText (getStartPosition (), getEndPosition ());
!         else
!         {
!             buffer = new StringBuffer (mText.length () + 7);
!             buffer.append ("<!--");
!             buffer.append (mText);
!             buffer.append ("-->");
!             ret = buffer.toString ();
!         }
! 
!         return (ret);
      }
  
      /**
       * Print the contents of the remark tag.
+      * This is suitable for display in a debugger or output to a printout.
+      * Control characters are replaced by their equivalent escape
+      * sequence and contents is truncated to 80 characters.
+      * @return A string representation of the remark node.
       */
      public String toString()
***************
*** 95,110 ****
          endpos = getEndPosition ();
          ret = new StringBuffer (endpos - startpos + 20);
!         start = new Cursor (getPage (), startpos);
!         end = new Cursor (getPage (), endpos);
!         ret.append ("Rem (");
!         ret.append (start);
!         ret.append (",");
!         ret.append (end);
!         ret.append ("): ");
!         while (start.getPosition () < endpos)
          {
!             try
              {
!                 c = mPage.getCharacter (start);
                  switch (c)
                  {
--- 148,203 ----
          endpos = getEndPosition ();
          ret = new StringBuffer (endpos - startpos + 20);
!         if (null == mText)
          {
!             start = new Cursor (getPage (), startpos);
!             end = new Cursor (getPage (), endpos);
!             ret.append ("Rem (");
!             ret.append (start);
!             ret.append (",");
!             ret.append (end);
!             ret.append ("): ");
!             start.setPosition (startpos + 4); // <!--
!             endpos -= 3; // -->
!             while (start.getPosition () < endpos)
              {
!                 try
!                 {
!                     c = mPage.getCharacter (start);
!                     switch (c)
!                     {
!                         case '\t':
!                             ret.append ("\\t");
!                             break;
!                         case '\n':
!                             ret.append ("\\n");
!                             break;
!                         case '\r':
!                             ret.append ("\\r");
!                             break;
!                         default:
!                             ret.append (c);
!                     }
!                 }
!                 catch (ParserException pe)
!                 {
!                     // not really expected, but we're only doing toString, so ignore
!                 }
!                 if (77 <= ret.length ())
!                 {
!                     ret.append ("...");
!                     break;
!                 }
!             }
!         }
!         else
!         {
!             ret.append ("Rem (");
!             ret.append (startpos);
!             ret.append (",");
!             ret.append (endpos);
!             ret.append ("): ");
!             while (startpos < endpos)
!             {
!                 c = mText.charAt (startpos);
                  switch (c)
                  {
***************
*** 121,133 ****
                          ret.append (c);
                  }
!             }
!             catch (ParserException pe)
!             {
!                 // not really expected, but we'return only doing toString, so ignore
!             }
!             if (77 <= ret.length ())
!             {
!                 ret.append ("...");
!                 break;
              }
          }
--- 214,223 ----
                          ret.append (c);
                  }
!                 if (77 <= ret.length ())
!                 {
!                     ret.append ("...");
!                     break;
!                 }
!                 startpos++;
              }
          }

Index: StringNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** StringNode.java	2 Jan 2004 16:24:53 -0000	1.17
--- StringNode.java	29 Feb 2004 12:52:20 -0000	1.18
***************
*** 38,45 ****
  public class StringNode extends AbstractNode
  {
!     public static final String STRING_FILTER = "-string";
  
      /**
!      * Constructor takes in the text string, beginning and ending posns.
       * @param page The page this string is on.
       * @param start The beginning position of the string.
--- 38,59 ----
  public class StringNode extends AbstractNode
  {
!     /**
!      * The contents of the string node, or override text.
!      */
!     protected String mText;
  
      /**
!      * Constructor takes in the text string.
!      * @param text The string node text. For correct generation of HTML, this
!      * should not contain representations of tags (unless they are balanced).
!      */
!     public StringNode (String text)
!     {
!         super (null, 0, 0);
!         setText (text);
!     }
! 
!     /**
!      * Constructor takes in the page and beginning and ending posns.
       * @param page The page this string is on.
       * @param start The beginning position of the string.
***************
*** 49,52 ****
--- 63,67 ----
      {
          super (page, start, end);
+         mText = null;
      }
  
***************
*** 65,81 ****
      public void setText (String text)
      {
!         mPage = new Page (text);
          nodeBegin = 0;
!         nodeEnd = text.length ();
!         // TODO: this really needs work
!         try
!         {
!             Cursor cursor = new Cursor (mPage, nodeBegin);
!             for (int i = nodeBegin; i < nodeEnd; i++)
!                 mPage.getCharacter (cursor);
!         }
!         catch (ParserException pe)
!         {
!         }
      }
  
--- 80,86 ----
      public void setText (String text)
      {
!         mText = text;
          nodeBegin = 0;
!         nodeEnd = mText.length ();
      }
  
***************
*** 87,91 ****
      public String toHtml ()
      {
!         return (mPage.getText (getStartPosition (), getEndPosition ()));
      }
  
--- 92,102 ----
      public String toHtml ()
      {
!         String ret;
!         
!         ret = mText;
!         if (null == ret)
!             ret = mPage.getText (getStartPosition (), getEndPosition ());
! 
!         return (ret);
      }
  
***************
*** 109,124 ****
          endpos = getEndPosition ();
          ret = new StringBuffer (endpos - startpos + 20);
!         start = new Cursor (getPage (), startpos);
!         end = new Cursor (getPage (), endpos);
!         ret.append ("Txt (");
!         ret.append (start);
!         ret.append (",");
!         ret.append (end);
!         ret.append ("): ");
!         while (start.getPosition () < endpos)
          {
!             try
              {
!                 c = mPage.getCharacter (start);
                  switch (c)
                  {
--- 120,173 ----
          endpos = getEndPosition ();
          ret = new StringBuffer (endpos - startpos + 20);
!         if (null == mText)
          {
!             start = new Cursor (getPage (), startpos);
!             end = new Cursor (getPage (), endpos);
!             ret.append ("Txt (");
!             ret.append (start);
!             ret.append (",");
!             ret.append (end);
!             ret.append ("): ");
!             while (start.getPosition () < endpos)
              {
!                 try
!                 {
!                     c = mPage.getCharacter (start);
!                     switch (c)
!                     {
!                         case '\t':
!                             ret.append ("\\t");
!                             break;
!                         case '\n':
!                             ret.append ("\\n");
!                             break;
!                         case '\r':
!                             ret.append ("\\r");
!                             break;
!                         default:
!                             ret.append (c);
!                     }
!                 }
!                 catch (ParserException pe)
!                 {
!                     // not really expected, but we're only doing toString, so ignore
!                 }
!                 if (77 <= ret.length ())
!                 {
!                     ret.append ("...");
!                     break;
!                 }
!             }
!         }
!         else
!         {
!             ret.append ("Txt (");
!             ret.append (startpos);
!             ret.append (",");
!             ret.append (endpos);
!             ret.append ("): ");
!             while (startpos < endpos)
!             {
!                 c = mText.charAt (startpos);
                  switch (c)
                  {
***************
*** 135,147 ****
                          ret.append (c);
                  }
!             }
!             catch (ParserException pe)
!             {
!                 // not really expected, but we'return only doing toString, so ignore
!             }
!             if (77 <= ret.length ())
!             {
!                 ret.append ("...");
!                 break;
              }
          }
--- 184,193 ----
                          ret.append (c);
                  }
!                 if (77 <= ret.length ())
!                 {
!                     ret.append ("...");
!                     break;
!                 }
!                 startpos++;
              }
          }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tags ScriptTag.java,1.36,1.37

From: <der...@us...> - 2004-02-29 01:56:19

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19426

Modified Files:
	ScriptTag.java 
Log Message:
Correct booboo in ScriptTag toHtml() injected by fix to bug #902121.



Index: ScriptTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ScriptTag.java,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -d -r1.36 -r1.37
*** ScriptTag.java	28 Feb 2004 15:52:43 -0000	1.36
--- ScriptTag.java	29 Feb 2004 01:38:36 -0000	1.37
***************
*** 27,31 ****
--- 27,33 ----
  package org.htmlparser.tags;
  
+ import org.htmlparser.Node;
  import org.htmlparser.scanners.ScriptScanner;
+ import org.htmlparser.util.SimpleNodeIterator;
  
  /**
***************
*** 136,161 ****
      }
  
!     /**
!      * Render the tag as HTML.
!      * @return The tag as an HTML fragment.
!      * @see org.htmlparser.Node#toHtml()
!      */
!     public String toHtml()
      {
!         StringBuffer ret;
!         
!         ret = new StringBuffer ();
!         ret.append (super.toHtml ());
!         if (!isEmptyXmlTag ())
!         {
!             if (null != getScriptCode ())
!                 ret.append (getScriptCode ());
!             else
!                 putChildrenInto (ret);
!             if (null != getEndTag ())
!                 putEndTagInto (ret);
!         }
  
!         return (ret.toString());
      }
  
--- 138,155 ----
      }
  
!     protected void putChildrenInto(StringBuffer sb)
      {
!         Node node;
  
!         if (null != getScriptCode ())
!             sb.append (getScriptCode ());
!         else
!             for (SimpleNodeIterator e = children (); e.hasMoreNodes ();)
!             {
!                 node = e.nextNode ();
!                 // eliminate virtual tags
!     //            if (!(node.getStartPosition () == node.getEndPosition ()))
!                     sb.append (node.toHtml ());
!             }
      }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/scannersTests ScriptScannerTest.java,1.52,1.53

From: <der...@us...> - 2004-02-28 16:10:13

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/tests/scannersTests

Modified Files:
	ScriptScannerTest.java 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



Index: ScriptScannerTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ScriptScannerTest.java,v
retrieving revision 1.52
retrieving revision 1.53
diff -C2 -d -r1.52 -r1.53
*** ScriptScannerTest.java	14 Jan 2004 02:53:47 -0000	1.52
--- ScriptScannerTest.java	28 Feb 2004 15:52:44 -0000	1.53
***************
*** 27,38 ****
--- 27,44 ----
  package org.htmlparser.tests.scannersTests;
  
+ import java.io.IOException;
+ import java.io.StringReader;
  import java.util.Hashtable;
  
  import org.htmlparser.Node;
  import org.htmlparser.Parser;
+ import org.htmlparser.filters.TagNameFilter;
+ import org.htmlparser.lexer.Lexer;
+ import org.htmlparser.scanners.ScriptDecoder;
  import org.htmlparser.tags.BodyTag;
  import org.htmlparser.tags.ScriptTag;
  import org.htmlparser.tests.ParserTestCase;
  import org.htmlparser.util.NodeIterator;
+ import org.htmlparser.util.NodeList;
  import org.htmlparser.util.ParserException;
  
***************
*** 579,581 ****
--- 585,673 ----
          assertStringEquals ("bad html", teststring, htmlBuffer.toString ());
      }
+ 
+     /**
+      * See bug #902121 StringBean throws NullPointerException
+      * Contributed by Reza Motori (rezamotori)
+      */
+     public void testDecodeScript ()
+         throws ParserException
+     {
+         String plaintext =
+             "<HTML>\n" +
+             "<HEAD>\n" +
+             "<TITLE>Script Encoder Sample Page</TITLE>\n" +
+             "<SCRIPT LANGUAGE=\"JScript.Encode\">\n" +
+             "<!--//\n" +
+             "//CopyrightÂ© 1998 Microsoft Corporation. All Rights Reserved.\n" +
+             "//**Start Encode**\r\n" +
+             "function verifyCorrectBrowser(){\r\n" +
+             "  if(navigator.appName == \"Microsoft Internet Explorer\")\r\n" +
+             "    if (navigator.appVersion.indexOf (\"5.\") >= 0)\r\n" +
+             "      return(true);\r\n" +
+             "    else\r\n" +
+             "      return(false);\r\n" +
+             "}\r\n" +
+             "function getAppropriatePage(){\r\n" +
+             "  var str1 = \"Had this been an actual Web site, a page compatible with \";\r\n" +
+             "  var str2 = \"browsers other than \";\r\n" +
+             "  var str3 = \"Microsoft Internet Explorer 5.0 \";\r\n" +
+             "  var str4 = \"would have been loaded.\";\r\n" +
+             "  if (verifyCorrectBrowser())\r\n" +
+             "    document.write(str1 + str3 + str4);\r\n" +
+             "  else\r\n" +
+             "    document.write(str1 + str2 + str3 + str4);\r\n" +
+             "}\r\n" +
+             "//-->\r\n" +
+             "</SCRIPT>\n" +
+             "</HEAD>\n" +
+             "<BODY onload=\"getAppropriatePage()\">\n" +
+             "</BODY>\n" +
+             "</HTML>";
+         String cryptext =
+             "<HTML>\n" +
+             "<HEAD>\n" +
+             "<TITLE>Script Encoder Sample Page</TITLE>\n" +
+             "<SCRIPT LANGUAGE=\"JScript.Encode\">\n" +
+             "<!--//\n" +
+             "//CopyrightÂ© 1998 Microsoft Corporation. All Rights Reserved.\n" +
+             "//**Start Encode**#@~^ZwIAAA==@#@&0;	mDkW	P7nDb0zZKD.n1YAMGhk+Dvb`@#@&P,kW`UC7kLlDGDcl22gl:n~{'~Jtr1DGkW6YP&xDnD	+OPA62sKD+ME#@#@&P,~~k6PvxC\\rLmYGDcCwa.n.kkWU bx[+X66Pcr*cJ#,@*{~!*@#@&P,P~~,D+D;D	`YM;n#p@#@&P~P~n^/n@#@&~P,P~~M+Y;.	`Wl^d#I@#@&)@#@&6E	^YbWUPT+O)awDK2DblYKCo`*	@#@&~~7l.PkOD8Px~rCl[~Dtr/,8+U,l	Pl1Y!CV,n4,/rO~Pm~wmo+,^G:alDk8Vn~SkOt,Ei@#@&~~7lD~dDD+P{~r4.Khk+DkPKOtD~Y4lU~ri@#@&~P7lD,dOD2P{PEHr^MWdW6OP&xOnMx+O~A62VK.D~lRZPJp@#@&~P7l.PkY.*,'PrAW!VN,4C\\P(+nx~sKl[+9 Jp@#@&~,k0~c7+.k6z;W.M+1YAMWSd+M`b#@#@&~~,PNK^Es+xD ADbY`dY.q,_~/D.&,_~dDDcbI@#@&~Psk+@#@&P,PP9W1;:xORSDrO`/D.F,_PkO. ,_,/ODf~3PdYM*#p@#@&N@#@&z&R @*@#@&qrIAAA==^#~@</SCRIPT>\n" +
+             "</HEAD>\n" +
+             "<BODY onload=\"getAppropriatePage()\">\n" +
+             "</BODY>\n" +
+             "</HTML>";
+         Lexer lexer;
+         
+         lexer = new Lexer (cryptext);
+         ScriptDecoder.LAST_STATE = ScriptDecoder.STATE_INITIAL; // read everything
+         try
+         {
+             String result = ScriptDecoder.Decode (lexer.getPage (), lexer.getCursor ());
+             assertStringEquals ("decoding failed", plaintext, result);
+         }
+         finally
+         {
+             ScriptDecoder.LAST_STATE = ScriptDecoder.STATE_DONE;
+         }
+     }
+     
+     /**
+      * See bug #902121 StringBean throws NullPointerException
+      * Contributed by Reza Motori (rezamotori)
+      */
+     public void testDecodePage ()
+         throws ParserException
+     {
+         String url = "http://htmlparser.sourceforge.net/test/EncryptedScriptExample.html";
+         String plaintext =
+             "\r\n" +
+             "var nows = new Date();\r\n" +
+             "var nIndexs = nows.getTime();\r\n" +
+             "document.write(\"<img src=\\\"http://www.parsads.com/adserve/scriptinject.asp?F=4&Z=3,4,5,10,12&N=1&U=644&O=&nocache=\"  + nIndexs + \"\\\" width=\\\"1\\\" hight=\\\"1\\\"><img src=\\\"http://www.parsads.com/adserve/scriptinject.asp?F=4&Z=3,4,5,10,12&N=1&U=643&O=&nocache=\"  + nIndexs + \"\\\" width=\\\"1\\\" hight=\\\"1\\\"><img src=\\\"http://www.parsads.com/adserve/scriptinject.asp?F=4&Z=3,4,5,10,12&N=1&U=324&O=&nocache=\"  + nIndexs + \"\\\" width=\\\"1\\\" hight=\\\"1\\\">\");\r\n";
+         
+         parser = new Parser (url);
+         NodeList scripts = parser.extractAllNodesThatMatch (new TagNameFilter ("SCRIPT"));
+         assertEquals ("wrong number of scripts found", 2, scripts.size ());
+         ScriptTag script = (ScriptTag)scripts.elementAt (1);
+         assertStringEquals ("script not decoded correctly", plaintext, script.getScriptCode ());
+     }
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests ParserTestCase.java,1.44,1.45

From: <der...@us...> - 2004-02-28 16:10:13

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/tests

Modified Files:
	ParserTestCase.java 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



Index: ParserTestCase.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v
retrieving revision 1.44
retrieving revision 1.45
diff -C2 -d -r1.44 -r1.45
*** ParserTestCase.java	14 Jan 2004 02:53:47 -0000	1.44
--- ParserTestCase.java	28 Feb 2004 15:52:43 -0000	1.45
***************
*** 108,122 ****
          if (expected.length() < actual.length()) {
              mismatchInfo = "\n\nACTUAL result has "+(actual.length()-expected.length())+" extra characters at the end. They are :";
! 
!             for (int i = expected.length(); i < actual.length(); i++) {
                  mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) actual.charAt(i));
!             }
          } else if(expected.length() > actual.length()) {
              mismatchInfo = "\n\nEXPECTED result has "+(expected.length()-actual.length())+" extra characters at the end. They are :";
! 
!             for (int i = actual.length(); i < expected.length(); i++) {
                  mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) expected.charAt(i));
!             }
! 
          }
          for (int i = 0; i < expected.length(); i++) {
--- 108,123 ----
          if (expected.length() < actual.length()) {
              mismatchInfo = "\n\nACTUAL result has "+(actual.length()-expected.length())+" extra characters at the end. They are :";
!             int limit = Math.min (expected.length() + 10, actual.length());
!             for (int i = expected.length(); i < limit; i++)
                  mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) actual.charAt(i));
!             if (limit != actual.length())
!                 mismatchInfo += "\netc.";
          } else if(expected.length() > actual.length()) {
              mismatchInfo = "\n\nEXPECTED result has "+(expected.length()-actual.length())+" extra characters at the end. They are :";
!             int limit = Math.min (actual.length() + 10, expected.length());
!             for (int i = actual.length(); i < expected.length(); i++)
                  mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) expected.charAt(i));
!             if (limit != expected.length ())
!                 mismatchInfo += "\netc.";
          }
          for (int i = 0; i < expected.length(); i++) {

[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes TagNode.java,1.30,1.31

From: <der...@us...> - 2004-02-28 16:10:09

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/lexer/nodes

Modified Files:
	TagNode.java 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



Index: TagNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** TagNode.java	9 Feb 2004 02:09:44 -0000	1.30
--- TagNode.java	28 Feb 2004 15:52:43 -0000	1.31
***************
*** 601,605 ****
  
      /**
!      * A call to a tag's toHTML() method will render it in HTML.
       * @see org.htmlparser.Node#toHtml()
       */
--- 601,607 ----
  
      /**
!      * Render the tag as HTML.
!      * A call to a tag's <code>toHtml()</code> method will render it in HTML.
!      * @return The tag as an HTML fragment.
       * @see org.htmlparser.Node#toHtml()
       */
***************
*** 819,823 ****
          raw = getRawTagName ();
  
!         return ((null == raw) ? false : ('/' == raw.charAt (0)));
      }
  
--- 821,825 ----
          raw = getRawTagName ();
  
!         return ((null == raw) ? false : ((0 != raw.length ()) && ('/' == raw.charAt (0))));
      }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/filters package.html,1.4,1.5

From: <der...@us...> - 2004-02-28 16:10:09

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/filters

Modified Files:
	package.html 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



Index: package.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/package.html,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** package.html	2 Jan 2004 16:24:53 -0000	1.4
--- package.html	28 Feb 2004 15:52:43 -0000	1.5
***************
*** 38,42 ****
  parser.parse (new HasAttributeFilter ("id"));
  </pre>
! These filters can be combined to yield powerfull extraction capabilities.
  For example, to get a list of links where the contents is an image, you could use:
  <pre>
--- 38,42 ----
  parser.parse (new HasAttributeFilter ("id"));
  </pre>
! These filters can be combined to yield powerful extraction capabilities.
  For example, to get a list of links where the contents is an image, you could use:
  <pre>

[Htmlparser-cvs] htmlparser/src/org/htmlparser/scanners ScriptDecoder.java,NONE,1.1 ScriptScanner.java,1.55,1.56

From: <der...@us...> - 2004-02-28 16:10:08

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/scanners

Modified Files:
	ScriptScanner.java 
Added Files:
	ScriptDecoder.java 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



--- NEW FILE: ScriptDecoder.java ---
// HTMLParser Library $Name:  $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptDecoder.java,v $
// $Author: derrickoswald $
// $Date: 2004/02/28 15:52:43 $
// $Revision: 1.1 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.scanners;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.htmlparser.lexer.Cursor;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.ParserException;

/**
 * Decode script.
 * Script obfuscated by the <A href="http://www.microsoft.com/downloads/details.aspx?FamilyId=E7877F67-C447-4873-B1B0-21F0626A6329&displaylang=en" target="_parent">Windows Script Encoder</A>
 * provided by Microsoft, is converted to plaintext. This code is based loosely
 * on example code provided by MrBrownstone with changes by Joe Steele, see
 * <A href="http://www.virtualconspiracy.com/download/scrdec14.c" target="_parent">scrdec14.c</A>.
 */
public class ScriptDecoder
{
    /**
     * Termination state.
     */
    public static final int STATE_DONE = 0;

    /**
     * State on entry.
     */
    public static final int STATE_INITIAL = 1;

    /**
     * State while reading the encoded length.
     */
    protected static final int STATE_LENGTH = 2;

    /**
     * State when reading up to decoded text.
     */
    protected static final int STATE_PREFIX = 3;

    /**
     * State while decoding.
     */
    protected static final int STATE_DECODE = 4;

    /**
     * State when reading an escape sequence.
     */
    protected static final int STATE_ESCAPE = 5;

    /**
     * State when reading the checksum.
     */
    protected static final int STATE_CHECKSUM = 6;

    /**
     * State while exiting.
     */
    protected static final int STATE_FINAL = 7;

    /**
     * The state to enter when decrypting is complete.
     * If this is STATE_DONE, the decryption will return with any characters
     * following the encoded text still unconsumed.
     * Otherwise, if this is STATE_INITIAL, the input will be exhausted and
     * all following characters will be contained in the return value
     * of the <code>Decode()</code> method.
     */
    public static int LAST_STATE = STATE_DONE;

    /**
     * Table of lookup choice.
     * The decoding cycles between three flavours determined
     * by this sequence of 64 choices, corresponding to the
     * first dimension of the lookup table.
     */
    protected static byte mEncodingIndex[] =
    {
        1, 2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 0, 
        1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 2, 
        1, 1, 0, 2, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2, 
        1, 0, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 0, 2,
    };

    /**
     * Two dimensional lookup table.
     * The decoding uses this table to determine the plaintext for
     * characters that aren't mEscaped.
     */
    protected static char mLookupTable[][] =
    {
        {
            '{', 
            '2',  '0',  '!',  ')',  '[',  '8',  '3',  '=', 
            'X',  ':',  '5',  'e',  '9', '\\',  'V',  's', 
            'f',  'N',  'E',  'k',  'b',  'Y',  'x',  '^', 
            '}',  'J',  'm',  'q',    0,  '`',    0,  'S', 
              0,  'B', '\'',  'H',  'r',  'u',  '1',  '7', 
            'M',  'R',  '"',  'T',  'j',  'G',  'd',  '-', 
            ' ',  '',  '.',  'L',  ']',  '~',  'l',  'o', 
            'y',  't',  'C',  '&',  'v',  '%',  '$',  '+', 
            '(',  '#',  'A',  '4', '\t',  '*',  'D',  '?', 
            'w',  ';',  'U',  'i',  'a',  'c',  'P',  'g', 
            'Q',  'I',  'O',  'F',  'h',  '|',  '6',  'p', 
            'n',  'z',  '/',  '_',  'K',  'Z',  ',',  'W', 
        },
        {
            'W', 
            '.',  'G',  'z',  'V',  'B',  'j',  '/',  '&', 
            'I',  'A',  '4',  '2',  '[',  'v',  'r',  'C', 
            '8',  '9',  'p',  'E',  'h',  'q',  'O', '\t', 
            'b',  'D',  '#',  'u',    0,  '~',    0,  '^', 
              0,  'w',  'J',  'a',  ']',  '"',  'K',  'o', 
            'N',  ';',  'L',  'P',  'g',  '*',  '}',  't', 
            'T',  '+',  '-',  ',',  '0',  'n',  'k',  'f', 
            '5',  '%',  '!',  'd',  'M',  'R',  'c',  '?', 
            '{',  'x',  ')',  '(',  's',  'Y',  '3',  '', 
            'm',  'U',  'S',  '|',  ':',  '_',  'e',  'F', 
            'X',  '1',  'i',  'l',  'Z',  'H', '\'', '\\', 
            '=',  '$',  'y',  '7',  '`',  'Q',  ' ',  '6', 
        },
        {
            'n', 
            '-',  'u',  'R',  '`',  'q',  '^',  'I', '\\', 
            'b',  '}',  ')',  '6',  ' ',  '|',  'z',  '', 
            'k',  'c',  '3',  '+',  'h',  'Q',  'f',  'v', 
            '1',  'd',  'T',  'C',    0,  ':',    0,  '~', 
              0,  'E',  ',',  '*',  't', '\'',  '7',  'D', 
            'y',  'Y',  '/',  'o',  '&',  'r',  'j',  '9', 
            '{',  '?',  '8',  'w',  'g',  'S',  'G',  '4', 
            'x',  ']',  '0',  '#',  'Z',  '[',  'l',  'H', 
            'U',  'p',  'i',  '.',  'L',  '!',  '$',  'N', 
            'P', '\t',  'V',  's',  '5',  'a',  'K',  'X', 
            ';',  'W',  '"',  'm',  'M',  '%',  '(',  'F', 
            'J',  '2',  'A',  '=',  '_',  'O',  'B',  'e', 
        },
    };

    /**
     * The base 64 decoding table.
     * This array determines the value of decoded base 64 elements.
     */
    protected static int mDigits[];
    static
    {
        mDigits = new int[0x7b];
        for (int i = 0; i < 26; i++)
        {
            mDigits['A' + i] = i;
            mDigits['a' + i] = i + 26;
        }
        for (int i = 0; i < 10; i++)
            mDigits['0' + i] = i + 52;
        mDigits[0x2b] = '>';
        mDigits[0x2f] = '?';
    }

    /**
     * The leader.
     * The prefix to the encoded script is #@~^nnnnnn== where the n are the
     * length digits in base64.
     */
    protected static char mLeader[] =
    { 
        '#',
        '@',
        '~',
        '^',
    };

    /**
     * The prefix.
     * The prfix separates the encoded text from the length.
     */
    protected static char mPrefix[] =
    { 
        '=',
        '=',
    };

    /**
     * The trailer.
     * The suffix to the encoded script is nnnnnn==^#~@ where the n are the
     * checksum digits in base64. These characters are the part after the checksum.
     */
    protected static char mTrailer[] =
    { 
        '=',
        '=',
        '^',
        '#',
        '~',
        '@',
    };

    /**
     * Escape sequence characters.
     */
    protected static char mEscapes[] =
    {
        '#',
        '&',
        '!',
        '*',
        '$',
    };

    /**
     * The escaped characters corresponding to the each escape sequence.
     */
    protected static char mEscaped[] = //"\r\n<>@";
    {
        '\r',
        '\n',
        '<',
        '>',
        '@',
    };

    /**
     * Extract the base 64 encoded number.
     * This is a very limited subset of base 64 encoded characters.
     * Six characters are expected. These are translated into a single long
     * value. For a more complete base 64 codec see for example the base64
     * package of <A href="http://sourceforge.net/projects/iharder/" target="_parent">iHarder.net</A>
     * @param p Six base 64 encoded digits.
     * @return The value of the decoded number.
     */
    protected static long decodeBase64 (char[] p)
    {
        long ret;
        
        ret = 0;

        ret +=  (mDigits[p[0]] << 2);
        ret +=  (mDigits[p[1]] >> 4);
        ret +=  (mDigits[p[1]] & 0xf) << 12;
        ret += ((mDigits[p[2]] >> 2) << 8); 
        ret += ((mDigits[p[2]] & 0x3) << 22);
        ret +=  (mDigits[p[3]] << 16);
        ret += ((mDigits[p[4]] << 2) << 24);
        ret += ((mDigits[p[5]] >> 4) << 24);

        return (ret);
    }

    /**
     * Decode script encoded by the Microsoft obfuscator.
     * @param page The source for encoded text.
     * @param cursor The position at which to start decoding.
     * This is advanced to the end of the encoded text.
     * @return The plaintext.
     * @exception ParserException If an error is discovered while decoding.
     */
    public static String Decode (Page page, Cursor cursor)
        throws
            ParserException
    {
        int state;
        int substate_initial;
        int substate_length;
        int substate_prefix;
        int substate_checksum;
        int substate_final;
        long checksum;
        long length;
        char buffer[];
        buffer = new char[6];
        int index;
        char character;
        int input_character;
        boolean found;
        StringBuffer ret;
        
        ret = new StringBuffer (1024);

        state = STATE_INITIAL;
        substate_initial = 0;
        substate_length = 0;
        substate_prefix = 0;
        substate_checksum = 0;
        substate_final = 0;
        length = 0L;
        checksum = 0L;
        index = 0;
        while (STATE_DONE != state)
        {
            input_character = page.getCharacter (cursor);
            character = (char)input_character;
            if (0 == input_character)
            {
                if (   (STATE_INITIAL != state)
                    || (0 != substate_initial)
                    || (0 != substate_length)
                    || (0 != substate_prefix)
                    || (0 != substate_checksum)
                    || (0 != substate_final))
                    throw new ParserException ("illegal state for exit");
                state = STATE_DONE;
            }
            else
                switch (state)
                {
                    case STATE_INITIAL:
                        if (character == mLeader[substate_initial])
                        {
                            substate_initial++;
                            if (substate_initial == mLeader.length)
                            {
                                substate_initial = 0;
                                state = STATE_LENGTH;
                            }
                        }
                        else
                        {
                            // oops, flush
                            for (int k = 0; 0 < substate_initial; k++)
                            {
                                ret.append (mLeader[k++]);
                                substate_initial--;
                            }
                            ret.append (character);
                        }
                        break;

                    case STATE_LENGTH:
                        buffer[substate_length] = character;
                        substate_length++;
                        if (substate_length >= buffer.length)
                        {
                            length = decodeBase64 (buffer);
                            if (0 > length)
                                throw new ParserException ("illegal length: " + length);
                            substate_length = 0;
                            state = STATE_PREFIX;
                        }
                        break;

                    case STATE_PREFIX:
                        if (character == mPrefix[substate_prefix])
                            substate_prefix++;
                        else
                            throw new ParserException ("illegal character encountered: " + (int)character + " ('" + character + "')");
                        if (substate_prefix >= mPrefix.length)
                        {
                            substate_prefix = 0;
                            state = STATE_DECODE;
                        }
                        break;

                    case STATE_DECODE:
                        if ('@' == character)
                            state = STATE_ESCAPE;
                        else
                        {
                            if (input_character < 0x80)
                            {
                                if (input_character == '\t')
                                    input_character = 0;
                                else if (input_character >= ' ')
                                    input_character -= ' ' - 1;
                                else
                                    throw new ParserException ("illegal encoded character: " + input_character + " ('" + character + "')");
                                char ch = mLookupTable[mEncodingIndex[index % 64]][input_character];
                                ret.append (ch);
                                checksum += ch;
                                index++;
                            }
                            else
                                ret.append (character);
                        }
                        length--;
                        if (0 == length)
                        {
                            index = 0;
                            state = STATE_CHECKSUM;
                        }
                        break;

                    case STATE_ESCAPE:
                        found = false;
                        for (int i = 0; i < mEscapes.length; i++)
                            if (character == mEscapes[i])
                            {
                                found = true;
                                character = mEscaped[i];
                            }
                        if (!found)
                            throw new ParserException ("unexpected escape character: " + (int)character + " ('" + character + "')");
                        ret.append (character);
                        checksum += character;
                        index++;
                        state = STATE_DECODE;
                        length--;
                        if (0 == length)
                        {
                            index = 0;
                            state = STATE_CHECKSUM;
                        }
                        break;

                    case STATE_CHECKSUM:
                        buffer[substate_checksum] = character;
                        substate_checksum++;
                        if (substate_checksum >= buffer.length)
                        {
                            long check = decodeBase64 (buffer);
                            if (check != checksum)
                                throw new ParserException ("incorrect checksum, expected " + check + ", calculated " + checksum);
                            checksum = 0;
                            substate_checksum = 0;
                            state = STATE_FINAL;
                        }
                        break;

                    case STATE_FINAL:
                        if (character == mTrailer[substate_final])
                            substate_final++;
                        else
                            throw new ParserException ("illegal character encountered: " + (int)character + " ('" + character + "')");
                        if (substate_final >= mTrailer.length)
                        {
                            substate_final = 0;
                            state = LAST_STATE;
                        }
                        break;
                    default:
                        throw new ParserException ("invalid state: " + state);
                }
        }

        return (ret.toString ());
    }

//    /**
//     * Example mainline for decrypting script.
//     * Change a file with encrypted script into one without.
//     * <em>WARNING: This does not preserve DOS type line endings.</em>
//     * @param args Command line arguments. Two file names, input and output.
//     * Optionally, the character set to use as a third argument.
//     * @exception IOException If the input file doesn't exist, or the output
//     * file cannot be created.
//     * @exception ParserException If there is a decryption problem.
//     */
//    public static void main (String[] args)
//         throws
//            IOException,
//            ParserException
//    {
//        String charset;
//        FileInputStream in;
//        Page page;
//        Cursor cursor;
//        String string;
//        int ret;
//        
//        if (args.length < 2)
//        {
//            System.out.println ("Usage: java org.htmlparser.scanners.ScriptDecoder <infile> <outfile> [charset]");
//            ret = 1;
//        }
//        else
//        {
//            if (2 < args.length)
//                charset = args[2];
//            else
//                charset = "ISO-8859-1";
//            in = new FileInputStream (args[0]);
//            page = new Page (in, charset);
//            cursor = new Cursor (page, 0);
//            ScriptDecoder.LAST_STATE = STATE_INITIAL;
//            string = ScriptDecoder.Decode (page, cursor);
//            in.close ();
//            
//            FileOutputStream outfile = new FileOutputStream (args[1]);
//            outfile.write (string.getBytes (charset));
//            outfile.close ();
//            ret = (0 != string.length ()) ? 0 : 1;
//        }
//        
//        System.exit (ret);
//    }
}
Index: ScriptScanner.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v
retrieving revision 1.55
retrieving revision 1.56
diff -C2 -d -r1.55 -r1.56
*** ScriptScanner.java	14 Jan 2004 02:53:46 -0000	1.55
--- ScriptScanner.java	28 Feb 2004 15:52:43 -0000	1.56
***************
*** 33,39 ****
--- 33,42 ----
  import org.htmlparser.RemarkNode;
  import org.htmlparser.StringNode;
+ import org.htmlparser.lexer.Cursor;
  import org.htmlparser.lexer.Lexer;
  import org.htmlparser.lexer.nodes.NodeFactory;
+ import org.htmlparser.scanners.ScriptDecoder;
  import org.htmlparser.tags.CompositeTag;
+ import org.htmlparser.tags.ScriptTag;
  import org.htmlparser.tags.Tag;
  import org.htmlparser.util.NodeList;
***************
*** 68,71 ****
--- 71,75 ----
          throws ParserException
      {
+         String language;
          Node node;
          boolean done;
***************
*** 80,83 ****
--- 84,100 ----
          end = null;
          factory = lexer.getNodeFactory ();
+         if (tag instanceof ScriptTag)
+         {
+             language = ((ScriptTag)tag).getLanguage ();
+             if ((null != language) &&
+                 (language.equalsIgnoreCase ("JScript.Encode") ||
+                  language.equalsIgnoreCase ("VBScript.Encode")))
+             {
+                 int start = lexer.getPosition ();
+                 String code = ScriptDecoder.Decode (lexer.getPage (), lexer.getCursor ());
+                 ((ScriptTag)tag).setScriptCode (code);
+                 last = (StringNode)factory.createStringNode (lexer.getPage (), start, lexer.getPosition ());
+             }
+         }
          lexer.setNodeFactory (new PrototypicalNodeFactory (true));
          try

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tags ScriptTag.java,1.35,1.36 Tag.java,1.61,1.62

From: <der...@us...> - 2004-02-28 16:10:08

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/tags

Modified Files:
	ScriptTag.java Tag.java 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



Index: ScriptTag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ScriptTag.java,v
retrieving revision 1.35
retrieving revision 1.36
diff -C2 -d -r1.35 -r1.36
*** ScriptTag.java	2 Jan 2004 16:24:55 -0000	1.35
--- ScriptTag.java	28 Feb 2004 15:52:43 -0000	1.36
***************
*** 45,48 ****
--- 45,53 ----
  
      /**
+      * Script code if different from the page contents.
+      */
+     protected String mCode;
+ 
+     /**
       * Create a new script tag.
       */
***************
*** 79,87 ****
  
      /**
!      * Get the contents of the tag's children.
       */
!     public String getScriptCode()
      {
!         return (getChildrenHTML ());
      }
  
--- 84,111 ----
  
      /**
!      * Get the script code.
!      * Normally this is the contents of the children, but in the rare case that
!      * the script is encoded, this is the plaintext decrypted code.
!      * @return The plaintext or overridden code contents of the tag.
       */
!     public String getScriptCode ()
      {
!         String ret;
!         
!         if (null != mCode)
!             ret = mCode;
!         else
!             ret = getChildrenHTML ();
! 
!         return (ret);
!     }
! 
!     /**
!      * Set the code contents.
!      * @param code The new code contents of this tag.
!      */
!     public void setScriptCode (String code)
!     {
!         mCode = code;
      }
  
***************
*** 113,116 ****
--- 137,164 ----
  
      /**
+      * Render the tag as HTML.
+      * @return The tag as an HTML fragment.
+      * @see org.htmlparser.Node#toHtml()
+      */
+     public String toHtml()
+     {
+         StringBuffer ret;
+         
+         ret = new StringBuffer ();
+         ret.append (super.toHtml ());
+         if (!isEmptyXmlTag ())
+         {
+             if (null != getScriptCode ())
+                 ret.append (getScriptCode ());
+             else
+                 putChildrenInto (ret);
+             if (null != getEndTag ())
+                 putEndTagInto (ret);
+         }
+ 
+         return (ret.toString());
+     }
+ 
+     /**
       * Print the contents of the script tag.
       */

Index: Tag.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Tag.java,v
retrieving revision 1.61
retrieving revision 1.62
diff -C2 -d -r1.61 -r1.62
*** Tag.java	14 Jan 2004 02:53:46 -0000	1.61
--- Tag.java	28 Feb 2004 15:52:43 -0000	1.62
***************
*** 64,67 ****
--- 64,69 ----
          if ((null != names) && (0 != names.length))
              setTagName (names[0]);
+         else
+             setTagName (""); // make sure it's not null
          setThisScanner (mDefaultScanner);
      }
***************
*** 71,74 ****
--- 73,78 ----
          super (node.getPage (), node.getTagBegin (), node.getTagEnd (), node.getAttributesEx ());
          mScanner = scanner;
+         if (null == getTagName ())
+             setTagName (""); // make sure it's not null
      }
  
***************
*** 77,80 ****
--- 81,86 ----
          super (page, start, end, attributes);
          mScanner = null;
+         if (null == getTagName ())
+             setTagName (""); // make sure it's not null
      }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/beans StringBean.java,1.37,1.38

From: <der...@us...> - 2004-02-28 16:10:06

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv977/beans

Modified Files:
	StringBean.java 
Log Message:
Fix bug #902121 StringBean throws NullPointerException.
Added ScriptDecoder to handle Microsoft Script Encoder encrypted tags.
Added accessor to ScriptTag's scriptCode property to be able to override it.
Ensured that a Tag always has a non-null name.
Skip STYLE tags in StringBean, just like SCRIPT.



Index: StringBean.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/StringBean.java,v
retrieving revision 1.37
retrieving revision 1.38
diff -C2 -d -r1.37 -r1.38
*** StringBean.java	11 Feb 2004 12:37:59 -0000	1.37
--- StringBean.java	28 Feb 2004 15:52:42 -0000	1.38
***************
*** 162,165 ****
--- 162,170 ----
  
      /**
+      * Set <code>true</code> when traversing a STYLE tag.
+      */
+     protected boolean mIsStyle;
+ 
+    /**
       * Create a StringBean object.
       * Default property values are set to 'do the right thing':
***************
*** 185,188 ****
--- 190,194 ----
  		mIsScript = false;
  		mIsPre = false;
+         mIsStyle = false;
      }
  
***************
*** 322,325 ****
--- 328,332 ----
                  mIsPre = false;
                  mIsScript = false;
+                 mIsStyle = false;
                  try
                  {   // try again with the encoding now in force
***************
*** 616,620 ****
      public void visitStringNode (StringNode string)
      {
!         if (!mIsScript)
          {
              String text = string.getText ();
--- 623,627 ----
      public void visitStringNode (StringNode string)
      {
!         if (!mIsScript && !mIsStyle)
          {
              String text = string.getText ();
***************
*** 647,650 ****
--- 654,659 ----
          else if (name.equalsIgnoreCase ("SCRIPT"))
              mIsScript = true;
+         else if (name.equalsIgnoreCase ("STYLE"))
+             mIsStyle = true;
          if (tag.breaksFlow ())
              carriage_return ();
***************
*** 664,667 ****
--- 673,678 ----
          else if (name.equalsIgnoreCase ("SCRIPT"))
              mIsScript = false;
+         else if (name.equalsIgnoreCase ("STYLE"))
+             mIsStyle = false;
      }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Lexer.java,1.26,1.27

From: <der...@us...> - 2004-02-18 12:43:36

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27481/lexer

Modified Files:
	Lexer.java 
Log Message:
Fix bug #899413 bug in javascript end detection.
Patch submitted by Gernot Fricke handles escaped quotes in strings when
lexing with smartquote turned on. Added test case in LexerTests.



Index: Lexer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** Lexer.java	7 Feb 2004 12:53:09 -0000	1.26
--- Lexer.java	18 Feb 2004 12:34:04 -0000	1.27
***************
*** 401,404 ****
--- 401,413 ----
              else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch)))
                  quote = ch; // enter quoted state
+             // patch contributed by Gernot Fricke to handle escaped closing quote
+             else if (quotesmart && (0 != quote) && ('\\' == ch))
+             {
+                 ch = mPage.getCharacter (cursor); //try to consume escaped character
+                 if (  (ch != '\\') // escaped backslash
+                     && (ch != quote)) // escaped quote character 
+                        // ( reflects ["] or [']  whichever opened the quotation)
+                     cursor.retreat(); // unconsume char if character was not an escapable char.
+             }
              else if (quotesmart && (ch == quote))
                  quote = 0; // exit quoted state

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/lexerTests LexerTests.java,1.18,1.19

From: <der...@us...> - 2004-02-18 12:43:36

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27481/tests/lexerTests

Modified Files:
	LexerTests.java 
Log Message:
Fix bug #899413 bug in javascript end detection.
Patch submitted by Gernot Fricke handles escaped quotes in strings when
lexing with smartquote turned on. Added test case in LexerTests.



Index: LexerTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** LexerTests.java	24 Jan 2004 17:14:20 -0000	1.18
--- LexerTests.java	18 Feb 2004 12:34:04 -0000	1.19
***************
*** 789,792 ****
--- 789,815 ----
          assertNull ("too many nodes", lexer.nextNode ());
      }
+     
+     /**
+      * See bug #899413 bug in javascript end detection.
+      */
+     public void testEscapedQuote () throws ParserException
+     {
+         String string;
+         String html;
+         Lexer lexer;
+         Node node;
+         
+         string = "\na='\\'';\n";
+         html = string + "</script>";
+         lexer = new Lexer (html);
+         node = lexer.nextNode (true);
+         if (node == null)
+             fail ("too few nodes");
+         else
+             assertStringEquals ("bad string", string, node.toHtml());
+         assertNotNull ("too few nodes", lexer.nextNode (true));
+         assertNull ("too many nodes", lexer.nextNode (true));
+     }
+ 
  }

[Htmlparser-cvs] htmlparser/docs changes.txt,1.196,1.197 contributors.html,1.5,1.6 release.txt,1.55,1.56

From: <der...@us...> - 2004-02-16 22:54:28

Update of /cvsroot/htmlparser/htmlparser/docs
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25002/docs

Modified Files:
	changes.txt contributors.html release.txt 
Log Message:
Update version to 1.4-20040216.



Index: changes.txt
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/docs/changes.txt,v
retrieving revision 1.196
retrieving revision 1.197
diff -C2 -d -r1.196 -r1.197
*** changes.txt	26 Jan 2004 01:01:56 -0000	1.196
--- changes.txt	16 Feb 2004 22:46:07 -0000	1.197
***************
*** 13,16 ****
--- 13,77 ----
  *******************************************************************************
  
+ Integration Build 1.4 - 20040216
+ --------------------------------
+ 
+ 2004-02-11 07:37  derrickoswald
+ 
+ 	* docs/contributors.html, src/org/htmlparser/beans/StringBean.java:
+ 
+ 	Incorporate patch from Nick Burch to make StringBean a NodeVisistor for other parsers.
+ 	See task #93155 StringBean driven by visitor.
+ 	
+ 2004-02-08 21:09  derrickoswald
+ 
+ 	* build.xml, src/org/htmlparser/lexer/nodes/Attribute.java,
+ 	src/org/htmlparser/lexer/nodes/TagNode.java,
+ 	src/org/htmlparser/tests/tagTests/TagTest.java,
+ 	src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java,
+ 	bin/translate, bin/translate.bat,
+ 	src/org/htmlparser/util/CharacterReference.java,
+ 	src/org/htmlparser/util/Generate.java,
+ 	src/org/htmlparser/util/Translate.java,
+ 	src/org/htmlparser/util/package.html:
+ 
+ 	Rework character entity translation.
+ 	See task 58599  enhance character reference translation.
+ 	Decode now handles missing semi colons, encoding is more efficient,
+ 	hexadecimal numeric character entity references are handled and
+ 	both encoding and decoding make minimal use of substring().
+ 	Augmented the tests in CharacterTranslationTest significantly, and
+ 	merged the Generate class into the tests.
+ 	Added translate command scripts in bin, which read from stdin and write to stdout.
+ 	
+ 2004-02-07 07:53  derrickoswald
+ 
+ 	* src/org/htmlparser/: lexer/Lexer.java,
+ 	tests/lexerTests/AttributeTests.java:
+ 
+ 	Fix bug #891058 Bug in lexer.
+ 	Patch submitted by Gernot Fricke.
+ 	This change causes attribute parsing to be more 'greedy' resulting in 'empty' attributes
+ 	consuming the next attribute. This brings the lexer parsing more in line with other
+ 	(browser) interpretations and simplifies it immensely.
+ 	
+ 2004-01-31 15:51  derrickoswald
+ 
+ 	* src/org/htmlparser/lexer/Page.java:
+ 
+ 	Compare encoding names without case sensitivity.
+ 	From HTML spec (http://www.w3.org/TR/html4/charset.html section 5.2.1):
+ 	  Names for character encodings are case-insensitive, so that for
+ 	  example "SHIFT_JIS", "Shift_JIS", and "shift_jis" are equivalent.
+ 	and from to IANA(http://www.iana.org/assignments/character-sets):
+ 	  The character set names may be up to 40 characters taken from the
+ 	  printable characters of US-ASCII.  However, no distinction is made
+ 	  between use of upper and lower case letters.
+ 	
+ 2004-01-31 11:31  derrickoswald
+ 
+ 	* src/doc-files/: overview.html, todo.html:
+ 
+ 	Move ToDo list to SourceForge trackers and tasks.
+ 
  Integration Build 1.4 - 20040125
  --------------------------------

Index: contributors.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** contributors.html	11 Feb 2004 12:37:52 -0000	1.5
--- contributors.html	16 Feb 2004 22:46:08 -0000	1.6
***************
*** 353,360 ****
    </tr>
  </table>
! <p>Thanks to Stephen Harrington, Domenico Lordi, Kamen, John Zook, Nick Burch,
!   Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, 
!   Gordon Deudney, Serge Kruppa, Roger Kjensrud, Rodney S Foley and Manpreet Singh 
!   for suggestions, bug reports and feature ideas. <br>
    &nbsp; 
  </body>
--- 353,360 ----
    </tr>
  </table>
! <p>Thanks to Gernot Fricke, Nick Burch, Stephen Harrington, Domenico Lordi, Kamen,
!   John Zook, Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma,
!   Robert Kausch, Gordon Deudney, Serge Kruppa, Roger Kjensrud, Rodney S Foley
!   and Manpreet Singh for suggestions, bug reports and feature ideas. <br>
    &nbsp; 
  </body>

Index: release.txt
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/docs/release.txt,v
retrieving revision 1.55
retrieving revision 1.56
diff -C2 -d -r1.55 -r1.56
*** release.txt	26 Jan 2004 01:02:09 -0000	1.55
--- release.txt	16 Feb 2004 22:46:08 -0000	1.56
***************
*** 1,3 ****
! HTMLParser Version 1.4 (Integration Build Jan 25, 2004)
  *********************************************
  
--- 1,3 ----
! HTMLParser Version 1.4 (Integration Build Feb 16, 2004)
  *********************************************
  
***************
*** 21,24 ****
--- 21,29 ----
  Changes since Version 1.3
  -------------------------
+ Translation
+     Character entity encoding and decoding has been revamped, leading to
+     higher throughput and less memory churn.
+ Beans
+     The StringBean can now be used as a visitor for parsers external to the bean.
  Decorators
      The node decorator package has been added to provide support for the
***************
*** 57,63 ****
--- 62,71 ----
  Applications
      New example applications Thumbelina and SiteCapturer.
+     A mainline has been added to the Translate class to encode/decode stdin to
+     stdout.
  
  Bug Fixes
  ---------
+ 891058 Bug in lexer
  865279 Documentation
  851882 zero length alt tag causes bug in ImageScanner
***************
*** 121,124 ****
--- 129,135 ----
  [26] Stephen Nightingale
  [27] Donnla Nic Gearailt
+ [28] Pim Schrama
+ [29] Nick Burch
+ [30] Gernot Fricke
  
  If you find any bugs, please go to

[Htmlparser-cvs] htmlparser/src/org/htmlparser Parser.java,1.86,1.87

From: <der...@us...> - 2004-02-16 22:54:27

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25002/src/org/htmlparser

Modified Files:
	Parser.java 
Log Message:
Update version to 1.4-20040216.



Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.86
retrieving revision 1.87
diff -C2 -d -r1.86 -r1.87
*** Parser.java	26 Jan 2004 01:02:10 -0000	1.86
--- Parser.java	16 Feb 2004 22:46:08 -0000	1.87
***************
*** 88,92 ****
       */
      public final static String
!     VERSION_DATE = "Jan 25, 2004"
      ;
  
--- 88,92 ----
       */
      public final static String
!     VERSION_DATE = "Feb 16, 2004"
      ;

[Htmlparser-cvs] htmlparser/src/org/htmlparser/beans StringBean.java,1.36,1.37

From: <der...@us...> - 2004-02-11 12:42:07

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3754/src/org/htmlparser/beans

Modified Files:
	StringBean.java 
Log Message:
Incorporate patch from Nick Burch to make StringBean a NodeVisitor for other parsers.
See task #93155 StringBean driven by visitor.



Index: StringBean.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/StringBean.java,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -d -r1.36 -r1.37
*** StringBean.java	10 Jan 2004 15:23:33 -0000	1.36
--- StringBean.java	11 Feb 2004 12:37:59 -0000	1.37
***************
*** 55,60 ****
   *     String s = sb.getStrings ();
   * </pre>
!  * @author Derrick Oswald
!  * Created on December 23, 2002, 5:01 PM
   */
  public class StringBean extends NodeVisitor implements Serializable
--- 55,74 ----
   *     String s = sb.getStrings ();
   * </pre>
!  * You can also use the StringBean as a NodeVisitor on your own parser,
!  * in which case you have to refetch your page if you change one of the
!  * properties because it resets the Strings property:</p>
!  * <pre>
!  *     StringBean sb = new StringBean ();
!  *     Parser parser = new Parser ("http://cbc.ca");
!  *     parser.visitAllNodesWith (sb);
!  *     String s = sb.getStrings ();
!  *     sb.setLinks (true);
!  *     parser.reset ();
!  *     parser.visitAllNodesWith (sb);
!  *     String sl = sb.getStrings ();
!  * </pre>
!  * According to Nick Burch, who contributed the patch, this is handy if you
!  * don't want StringBean to wander off and get the content itself, either
!  * because you already have it, it's not on a website etc.
   */
  public class StringBean extends NodeVisitor implements Serializable
***************
*** 168,171 ****
--- 182,188 ----
          mReplaceSpace = true;
          mCollapse = true;
+ 		mBuffer = new StringBuffer (4096);
+ 		mIsScript = false;
+ 		mIsPre = false;
      }
  
***************
*** 259,268 ****
          String ret;
  
-         mIsPre = false;
-         mIsScript = false;
-         mBuffer = new StringBuffer (4096);
          mParser.visitAllNodesWith (this);
          ret = mBuffer.toString ();
!         mBuffer = null;
  
          return (ret);
--- 276,282 ----
          String ret;
  
          mParser.visitAllNodesWith (this);
          ret = mBuffer.toString ();
!         mBuffer = new StringBuffer(4096);
  
          return (ret);
***************
*** 294,302 ****
              try
              {
-                 mIsPre = false;
-                 mIsScript = false;
                  try
                  {
-                     mBuffer = new StringBuffer (4096);
                      mParser.visitAllNodesWith (this);
                      updateStrings (mBuffer.toString ());
--- 308,313 ----
***************
*** 304,308 ****
                  finally
                  {
!                     mBuffer = null;
                  }
              }
--- 315,319 ----
                  finally
                  {
!                     mBuffer = new StringBuffer (4096);
                  }
              }
***************
*** 331,334 ****
--- 342,352 ----
                  updateStrings (pe.toString ());
              }
+         else
+         {
+             // reset in case this StringBean is used as a visitor
+             // on another parser, not it's own
+             mStrings = null;
+             mBuffer = new StringBuffer (4096);
+         }
      }
  
***************
*** 388,392 ****
      {
          if (null == mStrings)
!             setStrings ();
  
          return (mStrings);
--- 406,413 ----
      {
          if (null == mStrings)
! 			if (0 == mBuffer.length ())
! 				setStrings ();
! 			else
! 				updateStrings (mBuffer.toString ());
  
          return (mStrings);

[Htmlparser-cvs] htmlparser/docs contributors.html,1.4,1.5

From: <der...@us...> - 2004-02-11 12:42:07

Update of /cvsroot/htmlparser/htmlparser/docs
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3754/docs

Modified Files:
	contributors.html 
Log Message:
Incorporate patch from Nick Burch to make StringBean a NodeVisitor for other parsers.
See task #93155 StringBean driven by visitor.



Index: contributors.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** contributors.html	4 Jan 2004 03:23:08 -0000	1.4
--- contributors.html	11 Feb 2004 12:37:52 -0000	1.5
***************
*** 353,357 ****
    </tr>
  </table>
! <p>Thanks to Stephen Harrington, Domenico Lordi, Kamen, John Zook,
    Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, 
    Gordon Deudney, Serge Kruppa, Roger Kjensrud, Rodney S Foley and Manpreet Singh 
--- 353,357 ----
    </tr>
  </table>
! <p>Thanks to Stephen Harrington, Domenico Lordi, Kamen, John Zook, Nick Burch,
    Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, 
    Gordon Deudney, Serge Kruppa, Roger Kjensrud, Rodney S Foley and Manpreet Singh

[Htmlparser-cvs] htmlparser/bin translate,NONE,1.1 translate.bat,NONE,1.1

From: <der...@us...> - 2004-02-09 02:12:56

Update of /cvsroot/htmlparser/htmlparser/bin
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9169/bin

Added Files:
	translate translate.bat 
Log Message:
Rework character entity translation.
See task 58599  enhance character reference translation.
Decode now handles missing semicolons, encoding is more efficient,
hexadecimal numeric character entity references are handled and
both encoding and decoding make minimal use of substring().
Augmented the tests in CharacterTranslationTest significantly, and
merged the Generate class into the tests.
Added translate command scripts in bin, which read from stdin and write to stdout.



--- NEW FILE: translate ---
#! /bin/sh
#
# translate - launcher for org.htmlparser.util.Translate.
#
# Locates the HTML Parser installation and a Java executable, builds a
# classpath with htmlparser.jar first, then runs Translate with all the
# command line arguments passed through unchanged.
#
# Environment variables honoured:
#   HTMLPARSER_HOME  installation directory; derived from this script's
#                    location (following symlinks) when unset or empty
#   JAVACMD          Java executable to run; takes precedence over JAVA_HOME
#   JAVA_HOME        directory searched for the java executable when
#                    JAVACMD is unset or empty
#   CLASSPATH        appended after htmlparser.jar
#
# Exit status: 1 when no executable Java command is found, otherwise the
# exit status of the java command.

if [ -z "$HTMLPARSER_HOME" ] ; then

  ## resolve links - $0 may be a link to the home
  PRG="$0"
  progname=`basename "$0"`
  saveddir=`pwd`

  # need this for relative symlinks
  dirname_prg=`dirname "$PRG"`
  cd "$dirname_prg"

  # follow the chain of symlinks until PRG names the real script file
  while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
      # absolute link target
      PRG="$link"
    else
      # relative link target: interpret it against the link's directory
      PRG=`dirname "$PRG"`"/$link"
    fi
  done

  # the script lives in <home>/bin, so home is one level up
  HTMLPARSER_HOME=`dirname "$PRG"`/..

  cd "$saveddir"

  # make it fully qualified
  HTMLPARSER_HOME=`cd "$HTMLPARSER_HOME" && pwd`
fi

if [ -z "$JAVACMD" ] ; then
  if [ -n "$JAVA_HOME"  ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
      # IBM's JDK on AIX uses strange locations for the executables
      JAVACMD="$JAVA_HOME/jre/sh/java"
    else
      JAVACMD="$JAVA_HOME/bin/java"
    fi
  else
    JAVACMD=`which java 2> /dev/null `
    if [ -z "$JAVACMD" ] ; then
        JAVACMD=java
    fi
  fi
fi

if [ ! -x "$JAVACMD" ] ; then
  # Diagnostics go to stderr: stdout carries the translated text and is
  # typically redirected or piped by the caller.
  echo "Error: JAVA_HOME is not defined correctly." >&2
  echo "  We cannot execute $JAVACMD" >&2
  exit 1
fi

if [ -n "$CLASSPATH" ] ; then
  LOCALCLASSPATH="$CLASSPATH"
fi

HTMLPARSER_LIB="${HTMLPARSER_HOME}/lib"

# add in the parser .jar file (ahead of anything from CLASSPATH)
if [ -z "$LOCALCLASSPATH" ] ; then
  LOCALCLASSPATH="${HTMLPARSER_LIB}/htmlparser.jar"
else
  LOCALCLASSPATH="${HTMLPARSER_LIB}/htmlparser.jar":"$LOCALCLASSPATH"
fi

# handle 1.1x JDKs
if [ -n "$JAVA_HOME" ] ; then
  if [ -f "$JAVA_HOME/lib/classes.zip" ] ; then
    LOCALCLASSPATH="$LOCALCLASSPATH:$JAVA_HOME/lib/classes.zip"
  fi
fi

"$JAVACMD" -classpath "$LOCALCLASSPATH" org.htmlparser.util.Translate "$@"


--- NEW FILE: translate.bat ---
@echo off
rem Run org.htmlparser.util.Translate, passing all arguments through.
rem Echo is turned off so the command line is not written to stdout, which
rem carries the translated text.  The jar is located relative to this
rem script (%~dp0) rather than the current directory, so the script can be
rem started from anywhere.
java -classpath "%~dp0..\lib\htmlparser.jar" org.htmlparser.util.Translate %*

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/utilTests CharacterTranslationTest.java,1.41,1.42

From: <der...@us...> - 2004-02-09 02:12:55

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9169/src/org/htmlparser/tests/utilTests

Modified Files:
	CharacterTranslationTest.java 
Log Message:
Rework character entity translation.
See task 58599  enhance character reference translation.
Decode now handles missing semicolons, encoding is more efficient,
hexadecimal numeric character entity references are handled and
both encoding and decoding make minimal use of substring().
Augmented the tests in CharacterTranslationTest significantly, and
merged the Generate class into the tests.
Added translate command scripts in bin, which read from stdin and write to stdout.



Index: CharacterTranslationTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -d -r1.41 -r1.42
*** CharacterTranslationTest.java	14 Jan 2004 03:20:01 -0000	1.41
--- CharacterTranslationTest.java	9 Feb 2004 02:09:44 -0000	1.42
***************
*** 29,41 ****
  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.PrintStream;
  import java.net.URL;
  import java.net.URLConnection;
  import org.htmlparser.tests.ParserTestCase;
  import org.htmlparser.util.Translate;
  
[...1507 lines suppressed...]
!                             stimulus.append (character);
!                             response.append ("&gt;");
!                         }
!                         else
!                         {
!                             stimulus.append (character);
!                             response.append (character);
!                         }
!                     }
!                 }
!             }
!             string = Translate.decode (response.toString ());
!             if (!string.equals (stimulus.toString ()))
!                 fail ("decoding incorrect:\nexpected \"" + stimulus.toString () + "\"\n decoded \"" + string + "\"\n encoded \"" + response.toString () + "\""); 
!             stimulus.setLength (0);
!             response.setLength (0);
!         }   
!     }
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/tagTests TagTest.java,1.56,1.57

From: <der...@us...> - 2004-02-09 02:12:55

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9169/src/org/htmlparser/tests/tagTests

Modified Files:
	TagTest.java 
Log Message:
Rework character entity translation.
See task 58599  enhance character reference translation.
Decode now handles missing semicolons, encoding is more efficient,
hexadecimal numeric character entity references are handled and
both encoding and decoding make minimal use of substring().
Augmented the tests in CharacterTranslationTest significantly, and
merged the Generate class into the tests.
Added translate command scripts in bin, which read from stdin and write to stdout.



Index: TagTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java,v
retrieving revision 1.56
retrieving revision 1.57
diff -C2 -d -r1.56 -r1.57
*** TagTest.java	2 Jan 2004 16:24:57 -0000	1.56
--- TagTest.java	9 Feb 2004 02:09:44 -0000	1.57
***************
*** 368,372 ****
          assertEquals("font sans-serif parameter","sans-serif",table.get("SANS-SERIF"));
          // an alternate interpretation: assertEquals("font face parameter","Arial,helvetica,",table.get("FACE"));
!         assertEquals("font face parameter","Arial,\"helvetica,",table.get("FACE"));
      }
  
--- 368,373 ----
          assertEquals("font sans-serif parameter","sans-serif",table.get("SANS-SERIF"));
          // an alternate interpretation: assertEquals("font face parameter","Arial,helvetica,",table.get("FACE"));
!         // another: assertEquals("font face parameter","Arial,\"helvetica,",table.get("FACE"));
!         assertEquals("font face parameter","Arial,",table.get("FACE"));
      }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes Attribute.java,1.17,1.18 TagNode.java,1.29,1.30

From: <der...@us...> - 2004-02-09 02:12:55

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9169/src/org/htmlparser/lexer/nodes

Modified Files:
	Attribute.java TagNode.java 
Log Message:
Rework character entity translation.
See task 58599  enhance character reference translation.
Decode now handles missing semicolons, encoding is more efficient,
hexadecimal numeric character entity references are handled and
both encoding and decoding make minimal use of substring().
Augmented the tests in CharacterTranslationTest significantly, and
merged the Generate class into the tests.
Added translate command scripts in bin, which read from stdin and write to stdout.



Index: Attribute.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** Attribute.java	2 Jan 2004 16:24:53 -0000	1.17
--- Attribute.java	9 Feb 2004 02:09:44 -0000	1.18
***************
*** 580,584 ****
                          // references, so convert all double quotes into &#34;
                          quote = '"';
!                         ref = Translate.convertToString (quote);
                          // JDK 1.4: value = value.replaceAll ("\"", ref);
                          buffer = new StringBuffer (value.length() * 5);
--- 580,584 ----
                          // references, so convert all double quotes into &#34;
                          quote = '"';
!                         ref = Translate.encode (quote);
                          // JDK 1.4: value = value.replaceAll ("\"", ref);
                          buffer = new StringBuffer (value.length() * 5);

Index: TagNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v
retrieving revision 1.29
retrieving revision 1.30
diff -C2 -d -r1.29 -r1.30
*** TagNode.java	25 Jan 2004 21:32:59 -0000	1.29
--- TagNode.java	9 Feb 2004 02:09:44 -0000	1.30
***************
*** 186,190 ****
                  // convert all double quotes into &#34;
                  quote = '"';
!                 ref = Translate.convertToString (quote);
                  // JDK 1.4: value = value.replaceAll ("\"", ref);
                  buffer = new StringBuffer (value.length() * 5);
--- 186,190 ----
                  // convert all double quotes into &#34;
                  quote = '"';
!                 ref = Translate.encode (quote);
                  // JDK 1.4: value = value.replaceAll ("\"", ref);
                  buffer = new StringBuffer (value.length() * 5);

[Htmlparser-cvs] htmlparser/src/org/htmlparser/util CharacterReference.java,NONE,1.1 Translate.java,1.42,1.43 package.html,1.19,1.20 Generate.java,1.48,NONE

From: <der...@us...> - 2004-02-09 02:12:55

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9169/src/org/htmlparser/util

Modified Files:
	Translate.java package.html 
Added Files:
	CharacterReference.java 
Removed Files:
	Generate.java 
Log Message:
Rework character entity translation.
See task 58599  enhance character reference translation.
Decode now handles missing semicolons, encoding is more efficient,
hexadecimal numeric character entity references are handled and
both encoding and decoding make minimal use of substring().
Augmented the tests in CharacterTranslationTest significantly, and
merged the Generate class into the tests.
Added translate command scripts in bin, which read from stdin and write to stdout.



--- NEW FILE: CharacterReference.java ---
/*
 * CharacterReference.java
 *
 * Created on February 5, 2004, 9:40 PM
 */

package org.htmlparser.util;

import java.io.Serializable;

import org.htmlparser.util.sort.Ordered;

/**
 * Structure to hold a character and its equivalent entity reference kernel.
 * For the character reference &amp;copy; the character would be '&copy;' and
 * the kernel would be "copy", for example.<p>
 * Character references are described at
 * <a href="http://www.w3.org/TR/REC-html40/charset.html#entities">Character references</a>.
 * Supports the Ordered interface so it's easy to create a list sorted by
 * kernel, to perform binary searches on.<p>
 * The kernel is never <code>null</code>; a <code>null</code> kernel supplied
 * to the constructor or to {@link #setKernel} is stored as the empty string,
 * so {@link #compare} can always dereference it.
 */
public class CharacterReference
    implements
        Serializable,
        Cloneable,
        Ordered
{
    /**
     * The character value as an integer.
     */
    protected int mCharacter;

    /**
     * This entity reference kernel.
     * The text between the ampersand and the semicolon.
     */
    protected String mKernel;

    /**
     * Construct a <code>CharacterReference</code> with the character and kernel given.
     * @param kernel The kernel in the equivalent character entity reference.
     * A <code>null</code> kernel is stored as the empty string.
     * @param character The character needing encoding.
     */
    public CharacterReference (String kernel, int character)
    {
        mKernel = (null == kernel) ? "" : kernel;
        mCharacter = character;
    }

    /**
     * Get this CharacterReference's kernel.
     * @return The kernel in the equivalent character entity reference.
     */
    public String getKernel ()
    {
        return (mKernel);
    }

    /**
     * Set this CharacterReference's kernel.
     * This is used to avoid creating a new object to perform a binary search.
     * @param kernel The kernel in the equivalent character entity reference.
     * A <code>null</code> kernel is stored as the empty string, matching the
     * constructor, so a reused search key cannot make {@link #compare} fail.
     */
    void setKernel (String kernel)
    {
        mKernel = (null == kernel) ? "" : kernel;
    }

    /**
     * Get the character needing translation.
     * @return The character.
     */
    public int getCharacter ()
    {
        return (mCharacter);
    }

    /**
     * Set the character.
     * This is used to avoid creating a new object to perform a binary search.
     * @param character The character needing translation.
     */
    void setCharacter (int character)
    {
        mCharacter = character;
    }

    /**
     * Visualize this character reference as a string.
     * The form is <code>\\uXXXX[kernel]</code>, where the character value is
     * written in hexadecimal, left padded with zeros to at least four digits.
     * @return A string with the character and kernel.
     */
    public String toString ()
    {
        String hex;
        StringBuffer ret;

        // initial capacity is only a hint; the buffer grows for longer kernels
        ret = new StringBuffer (6 + 8 + 2);
        hex = Integer.toHexString (getCharacter ());
        ret.append ("\\u");
        // pad short values so at least four hex digits are always shown
        for (int i = hex.length (); i < 4; i++)
            ret.append ("0");
        ret.append (hex);
        ret.append ("[");
        ret.append (getKernel ());
        ret.append ("]");

        return (ret.toString ());
    }

    //
    // Ordered interface
    //

    /**
     * Compare one reference to another.
     * Only the kernels take part in the comparison; the character values
     * are ignored.
     * @param that The object to compare this reference to.
     * It must be a <code>CharacterReference</code>.
     * @return The result of <code>String.compareTo()</code> applied to this
     * reference's kernel and the kernel of <code>that</code>.
     * @exception ClassCastException If <code>that</code> is not a
     * <code>CharacterReference</code>.
     * @exception NullPointerException If <code>that</code> is <code>null</code>.
     * @see org.htmlparser.util.sort.Ordered
     */
    public int compare (Object that)
    {
        CharacterReference r;

        r = (CharacterReference)that;

        return (getKernel ().compareTo (r.getKernel ()));
    }
}


Index: Translate.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Translate.java,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** Translate.java	2 Jan 2004 16:24:58 -0000	1.42
--- Translate.java	9 Feb 2004 02:09:45 -0000	1.43
***************
*** 27,57 ****
  package org.htmlparser.util;
  
  import java.util.HashMap;
  import java.util.Iterator;
  import java.util.Map;
  
  /**
   * Translate numeric character references and character entity references to unicode characters.
   * Based on tables found at <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">
   * http://www.w3.org/TR/REC-html40/sgml/entities.html</a>
[...1684 lines suppressed...]
+      * Numeric character reference and character entity reference to unicode codec.
+      * Translate the <code>System.in</code> input into an encoded or decoded
+      * stream and send the results to <code>System.out</code>.
+      * @param args If arg[0] is <code>-encode</code> perform an encoding on
+      * <code>System.in</code>, otherwise perform a decoding.
+      */
+     public static void main (String[] args)
+     {
+         boolean encode;
+ 
+         if (0 < args.length && args[0].equalsIgnoreCase ("-encode"))
+             encode = true;
+         else
+             encode = false;
+         if (encode)
+             encode (System.in, System.out);
+         else
+             decode (System.in, System.out);
+     }
  }

Index: package.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/package.html,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** package.html	2 Jan 2004 16:24:58 -0000	1.19
--- package.html	9 Feb 2004 02:09:45 -0000	1.20
***************
*** 29,46 ****
  -->
  </head>
! <body bgcolor="white">
! The util package is intended for holding utility classes that dont directly help with the parsing,
! but can take responsibilities out from some classes. Resuable code which can be reused by many classes, should be located
! in this package.
! 
! <h2>Related Documentation</h2>
! 
! For overviews, tutorials, examples, guides, and tool documentation, please see:
! <ul>
!   <li><a href="http://htmlparser.sourceforge.net">HTML Parser Home Page</a>
! </ul>
! 
! <!-- Put @see and @since tags down here. -->
! 
  </body>
  </html>
--- 29,36 ----
  -->
  </head>
! <body>
! Code which can be reused by many classes, is located in this package.
! The util package is intended for holding utility classes that don't directly
! help with parsing, but can take responsibilities out of some classes.
  </body>
  </html>

--- Generate.java DELETED ---

1 message has been excluded from this view by a project administrator.

Flat | Threaded

<< < 1 .. 19 20 21 22 23 .. 61 > >> (Page 21 of 61)

2003	Jan	Feb	Mar	Apr	May (141)	Jun (108)	Jul (66)	Aug (127)	Sep (155)	Oct (149)	Nov (72)	Dec (72)
2004	Jan (100)	Feb (36)	Mar (21)	Apr (3)	May (87)	Jun (28)	Jul (84)	Aug (5)	Sep (14)	Oct	Nov	Dec
2005	Jan (1)	Feb (39)	Mar (26)	Apr (38)	May (14)	Jun (10)	Jul	Aug	Sep (13)	Oct (8)	Nov (10)	Dec
2006	Jan	Feb (1)	Mar (17)	Apr (20)	May (28)	Jun (24)	Jul	Aug	Sep	Oct	Nov	Dec
2015	Jan	Feb	Mar (1)	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec