htmlparser-cvs Mailing List for HTML Parser (Page 5)
Brought to you by:
derrickoswald
You can subscribe to this list here.
| 2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(141) |
Jun
(108) |
Jul
(66) |
Aug
(127) |
Sep
(155) |
Oct
(149) |
Nov
(72) |
Dec
(72) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 |
Jan
(100) |
Feb
(36) |
Mar
(21) |
Apr
(3) |
May
(87) |
Jun
(28) |
Jul
(84) |
Aug
(5) |
Sep
(14) |
Oct
|
Nov
|
Dec
|
| 2005 |
Jan
(1) |
Feb
(39) |
Mar
(26) |
Apr
(38) |
May
(14) |
Jun
(10) |
Jul
|
Aug
|
Sep
(13) |
Oct
(8) |
Nov
(10) |
Dec
|
| 2006 |
Jan
|
Feb
(1) |
Mar
(17) |
Apr
(20) |
May
(28) |
Jun
(24) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2015 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
|
From: Ian M. <ian...@us...> - 2005-11-01 08:55:34
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4510/src/org/htmlparser/nodeDecorators Modified Files: AbstractNodeDecorator.java Log Message: Add the first/last child and previous/next sibling methods that were added to AbstractNode. This is required to enable the project to compile. Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** AbstractNodeDecorator.java 10 Apr 2005 23:20:43 -0000 1.23 --- AbstractNodeDecorator.java 1 Nov 2005 08:55:24 -0000 1.24 *************** *** 244,247 **** --- 244,263 ---- delegate.setChildren (children); } + + public Node getFirstChild (){ + return delegate.getFirstChild(); + } + + public Node getLastChild (){ + return delegate.getLastChild(); + } + + public Node getPreviousSibling (){ + return delegate.getPreviousSibling(); + } + + public Node getNextSibling (){ + return delegate.getNextSibling(); + } public void setText(String text) { |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25403/src/org/htmlparser/tags Modified Files: TableColumn.java TableHeader.java TableRow.java Added Files: DefinitionList.java DefinitionListBullet.java HeadingTag.java ParagraphTag.java Log Message: Added support for P and h1-h6 tags. Added support for definition list tags (dl, dt, dd). Let table row/column tags know when to close if encounter TBODY/TFOOT/THEAD. --- NEW FILE: ParagraphTag.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ParagraphTag.java,v $ // $Author: ian_macfarlane $ // $Date: 2005/10/31 16:26:11 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tags; /** * A paragraph (p) tag. */ public class ParagraphTag extends CompositeTag { /** * The set of names handled by this tag. */ private static final String[] mIds = new String[] {"P"}; /** * The set of tag names that indicate the end of this tag. 
*/ private static final String[] mEnders = new String[] {"ADDRESS", "BLOCKQUOTE", "CENTER", "DD", "DIR", "DIV", "DL", "DT", "FIELDSET", "FORM", "H1", "H2", "H3", "H4", "H5", "H6", "HR", "ISINDEX", "LI", "MENU", "NOFRAMES", "OL", "P", "PARAM", "PRE", "UL"}; /** * The set of end tag names that indicate the end of this tag. */ private static final String[] mEndTagEnders = new String[] {"BODY", "HTML"}; /** * Create a new p tag. */ public ParagraphTag () { } /** * Return the set of names handled by this tag. * @return The names to be matched that create tags of this type. */ public String[] getIds () { return (mIds); } /** * Return the set of tag names that cause this tag to finish. * @return The names of following tags that stop further scanning. */ public String[] getEnders () { return (mEnders); } /** * Return the set of end tag names that cause this tag to finish. * @return The names of following end tags that stop further scanning. */ public String[] getEndTagEnders () { return (mEndTagEnders); } } --- NEW FILE: DefinitionListBullet.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/DefinitionListBullet.java,v $ // $Author: ian_macfarlane $ // $Date: 2005/10/31 16:26:11 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tags; /** * A definition list bullet tag (either DD or DT). */ public class DefinitionListBullet extends CompositeTag { /** * The set of names handled by this tag. */ private static final String[] mIds = new String[] {"DD", "DT"}; /** * The set of end tag names that indicate the end of this tag. */ private static final String[] mEndTagEnders = new String[] {"DL", "BODY", "HTML"}; /** * Create a new bullet tag. */ public DefinitionListBullet () { } /** * Return the set of names handled by this tag. * @return The names to be matched that create tags of this type. */ public String[] getIds () { return (mIds); } /** * Return the set of tag names that cause this tag to finish. * @return The names of following tags that stop further scanning. */ public String[] getEnders () { return (mIds); } /** * Return the set of end tag names that cause this tag to finish. * @return The names of following end tags that stop further scanning. */ public String[] getEndTagEnders () { return (mEndTagEnders); } } --- NEW FILE: DefinitionList.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/DefinitionList.java,v $ // $Author: ian_macfarlane $ // $Date: 2005/10/31 16:26:11 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tags; /** * A definition list tag (dl). */ public class DefinitionList extends CompositeTag { /** * The set of names handled by this tag. */ private static final String[] mIds = new String[] {"DL"}; /** * The set of end tag names that indicate the end of this tag. */ private static final String[] mEndTagEnders = new String[] {"BODY", "HTML"}; /** * Create a new bullet list (ordered or unordered) tag. */ public DefinitionList () { } /** * Return the set of names handled by this tag. * @return The names to be matched that create tags of this type. */ public String[] getIds () { return (mIds); } /** * Return the set of end tag names that cause this tag to finish. * @return The names of following end tags that stop further scanning. */ public String[] getEndTagEnders () { return (mEndTagEnders); } } Index: TableHeader.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableHeader.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** TableHeader.java 31 Jul 2004 16:42:34 -0000 1.2 --- TableHeader.java 31 Oct 2005 16:26:11 -0000 1.3 *************** *** 1,82 **** ! // HTMLParser Library $Name$ - A java-based parser for HTML ! // http://sourceforge.org/projects/htmlparser ! // Copyright (C) 2004 Pim Schrama ! // ! // Revision Control Information ! // ! // $Source$ ! // $Author$ ! // $Date$ ! // $Revision$ ! // ! 
// This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! ! package org.htmlparser.tags; ! ! /** ! * A table header tag. ! */ ! public class TableHeader extends CompositeTag ! { ! /** ! * The set of names handled by this tag. ! */ ! private static final String[] mIds = new String[] {"TH"}; ! ! /** ! * The set of tag names that indicate the end of this tag. ! */ ! private static final String[] mEnders = new String[] {"TH", "TR"}; ! ! /** ! * The set of end tag names that indicate the end of this tag. ! */ ! private static final String[] mEndTagEnders = new String[] {"TR", "TABLE"}; ! ! /** ! * Create a new table header tag. ! */ ! public TableHeader () ! { ! } ! ! /** ! * Return the set of names handled by this tag. ! * @return The names to be matched that create tags of this type. ! */ ! public String[] getIds () ! { ! return (mIds); ! } ! ! /** ! * Return the set of tag names that cause this tag to finish. ! * @return The names of following tags that stop further scanning. ! */ ! public String[] getEnders () ! { ! return (mEnders); ! } ! ! /** ! * Return the set of end tag names that cause this tag to finish. ! * @return The names of following end tags that stop further scanning. ! */ ! public String[] getEndTagEnders () ! { ! return (mEndTagEnders); ! } ! 
} --- 1,84 ---- ! // HTMLParser Library $Name$ - A java-based parser for HTML ! // http://sourceforge.org/projects/htmlparser ! // Copyright (C) 2004 Pim Schrama ! // ! // Revision Control Information ! // ! // $Source$ ! // $Author$ ! // $Date$ ! // $Revision$ ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! ! package org.htmlparser.tags; ! ! /** ! ! * A table header tag. ! ! */ ! public class TableHeader extends CompositeTag ! { ! /** ! * The set of names handled by this tag. ! */ ! private static final String[] mIds = new String[] {"TH"}; ! ! /** ! * The set of tag names that indicate the end of this tag. ! */ ! private static final String[] mEnders = new String[] {"TH", "TR", "TBODY", "TFOOT", "THEAD"}; ! ! /** ! * The set of end tag names that indicate the end of this tag. ! */ ! private static final String[] mEndTagEnders = new String[] {"TR", "TBODY", "TFOOT", "THEAD", "TABLE"}; ! ! /** ! * Create a new table header tag. ! */ ! public TableHeader () ! { ! } ! ! /** ! * Return the set of names handled by this tag. ! * @return The names to be matched that create tags of this type. ! */ ! public String[] getIds () ! { ! return (mIds); ! } ! ! /** ! * Return the set of tag names that cause this tag to finish. ! 
* @return The names of following tags that stop further scanning. ! */ ! public String[] getEnders () ! { ! return (mEnders); ! } ! ! /** ! * Return the set of end tag names that cause this tag to finish. ! * @return The names of following end tags that stop further scanning. ! */ ! public String[] getEndTagEnders () ! { ! return (mEndTagEnders); ! } ! } Index: TableColumn.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableColumn.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** TableColumn.java 31 Jul 2004 16:42:34 -0000 1.37 --- TableColumn.java 31 Oct 2005 16:26:11 -0000 1.38 *************** *** 40,49 **** * The set of tag names that indicate the end of this tag. */ ! private static final String[] mEnders = new String[] {"TD", "TR"}; /** * The set of end tag names that indicate the end of this tag. */ ! private static final String[] mEndTagEnders = new String[] {"TR", "TABLE"}; /** --- 40,49 ---- * The set of tag names that indicate the end of this tag. */ ! private static final String[] mEnders = new String[] {"TD", "TR", "TBODY", "TFOOT", "THEAD"}; /** * The set of end tag names that indicate the end of this tag. */ ! private static final String[] mEndTagEnders = new String[] {"TR", "TBODY", "TFOOT", "THEAD", "TABLE"}; /** Index: TableRow.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableRow.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** TableRow.java 10 Apr 2005 23:20:45 -0000 1.42 --- TableRow.java 31 Oct 2005 16:26:11 -0000 1.43 *************** *** 45,53 **** */ private static final String[] mIds = new String[] {"TR"}; ! /** * The set of end tag names that indicate the end of this tag. */ ! 
private static final String[] mEndTagEnders = new String[] {"TABLE"}; /** --- 45,58 ---- */ private static final String[] mIds = new String[] {"TR"}; ! ! /** ! * The set of tag names that indicate the end of this tag. ! */ ! private static final String[] mEnders = new String[] {"TBODY", "TFOOT", "THEAD"}; ! /** * The set of end tag names that indicate the end of this tag. */ ! private static final String[] mEndTagEnders = new String[] {"TBODY", "TFOOT", "THEAD", "TABLE"}; /** *************** *** 73,77 **** public String[] getEnders () { ! return (mIds); } --- 78,82 ---- public String[] getEnders () { ! return (mEnders); } --- NEW FILE: HeadingTag.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/HeadingTag.java,v $ // $Author: ian_macfarlane $ // $Date: 2005/10/31 16:26:11 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tags; /** * A heading (h1 - h6) tag. */ public class HeadingTag extends CompositeTag { /** * The set of names handled by this tag. 
*/ private static final String[] mIds = new String[] {"H1", "H2", "H3", "H4", "H5", "H6"}; /** * The set of tag names that indicate the end of this tag. */ private static final String[] mEnders = new String[] {"H1", "H2", "H3", "H4", "H5", "H6", "PARAM"}; /** * The set of end tag names that indicate the end of this tag. */ private static final String[] mEndTagEnders = new String[] {"BODY", "HTML"}; /** * Create a new heading tag. */ public HeadingTag() { } /** * Return the set of names handled by this tag. * @return The names to be matched that create tags of this type. */ public String[] getIds () { return (mIds); } /** * Return the set of tag names that cause this tag to finish. * @return The names of following tags that stop further scanning. */ public String[] getEnders () { return (mEnders); } /** * Return the set of end tag names that cause this tag to finish. * @return The names of following end tags that stop further scanning. */ public String[] getEndTagEnders () { return (mEndTagEnders); } } |
|
From: Ian M. <ian...@us...> - 2005-10-31 16:26:19
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25403/src/org/htmlparser Modified Files: PrototypicalNodeFactory.java Log Message: Added support for P and h1-h6 tags. Added support for definition list tags (dl, dt, dd). Let table row/column tags know when to close if encounter TBODY/TFOOT/THEAD. Index: PrototypicalNodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/PrototypicalNodeFactory.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** PrototypicalNodeFactory.java 24 Apr 2005 17:48:27 -0000 1.15 --- PrototypicalNodeFactory.java 31 Oct 2005 16:26:11 -0000 1.16 *************** *** 43,46 **** --- 43,48 ---- import org.htmlparser.tags.Bullet; import org.htmlparser.tags.BulletList; + import org.htmlparser.tags.DefinitionList; + import org.htmlparser.tags.DefinitionListBullet; import org.htmlparser.tags.Div; import org.htmlparser.tags.DoctypeTag; *************** *** 48,51 **** --- 50,54 ---- import org.htmlparser.tags.FrameSetTag; import org.htmlparser.tags.FrameTag; + import org.htmlparser.tags.HeadingTag; import org.htmlparser.tags.HeadTag; import org.htmlparser.tags.Html; *************** *** 58,61 **** --- 61,65 ---- import org.htmlparser.tags.ObjectTag; import org.htmlparser.tags.OptionTag; + import org.htmlparser.tags.ParagraphTag; import org.htmlparser.tags.ScriptTag; import org.htmlparser.tags.SelectTag; *************** *** 299,306 **** --- 303,313 ---- registerTag (new Bullet ()); registerTag (new BulletList ()); + registerTag (new DefinitionList ()); + registerTag (new DefinitionListBullet ()); registerTag (new DoctypeTag ()); registerTag (new FormTag ()); registerTag (new FrameSetTag ()); registerTag (new FrameTag ()); + registerTag (new HeadingTag ()); registerTag (new ImageTag ()); registerTag (new InputTag ()); *************** *** 311,314 **** --- 318,322 ---- 
registerTag (new ObjectTag ()); registerTag (new OptionTag ()); + registerTag (new ParagraphTag ()); registerTag (new ScriptTag ()); registerTag (new SelectTag ()); *************** *** 325,328 **** --- 333,337 ---- registerTag (new HeadTag ()); registerTag (new Html ()); + return (this); |
|
From: Derrick O. <der...@us...> - 2005-10-26 22:01:36
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11299/docs Modified Files: contributors.html Log Message: Incorporate patch #1338534 Support get first/last child, previous/next sibling from Ian Macfarlane. No unit tests. Index: contributors.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** contributors.html 19 Sep 2005 02:35:05 -0000 1.19 --- contributors.html 26 Oct 2005 22:01:23 -0000 1.20 *************** *** 396,405 **** </tr> </table> ! <p>Thanks to Keiron McCammon, Martin Hudson, Matthew Buckett, Jamie McCrindle, ! John Derrick, David Andersen, Manuel Polo, Enrico Triolo, Gernot Fricke, ! Nick Burch, Stephen Harrington, Domenico Lordi, Kamen, John Zook, Cheng Jun, ! Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, ! Gordon Deudney, Serge Kruppa, Roger Kjensrud, and Manpreet Singh ! for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> </body> --- 396,405 ---- </tr> </table> ! <p>Thanks to Ian Macfarlane, Keiron McCammon, Martin Hudson, Matthew Buckett, ! Jamie McCrindle, John Derrick, David Andersen, Manuel Polo, Enrico Triolo, ! Gernot Fricke, Nick Burch, Stephen Harrington, Domenico Lordi, Kamen, ! John Zook, Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, ! Robert Kausch, Gordon Deudney, Serge Kruppa, Roger Kjensrud, ! and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> </body> |
|
From: Derrick O. <der...@us...> - 2005-10-26 22:01:36
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11299/src/org/htmlparser Modified Files: Node.java Log Message: Incorporate patch #1338534 Support get first/last child, previous/next sibling from Ian Macfarlane. No unit tests. Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** Node.java 24 Apr 2005 17:48:27 -0000 1.53 --- Node.java 26 Oct 2005 22:01:23 -0000 1.54 *************** *** 205,209 **** */ void setChildren (NodeList children); ! /** * Returns the text of the node. --- 205,237 ---- */ void setChildren (NodeList children); ! ! /** ! * Get the first child of this node. ! * @return The first child in the list of children contained by this node, ! * <code>null</code> otherwise. ! */ ! Node getFirstChild (); ! ! /** ! * Get the last child of this node. ! * @return The last child in the list of children contained by this node, ! * <code>null</code> otherwise. ! */ ! Node getLastChild (); ! ! /** ! * Get the previous sibling to this node. ! * @return The previous sibling to this node if one exists, ! * <code>null</code> otherwise. ! */ ! Node getPreviousSibling (); ! ! /** ! * Get the next sibling to this node. ! * @return The next sibling to this node if one exists, ! * <code>null</code> otherwise. ! */ ! Node getNextSibling (); ! /** * Returns the text of the node. |
|
From: Derrick O. <der...@us...> - 2005-10-26 22:01:36
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11299/src/org/htmlparser/nodes Modified Files: AbstractNode.java Log Message: Incorporate patch #1338534 Support get first/last child, previous/next sibling from Ian Macfarlane. No unit tests. Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/AbstractNode.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** AbstractNode.java 10 Apr 2005 23:20:44 -0000 1.4 --- AbstractNode.java 26 Oct 2005 22:01:23 -0000 1.5 *************** *** 273,276 **** --- 273,367 ---- this.children = children; } + + /** + * Get the first child of this node. + * @return The first child in the list of children contained by this node, + * <code>null</code> otherwise. + */ + public Node getFirstChild () + { + if (children == null) + return null; + if (children.size() == 0) + return null; + return children.elementAt(0); + } + + /** + * Get the last child of this node. + * @return The last child in the list of children contained by this node, + * <code>null</code> otherwise. + */ + public Node getLastChild () + { + if (children == null) + return null; + int numChildren = children.size(); + if (numChildren == 0) + return null; + return children.elementAt(numChildren - 1); + } + + /** + * Get the previous sibling to this node. + * @return The previous sibling to this node if one exists, + * <code>null</code> otherwise. 
+ */ + public Node getPreviousSibling () + { + Node parentNode = this.getParent(); + if (parentNode == null)//root node + return null; + NodeList siblings = parentNode.getChildren(); + if (siblings == null)//this should actually be an error + return null; + int numSiblings = siblings.size(); + if (numSiblings < 2)//need at least one other node to have a chance of having any siblings + return null; + int positionInParent = -1; + for (int i = 0; i < numSiblings; i++) + { + if (siblings.elementAt(i) == this) + { + positionInParent = i; + break; + } + } + if (positionInParent < 1)//no previous siblings + return null; + return siblings.elementAt(positionInParent - 1); + } + + /** + * Get the next sibling to this node. + * @return The next sibling to this node if one exists, + * <code>null</code> otherwise. + */ + public Node getNextSibling () + { + Node parentNode = this.getParent(); + if (parentNode == null)//root node + return null; + NodeList siblings = parentNode.getChildren(); + if (siblings == null)//this should actually be an error + return null; + int numSiblings = siblings.size(); + if (numSiblings < 2)//need at least one other node to have a chance of having any siblings + return null; + int positionInParent = -1; + for (int i = 0; i < numSiblings; i++) + { + if (siblings.elementAt(i) == this) + { + positionInParent = i; + break; + } + } + if (positionInParent == -1)//this should actually be an error + return null; + if (positionInParent == (numSiblings - 1))//no next sibling + return null; + return siblings.elementAt(positionInParent + 1); + } /** |
|
From: Derrick O. <der...@us...> - 2005-10-25 02:07:02
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15396/lexer Modified Files: Page.java Log Message: Fix bug #1322686 when illegal charset specified. Use current source charset as the default if there is already a source. Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** Page.java 20 Jun 2005 01:56:32 -0000 1.51 --- Page.java 25 Oct 2005 02:06:46 -0000 1.52 *************** *** 251,255 **** * @see #DEFAULT_CHARSET */ ! public static String getCharset (String content) { final String CHARSET_STRING = "charset"; --- 251,255 ---- * @see #DEFAULT_CHARSET */ ! public String getCharset (String content) { final String CHARSET_STRING = "charset"; *************** *** 257,261 **** String ret; ! ret = DEFAULT_CHARSET; if (null != content) { --- 257,266 ---- String ret; ! if (null == mSource) ! ret = DEFAULT_CHARSET; ! else ! // use existing (possibly supplied) character set: ! // bug #1322686 when illegal charset specified ! ret = mSource.getEncoding (); if (null != content) { |
|
From: Derrick O. <der...@us...> - 2005-10-25 02:07:02
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15396/tags Modified Files: MetaTag.java Log Message: Fix bug #1322686 when illegal charset specified. Use current source charset as the default if there is already a source. Index: MetaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/MetaTag.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** MetaTag.java 10 Apr 2005 23:20:45 -0000 1.39 --- MetaTag.java 25 Oct 2005 02:06:46 -0000 1.40 *************** *** 147,151 **** if ("Content-Type".equalsIgnoreCase (httpEquiv)) { ! charset = Page.getCharset (getAttribute ("CONTENT")); getPage ().setEncoding (charset); } --- 147,151 ---- if ("Content-Type".equalsIgnoreCase (httpEquiv)) { ! charset = getPage ().getCharset (getAttribute ("CONTENT")); getPage ().setEncoding (charset); } |
|
From: Derrick O. <der...@us...> - 2005-10-25 01:26:18
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8364 Modified Files: InputStreamSource.java Log Message: Fixed bug #1334408: Exception occurs based on string length. Changed >= test to > to avoid off-by-one error. Index: InputStreamSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** InputStreamSource.java 19 Sep 2005 02:35:05 -0000 1.8 --- InputStreamSource.java 25 Oct 2005 01:26:09 -0000 1.9 *************** *** 619,623 **** if (null == mStream) throw new IOException ("source is closed"); ! if (offset + length >= mBuffer.length) throw new IOException ("illegal read ahead"); else --- 619,623 ---- if (null == mStream) throw new IOException ("source is closed"); ! if (offset + length > mBuffer.length) throw new IOException ("illegal read ahead"); else |
|
From: Derrick O. <der...@us...> - 2005-09-26 01:01:35
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2036 Modified Files: build.xml Log Message: Fix htmlparser target. Index: build.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/build.xml,v retrieving revision 1.80 retrieving revision 1.81 diff -C2 -d -r1.80 -r1.81 *** build.xml 25 Sep 2005 13:23:00 -0000 1.80 --- build.xml 26 Sep 2005 01:01:22 -0000 1.81 *************** *** 521,526 **** </target> ! <!-- Perform the htmlparser integration --><!--,release,sources" --> ! <target name="htmlparser" depends="init" description="create distribution zip file"> <mkdir dir="${distribution}"/> --- 521,526 ---- </target> ! <!-- Perform the htmlparser integration --> ! <target name="htmlparser" depends="init,release,sources" description="create distribution zip file"> <mkdir dir="${distribution}"/> |
|
From: Derrick O. <der...@us...> - 2005-09-25 13:23:15
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13518/htmlparser/docs Modified Files: changes.txt release.txt Log Message: Update version to 1.6-20050925. Index: release.txt =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/release.txt,v retrieving revision 1.70 retrieving revision 1.71 diff -C2 -d -r1.70 -r1.71 *** release.txt 14 Jun 2005 10:37:33 -0000 1.70 --- release.txt 25 Sep 2005 13:23:00 -0000 1.71 *************** *** 1,3 **** ! HTMLParser Version 1.5 (Release Build Jun 14, 2005) ********************************************* --- 1,3 ---- ! HTMLParser Version 1.6 (Integration Build Sep 25, 2005) ********************************************* *************** *** 25,28 **** --- 25,42 ---- (vi) this file, readme.txt + Changes since Version 1.5 + ------------------------- + + Refactoring + ----------- + The FilterBean now has a 'recursive' property to control descent through + children when applying filters. + The NodeList class is a little more standard now with a remove(node) method. + Some refactoring to allow the htmllexer jar file to be compiled by gcj. 
+ + Bug Fixes + --------- + #1227213 Particular SCRIPT tags close too late + Changes since Version 1.4 ------------------------- *************** *** 144,147 **** --- 158,166 ---- [35] Enrico Triolo [36] Dave Anderson + [37] Keiron McCammon + [38] Martin Hudson + [39] Matthew Buckett + [40] Jamie McCrindle + [41] John Derrick If you find any bugs, please go to Index: changes.txt =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/changes.txt,v retrieving revision 1.206 retrieving revision 1.207 diff -C2 -d -r1.206 -r1.207 *** changes.txt 14 Jun 2005 10:37:33 -0000 1.206 --- changes.txt 25 Sep 2005 13:23:00 -0000 1.207 *************** *** 16,19 **** --- 16,71 ---- ******************************************************************************* + Integration Build 1.6 - 20050925 + -------------------------------- + + 2005-09-18 22:35 derrickoswald + + * docs/contributors.html, + src/org/htmlparser/lexer/InputStreamSource.java, + src/org/htmlparser/lexer/Lexer.java, + src/org/htmlparser/tests/lexerTests/LexerTests.java: + + Apply patch #1247128 Bug Fix: #1227213 Particular SCRIPT tags close too late + from Keiron McCammon. + + 2005-09-18 19:40 derrickoswald + + * docs/contributors.html, src/org/htmlparser/beans/FilterBean.java, + src/org/htmlparser/filters/LinkRegexFilter.java: + + Add recursive property on FilterBean suggested by Martin Hudson. + + 2005-09-18 19:00 derrickoswald + + * docs/contributors.html, + src/org/htmlparser/tests/utilTests/NodeListTest.java, + src/org/htmlparser/util/NodeList.java: + + Add remove(Node) method and code suggested by Matthew Buckett. + + 2005-06-19 21:56 derrickoswald + + * src/org/htmlparser/: http/ConnectionManager.java, + lexer/Lexer.java, lexer/Page.java, lexer/Stream.java, + tags/CompositeTag.java: + + Optimizations suggested by profiling. 
+ Correction to previous drop: + -- use extractAllNodesThatMatch to replace searchFor(cls) -- + + 2005-06-19 08:01 derrickoswald + + * build.xml, src/org/htmlparser/Parser.java, + src/org/htmlparser/http/ConnectionManager.java, + src/org/htmlparser/http/HttpHeader.java, + src/org/htmlparser/http/package.html, + src/org/htmlparser/util/NodeList.java: + + Changes to allow compilation of htmllexer.jar by gcj. + Move non-JDK1.1 functionality to HttpHeader class. + Unhook NodeList from filters by removing searchFor(cls) - + use keepAllNodesThatMatch(new NodeClassFilter(cls)) instead. + Include classes missing from closure set. + Release Build 1.5 - 20050614 -------------------------------- |
|
From: Derrick O. <der...@us...> - 2005-09-25 13:23:15
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13518/htmlparser Modified Files: build.xml Log Message: Update version to 1.6-20050925. Index: build.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/build.xml,v retrieving revision 1.79 retrieving revision 1.80 diff -C2 -d -r1.79 -r1.80 *** build.xml 19 Jun 2005 12:01:13 -0000 1.79 --- build.xml 25 Sep 2005 13:23:00 -0000 1.80 *************** *** 108,112 **** --> <property name="versionMajor" value="1"/> ! <property name="versionMinor" value="5"/> <property name="versionType" value="Integration Build"/> <property name="versionNumber" value="${versionMajor}.${versionMinor}"/> --- 108,112 ---- --> <property name="versionMajor" value="1"/> ! <property name="versionMinor" value="6"/> <property name="versionType" value="Integration Build"/> <property name="versionNumber" value="${versionMajor}.${versionMinor}"/> |
|
From: Derrick O. <der...@us...> - 2005-09-25 13:23:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13518/htmlparser/src/org/htmlparser Modified Files: Parser.java Log Message: Update version to 1.6-20050925. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.107 retrieving revision 1.108 diff -C2 -d -r1.107 -r1.108 *** Parser.java 19 Jun 2005 12:01:13 -0000 1.107 --- Parser.java 25 Sep 2005 13:23:00 -0000 1.108 *************** *** 119,123 **** */ public static final double ! VERSION_NUMBER = 1.5 ; --- 119,123 ---- */ public static final double ! VERSION_NUMBER = 1.6 ; *************** *** 126,130 **** */ public static final String ! VERSION_TYPE = "Release Build" ; --- 126,130 ---- */ public static final String ! VERSION_TYPE = "Integration Build" ; *************** *** 133,137 **** */ public static final String ! VERSION_DATE = "Jun 14, 2005" ; --- 133,137 ---- */ public static final String ! VERSION_DATE = "Sep 25, 2005" ; |
|
From: Derrick O. <der...@us...> - 2005-09-19 02:35:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30508/src/org/htmlparser/lexer Modified Files: Lexer.java InputStreamSource.java Log Message: Apply patch #1247128 Bug Fix: #1227213 Particular SCRIPT tags close too late from Keiron McCammon. Index: InputStreamSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** InputStreamSource.java 15 May 2005 11:49:04 -0000 1.7 --- InputStreamSource.java 19 Sep 2005 02:35:05 -0000 1.8 *************** *** 47,51 **** /** * An initial buffer size. ! * Has a default value of {@value}. */ public static int BUFFER_SIZE = 16384; --- 47,51 ---- /** * An initial buffer size. ! * Has a default value of {16384}. */ public static int BUFFER_SIZE = 16384; Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** Lexer.java 20 Jun 2005 01:56:32 -0000 1.40 --- Lexer.java 19 Sep 2005 02:35:05 -0000 1.41 *************** *** 1182,1185 **** --- 1182,1186 ---- char ch; int end; + boolean comment; start = mCursor.getPosition (); *************** *** 1187,1190 **** --- 1188,1193 ---- done = false; quote = 0; + comment = false; + while (!done) { *************** *** 1199,1203 **** break; case '\'': ! if (quotesmart) if (0 == quote) quote = '\''; // enter quoted state --- 1202,1206 ---- break; case '\'': ! if (quotesmart && !comment) if (0 == quote) quote = '\''; // enter quoted state *************** *** 1206,1210 **** break; case '"': ! if (quotesmart) if (0 == quote) quote = '"'; // enter quoted state --- 1209,1213 ---- break; case '"': ! 
if (quotesmart && !comment) if (0 == quote) quote = '"'; // enter quoted state *************** *** 1232,1240 **** done = true; else if ('/' == ch) ! { ! do ! ch = mPage.getCharacter (mCursor); ! while ((Page.EOF != ch) && ('\n' != ch)); ! } else if ('*' == ch) { --- 1235,1239 ---- done = true; else if ('/' == ch) ! comment = true; else if ('*' == ch) { *************** *** 1254,1257 **** --- 1253,1259 ---- } break; + case '\n': + comment = false; + break; case '<': if (quotesmart) *************** *** 1276,1279 **** --- 1278,1298 ---- state = 2; break; + case '!': + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + { + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + state = 3; + else + state = 0; + } + else + state = 0; + break; default: state = 0; *************** *** 1282,1285 **** --- 1301,1305 ---- break; case 2: // </ + comment = false; if (Page.EOF == ch) done = true; *************** *** 1295,1298 **** --- 1315,1337 ---- state = 0; break; + case 3: // <! + comment = false; + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + { + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + { + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('>' == ch) + state = 0; + } + } + break; default: throw new IllegalStateException ("how the fuck did we get in state " + state); |
|
From: Derrick O. <der...@us...> - 2005-09-19 02:35:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30508/src/org/htmlparser/tests/lexerTests Modified Files: LexerTests.java Log Message: Apply patch #1247128 Bug Fix: #1227213 Particular SCRIPT tags close too late from Keiron McCammon. Index: LexerTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** LexerTests.java 15 May 2005 11:49:05 -0000 1.25 --- LexerTests.java 19 Sep 2005 02:35:05 -0000 1.26 *************** *** 37,40 **** --- 37,42 ---- import org.htmlparser.Text; import org.htmlparser.lexer.Lexer; + import org.htmlparser.tags.ScriptTag; + import org.htmlparser.tags.StyleTag; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.EncodingChangeException; *************** *** 273,277 **** char[] test; ! URL url = new URL ("http://sourceforge.net/projects/htmlparser"); lexer = new Lexer (url.openConnection ()); position = 0; --- 275,279 ---- char[] test; ! URL url = new URL ("http://sourceforge.net"); lexer = new Lexer (url.openConnection ()); position = 0; *************** *** 836,839 **** --- 838,906 ---- } + /** + * See bug #1227213 Particular SCRIPT tags close too late. 
+ */ + public void testCommentInScript () throws ParserException + { + String tag; + String cdata; + String endtag; + String html; + Parser parser; + NodeIterator iterator; + Node node; + + tag = "<script>"; + cdata = "<!--document.write(\"en\");// -->"; + endtag = "</script>"; + html = tag + cdata + endtag; + parser = new Parser (); + parser.setInputHTML (html); + iterator = parser.elements (); + node = iterator.nextNode (); + if (node == null) + fail ("too few nodes"); + else + assertStringEquals ("bad parse", html, node.toHtml()); + assertTrue (node instanceof ScriptTag); + assertStringEquals ("bad cdata", cdata, ((ScriptTag)node).getScriptCode ()); + assertNull ("too many nodes", iterator.nextNode ()); + } + + /** + * See bug #1227213 Particular SCRIPT tags close too late. + * This was actually working prior to the patch, since the + * ScriptScanner didn't use smartquote processing. + * I'm not sure why jwilsonsprings1 said the patch worked + * for him. I can only assume he was mistaken in thinking + * it was the URL that caused the failure. + */ + public void testUrlInStyle () throws ParserException + { + String tag; + String cdata; + String endtag; + String html; + Parser parser; + NodeIterator iterator; + Node node; + + tag = "<style>"; + cdata = ".eSDot {background-image:" + + "url(http://di.image.eshop.msn.com/img/sys/dot.gif)}"; + endtag = "</style>"; + html = tag + cdata + endtag; + parser = new Parser (); + parser.setInputHTML (html); + iterator = parser.elements (); + node = iterator.nextNode (); + if (node == null) + fail ("too few nodes"); + else + assertStringEquals ("bad parse", html, node.toHtml()); + assertTrue (node instanceof StyleTag); + assertStringEquals ("bad cdata", cdata, ((StyleTag)node).getStyleCode ()); + assertNull ("too many nodes", iterator.nextNode ()); + } } |
|
From: Derrick O. <der...@us...> - 2005-09-19 02:35:13
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30508/docs Modified Files: contributors.html Log Message: Apply patch #1247128 Bug Fix: #1227213 Particular SCRIPT tags close too late from Keiron McCammon. Index: contributors.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** contributors.html 18 Sep 2005 23:40:43 -0000 1.18 --- contributors.html 19 Sep 2005 02:35:05 -0000 1.19 *************** *** 396,404 **** </tr> </table> ! <p>Thanks to Martin Hudson, Matthew Buckett, Jamie McCrindle, John Derrick, ! David Andersen, Manuel Polo, Enrico Triolo, Gernot Fricke, Nick Burch, ! Stephen Harrington, Domenico Lordi, Kamen, John Zook, Cheng Jun, Mazlan Mat, ! Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, Gordon Deudney, ! Serge Kruppa, Roger Kjensrud, and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> --- 396,404 ---- </tr> </table> ! <p>Thanks to Keiron McCammon, Martin Hudson, Matthew Buckett, Jamie McCrindle, ! John Derrick, David Andersen, Manuel Polo, Enrico Triolo, Gernot Fricke, ! Nick Burch, Stephen Harrington, Domenico Lordi, Kamen, John Zook, Cheng Jun, ! Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, ! Gordon Deudney, Serge Kruppa, Roger Kjensrud, and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> |
|
From: Derrick O. <der...@us...> - 2005-09-18 23:40:53
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32615/src/org/htmlparser/beans Modified Files: FilterBean.java Log Message: Add recursive property on FilterBean suggested by Martin Hudson. Index: FilterBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/FilterBean.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** FilterBean.java 15 May 2005 11:49:03 -0000 1.3 --- FilterBean.java 18 Sep 2005 23:40:44 -0000 1.4 *************** *** 93,96 **** --- 93,103 ---- protected NodeList mNodes; + /** + * The recursion behaviour for elements of the filter array. + * If <code>true</code> the filters are applied recursively. + * @see org.htmlparser.util.NodeList#extractAllNodesThatMatch(NodeFilter, boolean). + */ + protected boolean mRecursive; + /** * Create a FilterBean object. *************** *** 102,105 **** --- 109,113 ---- mFilters = null; mNodes = null; + mRecursive = true; } *************** *** 144,150 **** /** * Apply each of the filters. ! * The first filter is applied to the parser. * Subsequent filters are applied to the output of the prior filter. * @return A list of nodes passed through all filters. * @throws ParserException If an encoding change occurs * or there is some other problem. --- 152,159 ---- /** * Apply each of the filters. ! * The first filter is applied to the output of the parser. * Subsequent filters are applied to the output of the prior filter. * @return A list of nodes passed through all filters. + * If there are no filters, returns the entire page. * @throws ParserException If an encoding change occurs * or there is some other problem. *************** *** 154,167 **** ParserException { NodeList ret; ! ret = new NodeList (); ! ! if (null != getFilters ()) ! for (int i = 0; i < getFilters ().length; i++) ! if (0 == i) ! 
ret = mParser.parse (getFilters ()[0]); ! else ! ret = ret.extractAllNodesThatMatch (getFilters ()[i]); return (ret); --- 163,174 ---- ParserException { + NodeFilter[] filters; NodeList ret; ! ret = mParser.parse (null); ! filters = getFilters (); ! if (null != filters) ! for (int i = 0; i < filters.length; i++) ! ret = ret.extractAllNodesThatMatch (filters[i], mRecursive); return (ret); *************** *** 409,412 **** --- 416,440 ---- /** + * Get the current recursion behaviour. + * @return The recursion (applies to children, children's children, etc) + * behavior currently being used. + */ + public boolean getRecursive () + { + return (mRecursive); + } + + /** + * Set the recursion behaviour. + * @param recursive If <code>true</code> the + * <code>extractAllNodesThatMatch()</code> call is performed recursively. + * @see org.htmlparser.util.NodeList#extractAllNodesThatMatch(NodeFilter, boolean). + */ + public void setRecursive (boolean recursive) + { + mRecursive = recursive; + } + + /** * Unit test. * @param args Pass arg[0] as the URL to process, |
|
From: Derrick O. <der...@us...> - 2005-09-18 23:40:53
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32615/src/org/htmlparser/filters Modified Files: LinkRegexFilter.java Log Message: Add recursive property on FilterBean suggested by Martin Hudson. Index: LinkRegexFilter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/LinkRegexFilter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** LinkRegexFilter.java 15 May 2005 11:49:04 -0000 1.3 --- LinkRegexFilter.java 18 Sep 2005 23:40:44 -0000 1.4 *************** *** 68,72 **** mRegex = Pattern.compile (regexPattern); else ! mRegex = Pattern.compile (regexPattern, Pattern.CASE_INSENSITIVE); } --- 68,73 ---- mRegex = Pattern.compile (regexPattern); else ! mRegex = Pattern.compile (regexPattern, ! Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); } |
|
From: Derrick O. <der...@us...> - 2005-09-18 23:40:52
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32615/docs Modified Files: contributors.html Log Message: Add recursive property on FilterBean suggested by Martin Hudson. Index: contributors.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** contributors.html 18 Sep 2005 23:00:26 -0000 1.17 --- contributors.html 18 Sep 2005 23:40:43 -0000 1.18 *************** *** 396,404 **** </tr> </table> ! <p>Thanks to Matthew Buckett, Jamie McCrindle, John Derrick, David Andersen, ! Manuel Polo, Enrico Triolo, Gernot Fricke, Nick Burch, Stephen Harrington, ! Domenico Lordi, Kamen, John Zook, Cheng Jun, Mazlan Mat, Rob Shields, ! Wolfgang Germund, Raj Sharma, Robert Kausch, Gordon Deudney, Serge Kruppa, ! Roger Kjensrud, and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> --- 396,404 ---- </tr> </table> ! <p>Thanks to Martin Hudson, Matthew Buckett, Jamie McCrindle, John Derrick, ! David Andersen, Manuel Polo, Enrico Triolo, Gernot Fricke, Nick Burch, ! Stephen Harrington, Domenico Lordi, Kamen, John Zook, Cheng Jun, Mazlan Mat, ! Rob Shields, Wolfgang Germund, Raj Sharma, Robert Kausch, Gordon Deudney, ! Serge Kruppa, Roger Kjensrud, and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> |
|
From: Derrick O. <der...@us...> - 2005-09-18 23:00:35
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23605/src/org/htmlparser/util Modified Files: NodeList.java Log Message: Add remove(Node) method and code suggested by Matthew Buckett. Index: NodeList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v retrieving revision 1.59 retrieving revision 1.60 diff -C2 -d -r1.59 -r1.60 *** NodeList.java 19 Jun 2005 12:01:14 -0000 1.59 --- NodeList.java 18 Sep 2005 23:00:27 -0000 1.60 *************** *** 35,39 **** import org.htmlparser.visitors.NodeVisitor; ! public class NodeList implements Serializable { private static final int INITIAL_CAPACITY=10; //private static final int CAPACITY_INCREMENT=20; --- 35,40 ---- import org.htmlparser.visitors.NodeVisitor; ! public class NodeList implements Serializable ! { private static final int INITIAL_CAPACITY=10; //private static final int CAPACITY_INCREMENT=20; *************** *** 42,71 **** private int capacity; private int capacityIncrement; - private int numberOfAdjustments; ! public NodeList() { ! size = 0; ! capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor(capacity); ! capacityIncrement = capacity*2; ! numberOfAdjustments = 0; } ! /** * Create a one element node list. * @param node The initial node to add. */ ! public NodeList(Node node) { this (); add (node); } ! ! public void add(Node node) { ! if (size==capacity) ! adjustVectorCapacity(); ! nodeData[size++]=node; } ! /** * Add another node list to this one. --- 43,69 ---- private int capacity; private int capacityIncrement; ! public NodeList () ! { ! removeAll (); } ! /** * Create a one element node list. * @param node The initial node to add. */ ! public NodeList (Node node) { this (); add (node); } ! ! public void add (Node node) ! { ! if (size == capacity) ! adjustVectorCapacity (); ! nodeData[size++] = node; } ! 
/** * Add another node list to this one. *************** *** 77,192 **** add (list.nodeData[i]); } ! /** * Insert the given node at the head of the list. * @param node The new first element. */ ! public void prepend(Node node) { ! if (size==capacity) ! adjustVectorCapacity(); System.arraycopy (nodeData, 0, nodeData, 1, size); size++; nodeData[0]=node; } ! ! private void adjustVectorCapacity() { capacity += capacityIncrement; capacityIncrement *= 2; Node oldData [] = nodeData; ! nodeData = newNodeArrayFor(capacity); ! System.arraycopy(oldData, 0, nodeData, 0, size); ! numberOfAdjustments++; } ! ! private Node[] newNodeArrayFor(int capacity) { return new Node[capacity]; } ! ! public int size() { return size; } ! ! public Node elementAt(int i) { return nodeData[i]; } ! ! public int getNumberOfAdjustments() { ! return numberOfAdjustments; ! } ! ! public SimpleNodeIterator elements() { ! return new SimpleNodeIterator() { int count = 0; ! ! public boolean hasMoreNodes() { return count < size; } ! ! public Node nextNode() { ! synchronized (NodeList.this) { ! if (count < size) { ! return nodeData[count++]; } ! } ! throw new NoSuchElementException("Vector Enumeration"); } }; } ! ! public Node [] toNodeArray() { ! Node [] nodeArray = newNodeArrayFor(size); ! System.arraycopy(nodeData, 0, nodeArray, 0, size); return nodeArray; } ! ! public void copyToNodeArray(Node[] array) { ! System.arraycopy(nodeData, 0, array, 0, size); } ! ! public String asString() { ! StringBuffer buff = new StringBuffer(); for (int i=0;i<size;i++) ! buff.append(nodeData[i].toPlainTextString()); ! return buff.toString(); } ! /** * Convert this nodelist into the equivalent HTML. - * @deprecated Use {@link #toHtml}. * @return The contents of the list as HTML text. */ ! public String asHtml() { ! return (toHtml ()); } ! /** ! * Convert this nodelist into the equivalent HTML. ! * @return The contents of the list as HTML text. */ ! 
public String toHtml() { - StringBuffer buff = new StringBuffer(); - for (int i=0;i<size;i++) - buff.append(nodeData[i].toHtml()); - return buff.toString(); - } - - public Node remove(int index) { Node ret; ret = nodeData[index]; ! System.arraycopy(nodeData, index+1, nodeData, index, size-index-1); nodeData[size-1] = null; size--; return (ret); } ! ! public void removeAll() { size = 0; capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor(capacity); ! capacityIncrement = capacity*2; ! numberOfAdjustments = 0; } --- 75,243 ---- add (list.nodeData[i]); } ! /** * Insert the given node at the head of the list. * @param node The new first element. */ ! public void prepend (Node node) { ! if (size == capacity) ! adjustVectorCapacity (); System.arraycopy (nodeData, 0, nodeData, 1, size); size++; nodeData[0]=node; } ! ! private void adjustVectorCapacity () ! { capacity += capacityIncrement; capacityIncrement *= 2; Node oldData [] = nodeData; ! nodeData = newNodeArrayFor (capacity); ! System.arraycopy (oldData, 0, nodeData, 0, size); } ! ! private Node[] newNodeArrayFor (int capacity) ! { return new Node[capacity]; } ! ! public int size () ! { return size; } ! ! public Node elementAt (int i) ! { return nodeData[i]; } ! ! public SimpleNodeIterator elements () ! { ! return new SimpleNodeIterator () ! { int count = 0; ! ! public boolean hasMoreNodes () ! { return count < size; } ! ! public Node nextNode () ! { ! synchronized (NodeList.this) ! { ! if (count < size) ! { ! return nodeData[count++]; ! } } ! throw new NoSuchElementException ("Vector Enumeration"); } }; } ! ! public Node [] toNodeArray () ! { ! Node [] nodeArray = newNodeArrayFor (size); ! System.arraycopy (nodeData, 0, nodeArray, 0, size); return nodeArray; } ! ! public void copyToNodeArray (Node[] array) ! { ! System.arraycopy (nodeData, 0, array, 0, size); } ! ! public String asString () ! { ! StringBuffer buff = new StringBuffer (); for (int i=0;i<size;i++) ! buff.append (nodeData[i].toPlainTextString ()); ! 
return buff.toString (); } ! /** * Convert this nodelist into the equivalent HTML. * @return The contents of the list as HTML text. */ ! public String toHtml () { ! StringBuffer buff = new StringBuffer (); ! for (int i=0;i<size;i++) ! buff.append (nodeData[i].toHtml ()); ! return buff.toString (); } ! /** ! * Remove the node at index. ! * @param index The index of the node to remove. ! * @return The node that was removed. */ ! public Node remove (int index) { Node ret; + ret = nodeData[index]; ! System.arraycopy (nodeData, index+1, nodeData, index, size - index - 1); nodeData[size-1] = null; size--; + return (ret); } ! ! public void removeAll () ! { size = 0; capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor (capacity); ! capacityIncrement = capacity * 2; ! } ! ! /** ! * Check to see if the NodeList contains the supplied Node. ! * @param node The node to look for. ! * @return True is the Node is in this NodeList. ! */ ! public boolean contains (Node node) ! { ! return (-1 != indexOf (node)); ! } ! ! /** ! * Finds the index of the supplied Node. ! * @param node The node to look for. ! * @return The index of the node in the list or -1 if it isn't found. ! */ ! public int indexOf (Node node) ! { ! int ret; ! ! ret = -1; ! for (int i = 0; (i < size) && (-1 == ret); i++) ! if (nodeData[i].equals (node)) ! ret = i; ! ! return (ret); ! } ! ! /** ! * Remove the supplied Node from the list. ! * @param node The node to remove. ! * @return True if the node was found and removed from the list. ! */ ! public boolean remove (Node node) ! { ! int index; ! boolean ret; ! ! ret = false; ! if (-1 != (index = indexOf (node))) ! { ! remove (index); ! ret = true; ! } ! ! return (ret); } *************** *** 198,205 **** public String toString() { ! StringBuffer text = new StringBuffer(); ! for (int i=0;i<size;i++) ! text.append (nodeData[i]); ! return (text.toString ()); } --- 249,259 ---- public String toString() { ! StringBuffer ret; ! ! ret = new StringBuffer (); ! 
for (int i = 0; i < size; i++) ! ret.append (nodeData[i]); ! ! return (ret.toString ()); } |
|
From: Derrick O. <der...@us...> - 2005-09-18 23:00:35
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23605/src/org/htmlparser/tests/utilTests Modified Files: NodeListTest.java Log Message: Add remove(Node) method and code suggested by Matthew Buckett. Index: NodeListTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/NodeListTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** NodeListTest.java 24 May 2004 16:18:34 -0000 1.27 --- NodeListTest.java 18 Sep 2005 23:00:27 -0000 1.28 *************** *** 52,55 **** --- 52,62 ---- } + public void testOneItemConstructor() { + Node node = createHTMLNodeObject(); + nodeList = new NodeList(node); + assertEquals("Vector Size",1,nodeList.size()); + assertTrue("First Element",node==nodeList.elementAt(0)); + } + public void testAddOneItem() { Node node = createHTMLNodeObject(); *************** *** 82,86 **** createTestDataAndPutInVector(30); assertTestDataCouldBeExtractedFromVector(30); - assertEquals("Number of Adjustments",1,nodeList.getNumberOfAdjustments()); } --- 89,92 ---- *************** *** 88,92 **** createTestDataAndPutInVector(31); assertTestDataCouldBeExtractedFromVector(31); - assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } --- 94,97 ---- *************** *** 94,98 **** createTestDataAndPutInVector(50); assertTestDataCouldBeExtractedFromVector(50); - assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } --- 99,102 ---- *************** *** 100,109 **** createTestDataAndPutInVector(51); assertTestDataCouldBeExtractedFromVector(51); - assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } public void testAddTwoHundredItems() { createTestDataAndPutInVector(200); ! 
assertEquals("Number of Adjustments",4,nodeList.getNumberOfAdjustments()); } --- 104,112 ---- createTestDataAndPutInVector(51); assertTestDataCouldBeExtractedFromVector(51); } public void testAddTwoHundredItems() { createTestDataAndPutInVector(200); ! assertTestDataCouldBeExtractedFromVector(200); } *************** *** 188,190 **** --- 191,236 ---- assertTrue("Second Element",null==nodeList.elementAt(1)); } + + + public void testIndexOf() { + Node node1 = createHTMLNodeObject(); + Node node2 = createHTMLNodeObject(); + Node node3 = createHTMLNodeObject(); + nodeList.add(node1); + nodeList.add(node2); + nodeList.add(node3); + assertEquals("Vector Size",3,nodeList.size()); + assertTrue("First Element",node1==nodeList.elementAt(0)); + assertTrue("Second Element",node2==nodeList.elementAt(1)); + assertTrue("Third Element",node3==nodeList.elementAt(2)); + assertTrue("Index wrong",1 == nodeList.indexOf(node2)); + assertTrue("Index wrong",0 == nodeList.indexOf(node1)); + assertTrue("Index wrong",2 == nodeList.indexOf(node3)); + } + + public void testRemoveItem() { + Node node1 = createHTMLNodeObject(); + Node node2 = createHTMLNodeObject(); + nodeList.add(node1); + nodeList.add(node2); + assertEquals("Vector Size",2,nodeList.size()); + assertTrue("First Element",node1==nodeList.elementAt(0)); + assertTrue("Second Element",node2==nodeList.elementAt(1)); + nodeList.remove(node1); + assertEquals("List Size",1,nodeList.size()); + assertTrue("First Element",node2==nodeList.elementAt(0)); + } + + public void testRemoveLastItem() { + Node node1 = createHTMLNodeObject(); + Node node2 = createHTMLNodeObject(); + nodeList.add(node1); + nodeList.add(node2); + assertEquals("Vector Size",2,nodeList.size()); + assertTrue("First Element",node1==nodeList.elementAt(0)); + assertTrue("Second Element",node2==nodeList.elementAt(1)); + nodeList.remove(node2); + assertEquals("List Size",1,nodeList.size()); + assertTrue("First Element",node1==nodeList.elementAt(0)); + } } |
|
From: Derrick O. <der...@us...> - 2005-09-18 23:00:34
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23605/docs Modified Files: contributors.html Log Message: Add remove(Node) method and code suggested by Matthew Buckett. Index: contributors.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/contributors.html,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** contributors.html 13 May 2005 10:44:15 -0000 1.16 --- contributors.html 18 Sep 2005 23:00:26 -0000 1.17 *************** *** 396,403 **** </tr> </table> ! <p>Thanks to Jamie McCrindle, John Derrick, David Andersen, Manuel Polo, Enrico Triolo, ! Gernot Fricke, Nick Burch, Stephen Harrington, Domenico Lordi, Kamen, ! John Zook, Cheng Jun, Mazlan Mat, Rob Shields, Wolfgang Germund, Raj Sharma, ! Robert Kausch, Gordon Deudney, Serge Kruppa, Roger Kjensrud, and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> --- 396,404 ---- </tr> </table> ! <p>Thanks to Matthew Buckett, Jamie McCrindle, John Derrick, David Andersen, ! Manuel Polo, Enrico Triolo, Gernot Fricke, Nick Burch, Stephen Harrington, ! Domenico Lordi, Kamen, John Zook, Cheng Jun, Mazlan Mat, Rob Shields, ! Wolfgang Germund, Raj Sharma, Robert Kausch, Gordon Deudney, Serge Kruppa, ! Roger Kjensrud, and Manpreet Singh for suggestions, bug reports and feature ideas. <br> <p>Thanks to Jon Gillette for the cool new logo.<br> |
|
From: Derrick O. <der...@us...> - 2005-06-20 01:56:42
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12246/htmlparser/src/org/htmlparser/http Modified Files: ConnectionManager.java Log Message: Optimizations suggested by profiling. Correction to previous drop: -- use extractAllNodesThatmatch to replace searchFor(cls) -- Index: ConnectionManager.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/ConnectionManager.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** ConnectionManager.java 19 Jun 2005 12:01:13 -0000 1.5 --- ConnectionManager.java 20 Jun 2005 01:56:32 -0000 1.6 *************** *** 143,146 **** --- 143,152 ---- /** + * Cookie expiry date format for parsing. + */ + static protected SimpleDateFormat mFormat = + new SimpleDateFormat ("EEE, dd-MMM-yy kk:mm:ss z"); + + /** * Create a connection manager. */ *************** *** 1056,1064 **** String comma = tokenizer.nextToken (); String rest = tokenizer.nextToken (); - SimpleDateFormat format = new SimpleDateFormat ( - "EEE, dd-MMM-yy kk:mm:ss z"); try { ! Date date = format.parse (value + comma + rest); cookie.setExpiryDate (date); } --- 1062,1068 ---- String comma = tokenizer.nextToken (); String rest = tokenizer.nextToken (); try { ! Date date = mFormat.parse (value + comma + rest); cookie.setExpiryDate (date); } |
|
From: Derrick O. <der...@us...> - 2005-06-20 01:56:42
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12246/htmlparser/src/org/htmlparser/tags Modified Files: CompositeTag.java Log Message: Optimizations suggested by profiling. Correction to previous drop: -- use extractAllNodesThatmatch to replace searchFor(cls) -- Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.80 retrieving revision 1.81 diff -C2 -d -r1.80 -r1.81 *** CompositeTag.java 10 Apr 2005 23:20:45 -0000 1.80 --- CompositeTag.java 20 Jun 2005 01:56:32 -0000 1.81 *************** *** 32,38 **** import org.htmlparser.NodeFilter; import org.htmlparser.Text; import org.htmlparser.nodes.AbstractNode; import org.htmlparser.nodes.TagNode; - import org.htmlparser.Tag; import org.htmlparser.scanners.CompositeTagScanner; import org.htmlparser.util.NodeList; --- 32,39 ---- import org.htmlparser.NodeFilter; import org.htmlparser.Text; + import org.htmlparser.Tag; + import org.htmlparser.filters.NodeClassFilter; import org.htmlparser.nodes.AbstractNode; import org.htmlparser.nodes.TagNode; import org.htmlparser.scanners.CompositeTagScanner; import org.htmlparser.util.NodeList; *************** *** 297,303 **** public NodeList searchFor (Class classType, boolean recursive) { ! return ( ! (null == getChildren ()) ? new NodeList () : ! getChildren ().searchFor (classType, recursive)); } --- 298,312 ---- public NodeList searchFor (Class classType, boolean recursive) { ! NodeList children; ! NodeList ret; ! ! children = getChildren (); ! if (null == children) ! ret = new NodeList (); ! else ! ret = children.extractAllNodesThatMatch ( ! new NodeClassFilter (classType), recursive); ! ! return (ret); } |
|
From: Derrick O. <der...@us...> - 2005-06-20 01:56:42
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12246/htmlparser/src/org/htmlparser/lexer Modified Files: Lexer.java Page.java Stream.java Log Message: Optimizations suggested by profiling. Correction to previous drop: -- use extractAllNodesThatmatch to replace searchFor(cls) -- Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.50 retrieving revision 1.51 diff -C2 -d -r1.50 -r1.51 *** Page.java 15 May 2005 11:49:04 -0000 1.50 --- Page.java 20 Jun 2005 01:56:32 -0000 1.51 *************** *** 687,699 **** { int i; char ret; i = cursor.getPosition (); ! if (mSource.offset () < i) ! // hmmm, we could skip ahead, but then what about the EOL index ! throw new ParserException ( ! "attempt to read future characters from source " ! + i + " > " + mSource.offset ()); ! else if (mSource.offset () == i) try { --- 687,696 ---- { int i; + int offset; char ret; i = cursor.getPosition (); ! offset = mSource.offset (); ! if (offset == i) try { *************** *** 713,717 **** + cursor.getPosition (), ioe); } ! else { // historic read --- 710,714 ---- + cursor.getPosition (), ioe); } ! 
else if (offset > i) { // historic read *************** *** 728,731 **** --- 725,733 ---- cursor.advance (); } + else + // hmmm, we could skip ahead, but then what about the EOL index + throw new ParserException ( + "attempt to read future characters from source " + + i + " > " + mSource.offset ()); // handle \r Index: Stream.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** Stream.java 15 May 2005 11:49:04 -0000 1.14 --- Stream.java 20 Jun 2005 01:56:32 -0000 1.15 *************** *** 264,270 **** // the array pointed to can only be bigger than the previous buffer, // and hence no array bounds exception can be raised. ! if (0 == available ()) fill (false); ! if (0 != available ()) ret = mBuffer[mOffset++] & 0xff; else --- 264,270 ---- // the array pointed to can only be bigger than the previous buffer, // and hence no array bounds exception can be raised. ! if (0 == (mLevel - mOffset)) // (0 == available ()) fill (false); ! if (0 != (mLevel - mOffset)) // (0 != available ()) ret = mBuffer[mOffset++] & 0xff; else Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** Lexer.java 15 May 2005 11:49:04 -0000 1.39 --- Lexer.java 20 Jun 2005 01:56:32 -0000 1.40 *************** *** 1371,1375 **** ConnectionManager manager = Page.getConnectionManager (); lexer = new Lexer (manager.openConnection (args[0])); ! while (null != (node = lexer.nextNode ())) System.out.println (node.toString ()); } --- 1371,1375 ---- ConnectionManager manager = Page.getConnectionManager (); lexer = new Lexer (manager.openConnection (args[0])); ! 
while (null != (node = lexer.nextNode (false))) System.out.println (node.toString ()); } |