[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes NodeFactory.java,NONE,1.1 StringNode.java
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-28 15:34:56
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv30684/lexer/nodes Modified Files: StringNode.java TagNode.java Added Files: NodeFactory.java Log Message: Lexer Integration. Removed old Parser classes. Removed EndTag; this class was replaced by a call to the new isEndTag() method on the Tag class. The StringNode, RemarkNode and tags.Tag classes now derive from their lexeme counterparts in lexer.nodes instead of the other way around. The beginnings of a node factory interface are included. This was added so the lexer could return 'visitable' nodes to the parser. The parser acts as its own node factory, as does the Lexer. The node count for parsing goes up in most cases because every whitespace (i.e. newline) now counts as a StringNode. This has whacked out a lot of the tests that were expecting fewer nodes or a certain type of node at a particular index. Attributes now maintain their order and case. The count of attributes also went up because whitespace is maintained within tags too. The storage in a Vector means the element 0 Attribute is actually the name of the tag, rather than having the $TAGNAME entry in a Hashtable. --- NEW FILE: NodeFactory.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2003 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/NodeFactory.java,v $ // $Author: derrickoswald $ // $Date: 2003/09/28 15:33:58 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.lexer.nodes; import java.util.Vector; import org.htmlparser.lexer.Lexer; import org.htmlparser.Node; import org.htmlparser.util.ParserException; /** * This interface defines the methods needed to create new nodes. * The factory is used when lexing to generate the nodes passed * back to the caller. */ public interface NodeFactory { /** * Create a new string node. * @param lexer The lexer parsing this string. * @param start The beginning position of the string. * @param end The ending position of the string. */ public Node createStringNode (Lexer lexer, int start, int end) throws ParserException; /** * Create a new remark node. * @param lexer The lexer parsing this remark. * @param start The beginning position of the remark. * @param end The ending position of the remark. */ public Node createRemarkNode (Lexer lexer, int start, int end) throws ParserException; /** * Create a new tag node. * @param lexer The lexer parsing this tag. * @param start The beginning position of the tag. * @param end The ending position of the tag. * @param attributes The attributes contained in this tag. 
*/ public Node createTagNode (Lexer lexer, int start, int end, Vector attributes) throws ParserException; } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** StringNode.java 22 Sep 2003 02:39:59 -0000 1.7 --- StringNode.java 28 Sep 2003 15:33:57 -0000 1.8 *************** *** 67,79 **** public void setText (String text) { ! try ! { ! mPage = new Page (text); ! nodeBegin = 0; ! nodeEnd = text.length (); ! } ! catch (ParserException pe) ! { ! } } --- 67,73 ---- public void setText (String text) { ! mPage = new Page (text); ! nodeBegin = 0; ! nodeEnd = text.length (); } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** TagNode.java 23 Sep 2003 03:41:33 -0000 1.11 --- TagNode.java 28 Sep 2003 15:33:57 -0000 1.12 *************** *** 358,362 **** public String getTagName () { ! return (getAttribute (TAGNAME).toUpperCase ()); } --- 358,368 ---- public String getTagName () { ! String ret; ! ! ret = getAttribute (TAGNAME).toUpperCase (); ! if (ret.startsWith ("/")) // end tag ! ret = ret.substring (1); ! ! return (ret); } *************** *** 483,495 **** public void setText (String text) { ! try ! { ! mPage = new Page (text); ! nodeBegin = 0; ! nodeEnd = text.length (); ! } ! catch (ParserException pe) ! { ! } } --- 489,495 ---- public void setText (String text) { ! mPage = new Page (text); ! nodeBegin = 0; ! nodeEnd = text.length (); } *************** *** 536,551 **** public String toString () { ! String tag; Cursor start; Cursor end; ! tag = getTagName (); ! if (tag.startsWith ("/")) ! tag = "End"; else ! 
tag = "Tag"; start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return (tag + " (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 536,550 ---- public String toString () { ! String type; Cursor start; Cursor end; ! if (isEndTag ()) ! type = "End"; else ! type = "Tag"; start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return (type + " (" + start.toString () + "," + end.toString () + "): " + getText ()); } *************** *** 557,561 **** public boolean breaksFlow () { ! return (breakTags.containsKey (getText ().toUpperCase ())); } --- 556,560 ---- public boolean breaksFlow () { ! return (breakTags.containsKey (getTagName ().toUpperCase ())); } *************** *** 581,600 **** } - /** - * Sometimes, a scanner may need to request a re-evaluation of the - * attributes in a tag. This may happen when there is some correction - * activity. An example of its usage can be found in ImageTag. - * <br> - * <B>Note:<B> This is an intensive task, hence call only when - * really necessary - * @return Hashtable - */ - public Hashtable redoParseAttributes () - { - mAttributes = null; - getAttributesEx (); - return (getAttributes ()); - } - public void accept (Object visitor) { --- 580,583 ---- *************** *** 621,623 **** --- 604,610 ---- } + public boolean isEndTag () + { + return ('/' == getAttribute (TAGNAME).toUpperCase ().charAt (0)); + } } |