[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes Attribute.java,1.8,1.9 TagNode.java,1.12,1.13
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-10-02 23:49:03
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv28867/lexer/nodes Modified Files: Attribute.java TagNode.java Log Message: Moved SpecialHashTable to util. Fixed some attribute bugs and some test cases. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** Attribute.java 22 Sep 2003 02:39:59 -0000 1.8 --- Attribute.java 2 Oct 2003 23:48:53 -0000 1.9 *************** *** 148,156 **** { if (null == mName) ! if (0 <= mNameStart) mName = mPage.getText (mNameStart, mNameEnd); return (mName); } /** * Get the value of the attribute. --- 148,166 ---- { if (null == mName) ! if ((null != mPage) && (0 <= mNameStart)) mName = mPage.getText (mNameStart, mNameEnd); return (mName); } + /** + * Predicate to determine if this attribute is whitespace. + * @return <code>true</code> if this attribute is whitespace, + * <code>false</code> if it is a real attribute. + */ + public boolean isWhitespace () + { + return (null == getName ()); + } + /** * Get the value of the attribute. Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** TagNode.java 28 Sep 2003 15:33:57 -0000 1.12 --- TagNode.java 2 Oct 2003 23:48:53 -0000 1.13 *************** *** 32,68 **** import java.util.Hashtable; import java.util.Vector; - import org.htmlparser.lexer.Cursor; import org.htmlparser.lexer.Page; - import org.htmlparser.parserHelper.SpecialHashtable; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; import org.htmlparser.util.Translate; /** ! * TagNode represents a generic tag. 
This class allows users to register specific ! * tag scanners, which can identify links, or image references. This tag asks the ! * scanners to run over the text, and identify. It can be used to dynamically ! * configure a parser. ! * @author Kaarle Kaila 23.10.2001 */ ! public class TagNode extends AbstractNode { ! public static final String TYPE = "TAG"; ! /** ! * Constant used as value for the value of the tag name ! * in parseParameters (Kaarle Kaila 3.8.2001) ! */ ! public final static String TAGNAME = "$<TAGNAME>$"; ! public final static String EMPTYTAG = "$<EMPTYTAG>$"; ! public final static String NULLVALUE = "$<NULL>$"; ! public final static String NOTHING = "$<NOTHING>$"; ! private final static String EMPTY_STRING=""; ! ! private boolean emptyXmlTag = false; /** * The tag attributes. ! * Objects of type Attribute. */ protected Vector mAttributes; --- 32,57 ---- import java.util.Hashtable; import java.util.Vector; + import org.htmlparser.lexer.Cursor; + import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; + import org.htmlparser.util.SpecialHashtable; import org.htmlparser.util.Translate; /** ! * TagNode represents a generic tag. ! * */ ! public class TagNode ! extends ! AbstractNode { ! private boolean emptyXmlTag; /** * The tag attributes. ! * Objects of type {@link Attribute}. */ protected Vector mAttributes; *************** *** 108,111 **** --- 97,108 ---- /** + * Create an empty tag. + */ + public TagNode () + { + this (null, -1, -1, new Vector ()); + } + + /** * Create a tag with the location and attributes provided * @param page The page this tag was read from. *************** *** 119,131 **** super (page, start, end); mAttributes = attributes; ! } ! ! /** ! * Create an empty tag. ! */ ! public TagNode () ! { ! super (null, -1, -1); ! mAttributes = new Vector (); } --- 116,120 ---- super (page, start, end); mAttributes = attributes; ! 
emptyXmlTag = false; } *************** *** 138,165 **** public String getAttribute (String name) { - Vector attributes; - int size; Attribute attribute; - String string; String ret; ret = null; ! attributes = getAttributesEx (); ! if (name.equalsIgnoreCase (TAGNAME)) ! ret = ((Attribute)attributes.elementAt (0)).getName (); else { ! size = attributes.size (); ! for (int i = 1; i < size; i++) ! { ! attribute = (Attribute)attributes.elementAt (i); ! string = attribute.getName (); ! if ((null != string) && name.equalsIgnoreCase (string)) ! { ! ret = attribute.getValue (); ! i = size; // exit fast ! } ! } } --- 127,142 ---- public String getAttribute (String name) { Attribute attribute; String ret; ret = null; ! if (name.equalsIgnoreCase (SpecialHashtable.TAGNAME)) ! ret = ((Attribute)getAttributesEx ().elementAt (0)).getName (); else { ! attribute = getAttributeEx (name); ! if (null != attribute) ! ret = attribute.getValue (); } *************** *** 243,246 **** --- 220,255 ---- /** + * Returns the attribute with the given name. + * @param name Name of attribute, case insensitive. + * @return The attribute or null if it does + * not exist. + */ + public Attribute getAttributeEx (String name) + { + Vector attributes; + int size; + Attribute attribute; + String string; + Attribute ret; + + ret = null; + + attributes = getAttributesEx (); + size = attributes.size (); + for (int i = 0; i < size; i++) + { + attribute = (Attribute)attributes.elementAt (i); + string = attribute.getName (); + if ((null != string) && name.equalsIgnoreCase (string)) + { + ret = attribute; + i = size; // exit fast + } + } + + return (ret); + } + + /** * Set an attribute. * This replaces an attribute of the same name. *************** *** 252,255 **** --- 261,265 ---- boolean replaced; Vector attributes; + int length; String name; Attribute test; *************** *** 258,262 **** replaced = false; attributes = getAttributesEx (); ! 
if (0 < attributes.size ()) { name = attribute.getName (); --- 268,273 ---- replaced = false; attributes = getAttributesEx (); ! length = attributes.size (); ! if (0 < length) { name = attribute.getName (); *************** *** 274,278 **** --- 285,294 ---- } if (!replaced) + { + // add whitespace between attributes + if (!((Attribute)attributes.elementAt (length - 1)).isWhitespace ()) + attributes.addElement (new Attribute ((String)null, " ", (char)0)); attributes.addElement (attribute); + } } *************** *** 296,300 **** public Vector getAttributesEx () { ! return mAttributes; } --- 312,316 ---- public Vector getAttributesEx () { ! return (mAttributes); } *************** *** 317,326 **** // special handling for the node name attribute = (Attribute)attributes.elementAt (0); ! ret.put (TAGNAME, attribute.getName ().toUpperCase ()); // the rest for (int i = 1; i < attributes.size (); i++) { attribute = (Attribute)attributes.elementAt (i); ! if (null != attribute.getName ()) { if (0 != attribute.getQuote ()) --- 333,342 ---- // special handling for the node name attribute = (Attribute)attributes.elementAt (0); ! ret.put (SpecialHashtable.TAGNAME, attribute.getName ().toUpperCase ()); // the rest for (int i = 1; i < attributes.size (); i++) { attribute = (Attribute)attributes.elementAt (i); ! if (!attribute.isWhitespace ()) { if (0 != attribute.getQuote ()) *************** *** 330,337 **** value = attribute.getValue (); if ((null != value) && value.equals ("")) ! value = NOTHING; } if (null == value) ! value = NULLVALUE; ret.put (attribute.getName ().toUpperCase (), value); } --- 346,353 ---- value = attribute.getValue (); if ((null != value) && value.equals ("")) ! value = SpecialHashtable.NOTHING; } if (null == value) ! value = SpecialHashtable.NULLVALUE; ret.put (attribute.getName ().toUpperCase (), value); } *************** *** 339,343 **** } else ! ret.put (TAGNAME, ""); return (ret); --- 355,359 ---- } else ! 
ret.put (SpecialHashtable.TAGNAME, ""); return (ret); *************** *** 348,356 **** * <p> * <em> ! * Note: This value is converted to uppercase. ! * To get at the original case version of the tag name use: ! * <pre> ! * getAttribute (TagNode.TAGNAME); ! * </pre> * </em> * @return The tag name. --- 364,371 ---- * <p> * <em> ! * Note: This value is converted to uppercase and does not ! * begin with "/" if it is an end tag. ! * To get at the original text of the tag name use ! * {@link #getRawTagName getRawTagName()}. * </em> * @return The tag name. *************** *** 358,366 **** public String getTagName () { String ret; ! ret = getAttribute (TAGNAME).toUpperCase (); ! if (ret.startsWith ("/")) // end tag ! ret = ret.substring (1); return (ret); --- 373,411 ---- public String getTagName () { + Vector attributes; String ret; ! ret = null; ! ! attributes = getAttributesEx (); ! if (0 != attributes.size ()) ! { ! ret = getRawTagName (); ! if (null != ret) ! { ! ret = ret.toUpperCase (); ! if (ret.startsWith ("/")) ! ret = ret.substring (1); ! } ! } ! ! return (ret); ! } ! ! /** ! * Return the name of this tag. ! * @return The tag name or null if this tag contains nothing or only ! * whitespace. ! */ ! public String getRawTagName () ! { ! Vector attributes; ! String ret; ! ! ret = null; ! ! attributes = getAttributesEx (); ! if (0 != attributes.size ()) ! ret = ((Attribute)attributes.elementAt (0)).getName (); return (ret); *************** *** 401,405 **** public String getText () { ! return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); } --- 446,456 ---- public String getText () { ! String ret; ! ! //ret = mPage.getText (elementBegin () + 1, elementEnd () - 1); ! ret = toHtml (); ! ret = ret.substring (1, ret.length () - 1); ! ! return (ret); } *************** *** 433,438 **** else quote = (char)0; ! attribute = new Attribute (key, value, quote); ! att.addElement (attribute); } this.mAttributes = att; --- 484,500 ---- else quote = (char)0; ! 
if (key.equals (SpecialHashtable.TAGNAME)) ! { ! attribute = new Attribute (value, null, quote); ! att.insertElementAt (attribute, 0); ! } ! else ! { ! // add whitespace between attributes ! attribute = new Attribute ((String)null, " ", (char)0); ! att.addElement (attribute); ! attribute = new Attribute (key, value, quote); ! att.addElement (attribute); ! } } this.mAttributes = att; *************** *** 489,500 **** public void setText (String text) { ! mPage = new Page (text); ! nodeBegin = 0; ! nodeEnd = text.length (); } public String toPlainTextString () { ! return (EMPTY_STRING); } --- 551,580 ---- public void setText (String text) { ! Lexer lexer; ! TagNode output; ! ! lexer = new Lexer (text); ! try ! { ! output = (TagNode)lexer.nextNode (); ! mPage = output.getPage (); ! nodeBegin = output.elementBegin (); ! nodeEnd = output.elementEnd (); ! mAttributes = output.getAttributesEx (); ! } ! catch (ParserException pe) ! { ! throw new IllegalArgumentException (pe.getMessage ()); ! } } + /** + * Get the plain text from this node. + * @return An empty string (tag contents do not display in a browser). + * If you want this tags HTML equivalent, use {@link #toHtml toHtml()}. + */ public String toPlainTextString () { ! return (""); } *************** *** 584,592 **** } - public String getType () - { - return TYPE; - } - /** * Is this an empty xml tag of the form<br> --- 664,667 ---- *************** *** 604,610 **** } public boolean isEndTag () { ! return ('/' == getAttribute (TAGNAME).toUpperCase ().charAt (0)); } } --- 679,693 ---- } + /** + * Predicate to determine if this tag is an end tag (i.e. </HTML>). + * @return <code>true</code> if this tag is an end tag. + */ public boolean isEndTag () { ! String raw; ! ! raw = getRawTagName (); ! ! return ((null == raw) ? false : ('/' == raw.charAt (0))); } } |