[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes Attribute.java,1.4,1.5 TagNode.java,1.6,1.7
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-07 21:29:02
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv25784 Modified Files: Attribute.java TagNode.java Log Message: Fix setAttribute and optimize getAttribute for speed. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Attribute.java 24 Aug 2003 21:59:41 -0000 1.4 --- Attribute.java 7 Sep 2003 21:28:03 -0000 1.5 *************** *** 37,41 **** /** * An attribute within a tag. ! * <p>If Name is null, it's whitepace and Value has the text. * <p>If Name is not null, and Value is null it's a standalone attribute. * <p>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. --- 37,41 ---- /** * An attribute within a tag. ! * <p>If Name is null, it is whitepace and Value has the text. * <p>If Name is not null, and Value is null it's a standalone attribute. * <p>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. *************** *** 156,159 **** --- 156,160 ---- * Get the value of the attribute. * The part after the equals sign, or the text if it's just a whitepace 'attribute'. + * <em>NOTE: This does not include any quotes that may have enclosed the value.</em> * @return The value, or <code>null</code> if it's a stand-alone attribute, * or the text if it's just a whitepace 'attribute'. *************** *** 168,171 **** --- 169,228 ---- /** + * Get the raw value of the attribute. + * The part after the equals sign, or the text if it's just a whitepace 'attribute'. + * @return The value, or <code>null</code> if it's a stand-alone attribute, + * or the text if it's just a whitepace 'attribute'. 
+ */ + public String getRawValue () + { + char quote; + StringBuffer buffer; + String ret; + + ret = getValue (); + if (null != ret && (0 != (quote = getQuote ()))) + { + buffer = new StringBuffer (ret.length() + 2); + buffer.append (quote); + buffer.append (ret); + buffer.append (quote); + ret = buffer.toString (); + } + + return (ret); + } + + /** + * Get the raw value of the attribute. + * The part after the equals sign, or the text if it's just a whitepace 'attribute'. + * @return The value, or <code>null</code> if it's a stand-alone attribute, + * or the text if it's just a whitepace 'attribute'. + */ + public void getRawValue (StringBuffer buffer) + { + char quote; + + if (null == mValue) + { + if (0 <= mValueStart) + { + if (0 != (quote = getQuote ())) + buffer.append (quote); + mPage.getText (buffer, mValueStart, mValueEnd); + if (0 != quote) + buffer.append (quote); + } + } + else + { + if (0 != (quote = getQuote ())) + buffer.append (quote); + buffer.append (mValue); + if (0 != quote) + buffer.append (quote); + } + } + + /** * Get the quote, if any, surrounding the value of the attribute, if any. * @return Either ' or " if the attribute value was quoted, or zero *************** *** 194,218 **** public void toString (StringBuffer buffer) { - String value; String name; - value = getValue (); name = getName (); if (null == name) ! { ! if (value != null) ! buffer.append (value); ! } else { buffer.append (name); ! if (null != value) { buffer.append ("="); ! if (0 != getQuote ()) ! buffer.append (getQuote ()); ! buffer.append (value); ! if (0 != getQuote ()) ! buffer.append (getQuote ()); } } --- 251,266 ---- public void toString (StringBuffer buffer) { String name; name = getName (); if (null == name) ! getRawValue (buffer); else { buffer.append (name); ! if (0 <= mValueStart) { buffer.append ("="); ! 
getRawValue (buffer); } } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** TagNode.java 3 Sep 2003 23:36:19 -0000 1.6 --- TagNode.java 7 Sep 2003 21:28:03 -0000 1.7 *************** *** 38,41 **** --- 38,42 ---- import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; + import org.htmlparser.util.Translate; /** *************** *** 121,152 **** /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter */ public String getAttribute (String name) { ! return ((String)getAttributes().get(name.toUpperCase())); } /** * Set attribute with given key, value pair. ! * @param key ! * @param value */ ! public void setAttribute(String key, String value) { ! getAttributes ().put(key,value); } /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter * @deprecated use getAttribute instead */ ! public String getParameter(String name) { ! return (String)getAttributes().get (name.toUpperCase()); } --- 122,293 ---- /** ! * Create a tag with the location and attributes provided ! * @param page The page this tag was read from. ! * @param start The starting offset of this node within the page. ! * @param end The ending offset of this node within the page. ! * @param attributes The list of attributes that were parsed in this tag. ! * @see Attribute ! */ ! public TagNode () ! { ! super (null, -1, -1); ! mAttributes = new Vector (); ! } ! ! /** ! * Returns the value of an attribute. ! * @param name Name of attribute, case insensitive. ! * @return The value associated with the attribute or null if it does ! * not exist, or is a stand-alone or */ public String getAttribute (String name) { ! 
Vector attributes; ! int size; ! Attribute attribute; ! String string; ! String ret; ! ! ret = null; ! ! attributes = getAttributesEx (); ! if (name.equalsIgnoreCase (TAGNAME)) ! ret = ((Attribute)attributes.elementAt (0)).getName (); ! else ! { ! size = attributes.size (); ! for (int i = 1; i < size; i++) ! { ! attribute = (Attribute)attributes.elementAt (i); ! string = attribute.getName (); ! if ((null != string) && name.equalsIgnoreCase (string)) ! { ! ret = attribute.getValue (); ! i = size; // exit fast ! } ! } ! } ! ! return (ret); } /** * Set attribute with given key, value pair. ! * Figures out a quote character to use if necessary. ! * @param key The name of the attribute. ! * @param value The value of the attribute. */ ! public void setAttribute (String key, String value) { ! char ch; ! boolean needed; ! boolean singleq; ! boolean doubleq; ! String ref; ! StringBuffer buffer; ! char quote; ! ! // first determine if there's whitespace in the value ! // and while we'return at it find a suitable quote character ! needed = false; ! singleq = true; ! doubleq = true; ! for (int i = 0; i < value.length (); i++) ! { ! ch = value.charAt (i); ! if (Character.isWhitespace (ch)) ! needed = true; ! else if ('\'' == ch) ! singleq = false; ! else if ('"' == ch) ! doubleq = false; ! } ! ! // now apply quoting ! if (needed) ! { ! if (doubleq) ! quote = '"'; ! else if (singleq) ! quote = '\''; ! else ! { ! // uh-oh, we need to convert some quotes into character references ! // convert all double quotes into " ! quote = '"'; ! ref = Translate.convertToString (quote); ! // JDK 1.4: value = value.replaceAll ("\"", ref); ! buffer = new StringBuffer (value.length() * 5); ! for (int i = 0; i < value.length (); i++) ! { ! ch = value.charAt (i); ! if ('"' == ch) ! buffer.append (ref); ! else ! buffer.append (ch); ! } ! value = buffer.toString (); ! } ! } ! else ! quote = 0; ! setAttribute (key, value, quote); } /** ! 
* Set attribute with given key, value pair where the value is quoted by quote. ! * @param key The name of the attribute. ! * @param value The value of the attribute. ! * @param quote The quote character to be used around value. ! * If zero, it is an unquoted value. ! */ ! public void setAttribute (String key, String value, char quote) ! { ! setAttribute (new Attribute (key, value, quote)); ! } ! ! /** ! * Set an attribute. ! * This replaces an attribute of the same name. ! * To set the zeroth attribute (the tag name), use setTagName(). ! * @param attribute The attribute to set. ! */ ! public void setAttribute (Attribute attribute) ! { ! boolean replaced; ! Vector attributes; ! String name; ! Attribute test; ! String test_name; ! ! replaced = false; ! attributes = getAttributesEx (); ! if (0 < attributes.size ()) ! { ! name = attribute.getName (); ! for (int i = 1; i < attributes.size (); i++) ! { ! test = (Attribute)attributes.elementAt (i); ! test_name = test.getName (); ! if (null != test_name) ! if (test_name.equalsIgnoreCase (name)) ! { ! attributes.setElementAt (attribute, i); ! replaced = true; ! } ! } ! } ! if (!replaced) ! attributes.addElement (attribute); ! } ! ! /** ! * Eqivalent to <code>getAttribute (name)</code>. ! * @param name Name of attribute. * @deprecated use getAttribute instead */ ! public String getParameter (String name) { ! return (getAttribute (name)); } *************** *** 158,162 **** * @return Returns a special hashtable of attributes in two element String arrays. */ ! public Vector getAttributesEx() { return mAttributes; --- 299,303 ---- * @return Returns a special hashtable of attributes in two element String arrays. */ ! public Vector getAttributesEx () { return mAttributes; *************** *** 165,171 **** /** * Gets the attributes in the tag. ! * @return Returns a Hashtable of attributes */ ! public Hashtable getAttributes() { Vector attributes; --- 306,312 ---- /** * Gets the attributes in the tag. ! 
* @return Returns a Hashtable of attributes. */ ! public Hashtable getAttributes () { Vector attributes; *************** *** 188,213 **** if (null != attribute.getName ()) { ! value = attribute.getValue (); ! if ('\'' == attribute.getQuote ()) ! { ! _value = new StringBuffer (value.length () + 2); ! _value.append ("'"); ! _value.append (value); ! _value.append ("'"); ! value = _value.toString (); ! } ! else if ('"' == attribute.getQuote ()) { ! _value = new StringBuffer (value.length () + 2); ! _value.append ("\""); ! _value.append (value); ! _value.append ("\""); ! value = _value.toString (); } - else if ((null != value) && value.equals ("")) - value = NOTHING; if (null == value) value = NULLVALUE; ! ret.put (attribute.getName (), value); } } --- 329,343 ---- if (null != attribute.getName ()) { ! if (0 != attribute.getQuote ()) ! value = attribute.getRawValue (); ! else { ! value = attribute.getValue (); ! if ((null != value) && value.equals ("")) ! value = NOTHING; } if (null == value) value = NULLVALUE; ! ret.put (attribute.getName ().toUpperCase (), value); } } *************** *** 219,230 **** } ! public String getTagName(){ ! return getParameter(TAGNAME); } /** ! * Return the text contained in this tag */ ! public String getText() { return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); --- 349,402 ---- } ! /** ! * Return the name of this tag. ! * <p> ! * <em> ! * Note: This value is converted to uppercase. ! * To get at the original case version of the tag name use: ! * <pre> ! * getAttribute (TagNode.TAGNAME); ! * </pre> ! * </em> ! * @return The tag name. ! */ ! public String getTagName () ! { ! return (getAttribute (TAGNAME).toUpperCase ()); } /** ! * Set the name of this tag. ! * This creates or replaces the first attribute of the tag (the ! * zeroth element of the attribute vector). ! * @param name The tag name. */ ! public void setTagName (String name) ! { ! Attribute attribute; ! Vector attributes; ! Attribute zeroth; ! ! 
attribute = new Attribute (name, null, (char)0); ! attributes = getAttributesEx (); ! if (0 == attributes.size ()) ! // nothing added yet ! attributes.addElement (attribute); ! else ! { ! zeroth = (Attribute)attributes.elementAt (0); ! // check forn attribute that looks like a name ! if ((null == zeroth.getValue ()) && (0 == zeroth.getQuote ())) ! attributes.setElementAt (attribute, 0); ! else ! attributes.insertElementAt (attribute, 0); ! } ! } ! ! /** ! * Return the text contained in this tag. ! * @return The complete contents of the tag (within the angle brackets). ! */ ! public String getText () { return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); *************** *** 282,287 **** * @param tagBegin The nodeBegin to set */ ! public void setTagBegin(int tagBegin) { ! this.nodeBegin = tagBegin; } --- 454,460 ---- * @param tagBegin The nodeBegin to set */ ! public void setTagBegin (int tagBegin) ! { ! nodeBegin = tagBegin; } *************** *** 290,294 **** * @return The nodeBegin value. */ ! public int getTagBegin() { return (nodeBegin); } --- 463,468 ---- * @return The nodeBegin value. */ ! public int getTagBegin () ! { return (nodeBegin); } *************** *** 298,303 **** * @param tagEnd The nodeEnd to set */ ! public void setTagEnd(int tagEnd) { ! this.nodeEnd = tagEnd; } --- 472,478 ---- * @param tagEnd The nodeEnd to set */ ! public void setTagEnd (int tagEnd) ! { ! nodeEnd = tagEnd; } *************** *** 306,310 **** * @return The nodeEnd value. */ ! public int getTagEnd() { return (nodeEnd); } --- 481,486 ---- * @return The nodeEnd value. */ ! public int getTagEnd () ! { return (nodeEnd); } *************** *** 323,328 **** } ! public String toPlainTextString() { ! return EMPTY_STRING; } --- 499,505 ---- } ! public String toPlainTextString () ! { ! return (EMPTY_STRING); } *************** *** 331,335 **** * @see org.htmlparser.Node#toHtml() */ ! public String toHtml() { StringBuffer ret; --- 508,512 ---- * @see org.htmlparser.Node#toHtml() */ ! 
public String toHtml () { StringBuffer ret; *************** *** 362,366 **** * Print the contents of the tag */ ! public String toString() { String tag; --- 539,543 ---- * Print the contents of the tag */ ! public String toString () { String tag; *************** *** 395,399 **** * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) { } --- 572,576 ---- * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto (NodeList collectionList, String filter) { } *************** *** 404,408 **** * @deprecated This method is deprecated. Use getAttributes() instead. */ ! public Hashtable getParsed() { return getAttributes (); } --- 581,586 ---- * @deprecated This method is deprecated. Use getAttributes() instead. */ ! public Hashtable getParsed () ! { return getAttributes (); } *************** *** 417,421 **** * @return Hashtable */ ! public Hashtable redoParseAttributes() { mAttributes = null; --- 595,599 ---- * @return Hashtable */ ! public Hashtable redoParseAttributes () { mAttributes = null; *************** *** 424,431 **** } ! public void accept(Object visitor) { } ! public String getType() { return TYPE; } --- 602,611 ---- } ! public void accept (Object visitor) ! { } ! public String getType () ! { return TYPE; } *************** *** 436,444 **** * @return boolean */ ! public boolean isEmptyXmlTag() { return emptyXmlTag; } ! public void setEmptyXmlTag(boolean emptyXmlTag) { this.emptyXmlTag = emptyXmlTag; } --- 616,626 ---- * @return boolean */ ! public boolean isEmptyXmlTag () ! { return emptyXmlTag; } ! public void setEmptyXmlTag (boolean emptyXmlTag) ! { this.emptyXmlTag = emptyXmlTag; } |