[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes Attribute.java,1.10,1.11 TagNode.java,1.15,1.16
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-10-13 21:48:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv16902/lexer/nodes Modified Files: Attribute.java TagNode.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to serviceability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Attribute.java 5 Oct 2003 13:49:42 -0000 1.10 --- Attribute.java 13 Oct 2003 21:48:12 -0000 1.11 *************** *** 37,54 **** /** * An attribute within a tag. ! * <p>If Name is null, it is whitepace and Value has the text. ! * <p>If Name is not null, and Value is null it's a standalone attribute. ! * <p>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. ! * <p>If Name is not null, and Value is "", and Quote is ' it's an empty single quoted attribute. ! * <p>If Name is not null, and Value is "", and Quote is " it's an empty double quoted attribute. ! * <p>If Name is not null, and Value is something, and Quote is zero it's a naked attribute. ! * <p>If Name is not null, and Value is something, and Quote is ' it's a single quoted attribute. ! * <p>If Name is not null, and Value is something, and Quote is " it's a double quoted attribute. ! * <p>All other states are illegal. * <p> * The attribute can be 'lazy loaded' by providing the page and cursor offsets ! * into the page for the name and value. In this case if the starting offset is ! * less than zero, the element is null. 
This is done for speed, since if the name ! * and value are not been needed we can avoid the cost of creating the strings. */ public class Attribute --- 37,56 ---- /** * An attribute within a tag. ! * <br>If Name is null, it is whitepace and Value has the text. ! * <br>If Name is not null, and Value is null it's a standalone attribute. ! * <br>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. ! * <br>If Name is not null, and Value is "", and Quote is ' it's an empty single quoted attribute. ! * <br>If Name is not null, and Value is "", and Quote is " it's an empty double quoted attribute. ! * <br>If Name is not null, and Value is something, and Quote is zero it's a naked attribute. ! * <br>If Name is not null, and Value is something, and Quote is ' it's a single quoted attribute. ! * <br>If Name is not null, and Value is something, and Quote is " it's a double quoted attribute. ! * <br>All other states are illegal. * <p> * The attribute can be 'lazy loaded' by providing the page and cursor offsets ! * into the page for the name and value. In this case if the starting offset of ! * the name is less than zero, the name is null, and if the ending offset of the ! * value is less than zero, the value is null.. This is done for speed, since ! * if the name and value are not been needed we can avoid the cost and memory ! * overhead of creating the strings. */ public class Attribute *************** *** 176,180 **** return (null == getName ()); } ! /** * Get the value of the attribute. --- 178,212 ---- return (null == getName ()); } ! ! /** ! * Predicate to determine if this attribute has no equals sign (or value). ! * @return <code>true</code> if this attribute is a standalone attribute. ! * <code>false</code> if has an equals sign. ! */ ! public boolean isStandAlone () ! { ! return (-1 == mValueStart); ! } ! ! /** ! * Predicate to determine if this attribute has an equals sign but no value. ! 
* @return <code>true</code> if this attribute is an empty attribute. ! * <code>false</code> if has an equals sign and a value. ! */ ! public boolean isEmpty () ! { ! return ((-1 != mValueStart) && (-1 == mValueEnd)); ! } ! ! /** ! * Predicate to determine if this attribute has a value. ! * @return <code>true</code> if this attribute has a value. ! * <code>false</code> if it is empty or standalone. ! */ ! public boolean isValued () ! { ! return ((-1 != mValueStart) && (-1 != mValueEnd)); ! } ! /** * Get the value of the attribute. *************** *** 187,191 **** { if (null == mValue) ! if (0 <= mValueStart) mValue = mPage.getText (mValueStart, mValueEnd); return (mValue); --- 219,223 ---- { if (null == mValue) ! if ((null != mPage) && (0 <= mValueEnd)) mValue = mPage.getText (mValueStart, mValueEnd); return (mValue); *************** *** 229,233 **** if (null == mValue) { ! if (0 <= mValueStart) { if (0 != (quote = getQuote ())) --- 261,265 ---- if (null == mValue) { ! if (0 <= mValueEnd) { if (0 != (quote = getQuote ())) *************** *** 259,262 **** --- 291,352 ---- /** + * Set the quote surrounding the value of the attribute. + * @param quote The new quote value. 
+ */ + public void setQuote (char quote) + { + mQuote = quote; + } + + public Page getPage () + { + return (mPage); + } + + public int getNameStartPosition () + { + return (mNameStart); + } + + public void setNameStartPosition (int start) + { + mNameStart = start; + mName = null; + } + + public int getNameEndPosition () + { + return (mNameEnd); + } + + public void setNameEndPosition (int end) + { + mNameEnd = end; + mName = null; + } + + public int getValueStartPosition () + { + return (mValueStart); + } + + public void setValueStartPosition (int start) + { + mValueStart = start; + mValue = null; + } + + public int getValueEndPosition () + { + return (mValueEnd); + } + + public void setValueEndPosition (int end) + { + mValueEnd = end; + mValue = null; + } + + /** * Get a text representation of this attribute. * Suitable for insertion into a start tag, the output is one of *************** *** 266,272 **** * value * name ! * name= value ! * name= 'value' ! * name= "value" * </pre> * </code> --- 356,362 ---- * value * name ! * name=value ! * name='value' ! * name="value" * </pre> * </code> *************** *** 284,291 **** buffer.append (name); if (0 <= mValueStart) ! { ! buffer.append ("="); getRawValue (buffer); - } } } --- 374,383 ---- buffer.append (name); if (0 <= mValueStart) ! if (null == mPage) ! buffer.append ("="); ! else ! mPage.getText (buffer, mNameEnd, mValueStart - (0 == getQuote () ? 0 : 1)); ! if (0 <= mValueEnd) getRawValue (buffer); } } *************** *** 317,321 **** if (null != value) { ! length += 1; length += value.length (); if (0 != getQuote ()) --- 409,416 ---- if (null != value) { ! if (null == mPage) ! length += 1; ! else ! length += mValueStart - (0 == getQuote () ? 
1 : 0) - mNameEnd; length += value.length (); if (0 != getQuote ()) Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** TagNode.java 5 Oct 2003 13:49:43 -0000 1.15 --- TagNode.java 13 Oct 2003 21:48:12 -0000 1.16 *************** *** 302,309 **** /** * Gets the attributes in the tag. ! * NOTE: Values of the extended hashtable are two element arrays of String, ! * with the first element being the original name (not uppercased), ! * and the second element being the value. ! * @return Returns a special hashtable of attributes in two element String arrays. */ public Vector getAttributesEx () --- 302,306 ---- /** * Gets the attributes in the tag. ! * @return Returns the list of {@link Attribute Attributes} in the tag. */ public Vector getAttributesEx () *************** *** 314,318 **** /** * Gets the attributes in the tag. ! * @return Returns a Hashtable of attributes. */ public Hashtable getAttributes () --- 311,324 ---- /** * Gets the attributes in the tag. ! * This is not the preferred method to get attributes, see {@link ! * #getAttributesEx getAttributesEx} which returns a list of {@link ! * Attribute} objects, which offer more information than the simple ! * <code>String</code> objects available from this <code>Hashtable</code>. ! * @return Returns a list of name/value pairs representing the attributes. ! * These are not in order, the keys (names) are capitalized and the values ! * are not quoted, even if they need to be. The table <em>will</em> return ! * <code>null</code> if there was no value for an attribute (no equals ! * sign or nothing to the right of the equals sign). A special entry with ! * a key of SpecialHashtable.TAGNAME ("$<TAGNAME>$") holds the tag name. 
*/ public Hashtable getAttributes () *************** *** 337,348 **** if (!attribute.isWhitespace ()) { ! if (0 != attribute.getQuote ()) ! value = attribute.getRawValue (); ! else ! { ! value = attribute.getValue (); ! if ((null != value) && value.equals ("")) ! value = SpecialHashtable.NOTHING; ! } if (null == value) value = SpecialHashtable.NULLVALUE; --- 343,349 ---- if (!attribute.isWhitespace ()) { ! value = attribute.getValue (); ! if (attribute.isEmpty ()) ! value = SpecialHashtable.NOTHING; if (null == value) value = SpecialHashtable.NULLVALUE; *************** *** 689,693 **** * Set this tag to be an empty xml node, or not. * Adds or removes an ending slash on the tag. ! * @param If true, ensures there is an ending slash in the node, * i.e. <tag/>, otherwise removes it. */ --- 690,694 ---- * Set this tag to be an empty xml node, or not. * Adds or removes an ending slash on the tag. ! * @param emptyXmlTag If true, ensures there is an ending slash in the node, * i.e. <tag/>, otherwise removes it. */ |