[Adapdev-commits] Adapdev/src/Adapdev.Web/Html HtmlAttribute.cs,1.3,1.4 HtmlComment.cs,1.3,1.4 HtmlD
Status: Beta
Brought to you by:
intesar66
From: Sean M. <int...@us...> - 2005-11-16 07:02:09
|
Update of /cvsroot/adapdev/Adapdev/src/Adapdev.Web/Html
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv909/src/Adapdev.Web/Html

Added Files:
    HtmlAttribute.cs HtmlComment.cs HtmlDocument.cs HtmlDomainTreeParser.cs HtmlElement.cs HtmlElementClose.cs HtmlHelper.cs HtmlLinearParser.cs HtmlNode.cs HtmlParser.cs HtmlScript.cs HtmlStyleSheet.cs HtmlText.cs SgmlComment.cs
Log Message:

--- NEW FILE: HtmlParser.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.

using System;
using System.IO;
using System.Text;
using System.Web;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// This class is thrown when there is an error parsing the HTML.
    /// </summary>
    public class HtmlParserException: Exception
    {
        private int mLineNumber;

        /// <summary>
        /// Creates a new parse error.
        /// </summary>
        /// <param name="message">Description of the problem</param>
        /// <param name="lineNumber">The line number the parser had reached when the problem was detected</param>
        public HtmlParserException(string message,int lineNumber) : base( message )
        {
            mLineNumber = lineNumber;
        }

        /// <summary>
        /// The base message followed by " at line N".
        /// </summary>
        public override String Message
        {
            get
            {
                return base.Message + " at line " + mLineNumber;
            }
        }
    }

    /// <summary>
    /// This is the base class of any HtmlParser you wish to implement. It handles
    /// the tag extraction and decoding for you. NB. This class is NOT thread-safe. You
    /// should not attempt to parse two files at the same time using the same instance
    /// of a parser.
    /// </summary>
    public abstract class HtmlParser
    {
        private const int END_OF_FILE = -1;

        // Tag names and terminators are compared using the invariant culture so that
        // recognition of SCRIPT/STYLE/DOCTYPE does not depend on the thread's culture
        // (e.g. "script".ToUpper() is not "SCRIPT" under tr-TR).
        private static readonly System.Globalization.CultureInfo INVARIANT = System.Globalization.CultureInfo.InvariantCulture;

        private int mLineNumber;
        private bool mIgnoreErrors;

        /// <summary>
        /// Default constructor only exposed to make inheritance easier.
        /// </summary>
        public HtmlParser()
        {
            mIgnoreErrors = false;
        }

        /// <summary>
        /// This will parse the given HTML string.
        /// </summary>
        /// <param name="htmlString">The sequence of HTML tags to parse</param>
        /// <returns>The document representing the HTML</returns>
        public HtmlDocument Parse(String htmlString)
        {
            return Parse( new StringReader( htmlString ) );
        }

        /// <summary>
        /// Implement this method if you want to handle text nodes.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="htmlText">The node that has been read</param>
        protected virtual void OnText(HtmlDocument document,HtmlText htmlText)
        {
        }

        /// <summary>
        /// Implement this method if you want to handle script nodes.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="scriptSource">The node that has been read</param>
        protected virtual void OnScript(HtmlDocument document,HtmlScript scriptSource)
        {
        }

        /// <summary>
        /// Implement this method if you want to handle stylesheet nodes.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="styleSheetSource">The node that has been read</param>
        protected virtual void OnStyleSheet(HtmlDocument document,HtmlStyleSheet styleSheetSource)
        {
        }

        /// <summary>
        /// Implement this method if you want to handle open-tag nodes.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="element">The node that has been read</param>
        protected virtual void OnElementOpen(HtmlDocument document,HtmlElement element)
        {
        }

        /// <summary>
        /// Implement this method if you want to handle close-tag nodes.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="element">The node that has been read</param>
        protected virtual void OnElementClose(HtmlDocument document,HtmlElementClose element)
        {
        }

        /// <summary>
        /// Implement this method if you want to handle HTML comment nodes.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="comment">The node that has been read</param>
        protected virtual void OnComment(HtmlDocument document,HtmlComment comment)
        {
        }

        /// <summary>
        /// Implement this method if you want to handle SGML comment nodes.
        /// NOTE(review): nothing in this class calls this hook; Parse only dispatches
        /// nodes that are HtmlComment instances to OnComment. Confirm whether SGML
        /// comments are meant to arrive here instead.
        /// </summary>
        /// <param name="document">The destination document</param>
        /// <param name="comment">The node that has been read</param>
        protected virtual void OnSgmlComment(HtmlDocument document,HtmlComment comment)
        {
        }

        /// <summary>
        /// The current line number. A parse is taken to start at line one.
        /// </summary>
        protected int LineNumber
        {
            get
            {
                return mLineNumber;
            }
        }

        /// <summary>
        /// If you choose to ignore errors, no exceptions will be thrown; errors will be silently ignored.
        /// </summary>
        public bool IgnoreErrors
        {
            get
            {
                return mIgnoreErrors;
            }
            set
            {
                mIgnoreErrors = value;
            }
        }

        /// <summary>
        /// This will parse the contents of the reader (a stream, for example)
        /// </summary>
        /// <param name="reader">The reader from which to read the HTML</param>
        /// <returns>The resultant document</returns>
        /// <exception cref="HtmlParserException">Malformed input was found and IgnoreErrors is false</exception>
        public HtmlDocument Parse(TextReader reader)
        {
            mLineNumber = 1;
            HtmlDocument document = new HtmlDocument();
            int inChar = ReadChar( reader );
            while( inChar != END_OF_FILE )
            {
                if( inChar == '<' )
                {
                    HtmlNode node = null;
                    inChar = ParseTag( document , reader , ref node , inChar );
                    // The close-tag test must stay first; the order of these checks is significant
                    // if HtmlElementClose is related to HtmlElement by inheritance.
                    if( node is HtmlElementClose )
                    {
                        OnElementClose( document , (HtmlElementClose) node );
                    }
                    else if( node is HtmlElement )
                    {
                        HtmlElement element = (HtmlElement) node;
                        OnElementOpen( document , element );
                        // IsClosed is evaluated after OnElementOpen, exactly as before.
                        if( "SCRIPT".Equals( element.Name.ToUpper( INVARIANT ) ) && ! element.IsClosed )
                        {
                            bool ignoreSingleQuotes = true;
                            /*
                             * TODO: Is VBScript the only fella that uses single quotes in a manner other than to quote things...?
                            if( element.Attributes[ "VBSCRIPT" ] == null )
                            {
                                ignoreSingleQuotes = true;
                            }
                            */
                            String script = "";
                            inChar = ExtractScript( reader , ref script , inChar , ignoreSingleQuotes );
                            OnScript( document , new HtmlScript( script ) );
                            OnElementClose( document , new HtmlElementClose( element.Name ) );
                        }
                        else if( "STYLE".Equals( element.Name.ToUpper( INVARIANT ) ) && ! element.IsClosed )
                        {
                            String style = "";
                            inChar = ExtractStyle( reader , ref style , inChar );
                            OnStyleSheet( document , new HtmlStyleSheet( style ) );
                            OnElementClose( document , new HtmlElementClose( element.Name ) );
                        }
                    }
                    else if( node != null )
                    {
                        if( node is HtmlComment )
                        {
                            OnComment( document , (HtmlComment) node );
                        }
                    }
                }
                else
                {
                    HtmlNode node = null;
                    inChar = ParseText( reader , ref node , inChar );
                    OnText( document , (HtmlText) node );
                }
            }
            return document;
        }

        /// <summary>
        /// Reads the next character from the reader and advances the line counter
        /// when that character is a line feed. Every read in this class goes through
        /// here so that line numbers reported in errors stay consistent.
        /// </summary>
        /// <param name="reader">The reader to read from</param>
        /// <returns>The character read, or END_OF_FILE</returns>
        private int ReadChar(TextReader reader)
        {
            int inChar = reader.Read();
            if( inChar == '\n' )
            {
                mLineNumber++;
            }
            return inChar;
        }

        #region Node extraction

        /// <summary>
        /// Collects characters up to (not including) the next '&lt;' or end of input
        /// into an HtmlText node. The text is HTML-decoded.
        /// </summary>
        /// <param name="reader">The reader to read from</param>
        /// <param name="outputNode">Receives the text node</param>
        /// <param name="inChar">The first character of the text (already read)</param>
        /// <returns>The first character that is not part of the text</returns>
        private int ParseText(TextReader reader,ref HtmlNode outputNode,int inChar)
        {
            StringBuilder input = new StringBuilder();
            input.Append( (char) inChar );
            inChar = ReadChar( reader );
            while( inChar != END_OF_FILE && inChar != '<' )
            {
                input.Append( (char) inChar );
                inChar = ReadChar( reader );
            }
            outputNode = new HtmlText( HttpUtility.HtmlDecode( input.ToString() ) );
            return inChar;
        }

        /// <summary>
        /// Parses one tag. On entry inChar is the '&lt;' that starts it. Depending on what
        /// follows, outputNode receives a comment, an SGML comment, a close tag or an open
        /// tag; it is left untouched for a DOCTYPE (stored on the document instead) and
        /// when the input ends right after the '&lt;'.
        /// </summary>
        /// <param name="document">The destination document (receives the DOCTYPE)</param>
        /// <param name="reader">The reader to read from</param>
        /// <param name="outputNode">Receives the node that was parsed, if any</param>
        /// <param name="inChar">The '&lt;' character</param>
        /// <returns>The first character after the tag</returns>
        private int ParseTag(HtmlDocument document,TextReader reader,ref HtmlNode outputNode,int inChar)
        {
            inChar = ReadChar( reader );
            if( inChar == '!' )
            {
                // This is a comment
                inChar = ReadChar( reader );
                if( inChar == '-' )
                {
                    inChar = ReadChar( reader );
                    if( inChar == '-' )
                    {
                        // Extract HTML comment
                        string text = "";
                        inChar = ReadChar( reader );
                        inChar = ExtractComment( reader , ref text , inChar );
                        outputNode = new HtmlComment( text );
                    }
                    else
                    {
                        // "<!-" followed by something else: the '-' already consumed is
                        // not part of the declaration text (unchanged behaviour).
                        inChar = ParseSgmlDeclaration( document , reader , ref outputNode , inChar );
                    }
                }
                else
                {
                    inChar = ParseSgmlDeclaration( document , reader , ref outputNode , inChar );
                }
            }
            else
            {
                // This is a tag
                inChar = SkipWhitespace( reader , inChar );
                if( inChar == '/' )
                {
                    inChar = ParseCloseTag( reader , ref outputNode );
                }
                else if( inChar != END_OF_FILE )
                {
                    inChar = ParseOpenTag( reader , ref outputNode , inChar );
                }
            }
            return inChar;
        }

        /// <summary>
        /// Extracts an SGML comment (probably a DOCTYPE). A DOCTYPE is recorded on the
        /// document and not as a comment; anything else becomes an SgmlComment node.
        /// </summary>
        private int ParseSgmlDeclaration(HtmlDocument document,TextReader reader,ref HtmlNode outputNode,int inChar)
        {
            string text = "";
            inChar = ExtractSGMLComment( reader , ref text , inChar );
            if( text.Length >= 7 && "DOCTYPE".Equals( text.Substring( 0 , 7 ).ToUpper( INVARIANT ) ) )
            {
                // We record DOCTYPE with the document (and not as a comment)
                document.DocumentType = text.Substring( 7 ).Trim();
            }
            else
            {
                outputNode = new SgmlComment( text );
            }
            return inChar;
        }

        /// <summary>
        /// Parses a close element tag. The current character is the '/' after the '&lt;'.
        /// </summary>
        private int ParseCloseTag(TextReader reader,ref HtmlNode outputNode)
        {
            String name = "";
            int inChar = ReadChar( reader );
            inChar = SkipWhitespace( reader , inChar );
            if( inChar != END_OF_FILE )
            {
                inChar = ExtractName( reader , ref name , inChar );
            }
            inChar = SkipWhitespace( reader , inChar );
            if( inChar != '>' )
            {
                if( ! IgnoreErrors )
                {
                    throw new HtmlParserException( "Tag not properly closed \"" + name + "\"" , mLineNumber );
                }
            }
            else
            {
                inChar = ReadChar( reader );
            }
            outputNode = new HtmlElementClose( HttpUtility.HtmlDecode( name ) );
            return inChar;
        }

        /// <summary>
        /// Parses an open element tag together with its attributes. On entry inChar is
        /// the first character of the element name.
        /// </summary>
        private int ParseOpenTag(TextReader reader,ref HtmlNode outputNode,int inChar)
        {
            String name = "";
            inChar = ExtractName( reader , ref name , inChar );
            inChar = SkipWhitespace( reader , inChar );
            HtmlElement element = new HtmlElement( HttpUtility.HtmlDecode( name ) );
            String attrName = "";
            String attrValue = "";
            while( inChar != END_OF_FILE && inChar != '>' )
            {
                // Read each attribute
                inChar = SkipWhitespace( reader , inChar );
                inChar = ExtractName( reader , ref attrName , inChar );
                inChar = SkipWhitespace( reader , inChar );
                if( inChar == '=' )
                {
                    // Attribute has a value
                    inChar = ReadChar( reader );
                    inChar = SkipWhitespace( reader , inChar );
                    bool isClosedWithSlash = false;
                    inChar = ExtractValue( reader , ref attrValue , inChar , ref isClosedWithSlash );
                    inChar = SkipWhitespace( reader , inChar );
                    element.Attributes.Add( new HtmlAttribute( HttpUtility.HtmlDecode( attrName ) , HttpUtility.HtmlDecode( attrValue ) ) );
                    element.IsClosedWithSlash = isClosedWithSlash;
                }
                else if( inChar == '/' )
                {
                    // Special examination of "/>" tag terminator
                    inChar = ReadChar( reader );
                    if( inChar == '>' )
                    {
                        element.IsClosedWithSlash = true;
                    }
                }
                else
                {
                    // Null attribute value
                    inChar = SkipWhitespace( reader , inChar );
                    // An empty name only occurs when the tag (or the input) ends here,
                    // e.g. "<a / >"; do not record a nameless attribute for that.
                    if( attrName.Length > 0 )
                    {
                        element.Attributes.Add( new HtmlAttribute( HttpUtility.HtmlDecode( attrName ) , null ) );
                    }
                }
            }
            if( inChar != '>' )
            {
                if( ! IgnoreErrors )
                {
                    throw new HtmlParserException( "Tag not closed \"" + name + "\"" , mLineNumber);
                }
            }
            else
            {
                inChar = ReadChar( reader );
            }
            outputNode = element;
            return inChar;
        }

        #endregion

        /// <summary>
        /// Skips over whitespace starting at inChar.
        /// </summary>
        /// <returns>The first non-whitespace character (or END_OF_FILE)</returns>
        private int SkipWhitespace(TextReader reader,int inChar)
        {
            while( char.IsWhiteSpace( (char) inChar ) )
            {
                inChar = ReadChar( reader );
            }
            return inChar;
        }

        #region Extraction routines

        /// <summary>
        /// Reads script source up to the closing SCRIPT tag.
        /// </summary>
        private int ExtractScript(TextReader reader,ref string text,int inChar,bool ignoreSingleQuotes)
        {
            return ExtractUntil( reader , "</SCRIPT>" , ref text , inChar , "Script not closed" , true , ignoreSingleQuotes );
        }

        /// <summary>
        /// Reads stylesheet source up to the closing STYLE tag.
        /// </summary>
        private int ExtractStyle(TextReader reader,ref string text,int inChar)
        {
            return ExtractUntil( reader , "</STYLE>" , ref text , inChar , "Style not closed" , true , false );
        }

        /// <summary>
        /// Reads characters until terminatorText (compared case-insensitively) has been
        /// seen outside of a quoted string, or the input ends. The terminator itself is
        /// stripped from the returned text.
        /// </summary>
        /// <param name="reader">The reader to read from</param>
        /// <param name="terminatorText">The text that ends the section</param>
        /// <param name="text">Receives the extracted text</param>
        /// <param name="inChar">The first character of the section (already read)</param>
        /// <param name="failMessage">Error message used when the terminator is missing</param>
        /// <param name="respectQuotationMarks">Not used by the current implementation</param>
        /// <param name="ignoreSingleQuotes">When true, single quotes do not start a quoted string</param>
        /// <returns>The character following the terminator (or END_OF_FILE)</returns>
        private int ExtractUntil(TextReader reader,String terminatorText,ref string text,int inChar,string failMessage,bool respectQuotationMarks,bool ignoreSingleQuotes)
        {
            StringBuilder comment = new StringBuilder();
            char[] terms = terminatorText.ToUpper( INVARIANT ).ToCharArray();
            // Sliding window holding the most recent characters seen outside quotes.
            char[] cTemp = new char [ terms.Length ];
            cTemp[ cTemp.Length - 1 ] = (char) inChar;
            bool inDoubleQuotes = false;
            bool inSingleQuotes = false;
            if( inChar == '\"' ) inDoubleQuotes = true;
            if( inChar == '\'' ) inSingleQuotes = true;
            while( inChar != END_OF_FILE )
            {
                if( ignoreSingleQuotes ) inSingleQuotes = false;
                int i = 0;
                if( ! ( inSingleQuotes | inDoubleQuotes ) )
                {
                    for( i = 0 ; i < terms.Length ; i++ )
                    {
                        if( terms[i] != char.ToUpper( cTemp[i] , INVARIANT ) ) break;
                    }
                }
                // The window equals the terminator: inChar is already the character after it.
                if( i == terms.Length ) break;
                comment.Append( (char) inChar );
                if( ! ( inSingleQuotes | inDoubleQuotes ) )
                {
                    for( i = 0 ; i < cTemp.Length - 1 ; i++ )
                    {
                        cTemp[ i ] = cTemp[ i + 1];
                    }
                }
                else
                {
                    // Inside a quoted string nothing may contribute to a terminator match.
                    for( i = 0 ; i < cTemp.Length - 1 ; i++ )
                    {
                        cTemp[ i ] = '\0';
                    }
                }
                cTemp[ cTemp.Length - 1 ] = (char) inChar;
                int oldChar = inChar;
                inChar = ReadChar( reader );
                // A quote toggles the quoted state unless it is escaped by a backslash.
                if( inChar == '\"' && ! inSingleQuotes && ! ( oldChar == '\\' ) ) inDoubleQuotes = !inDoubleQuotes;
                if( inChar == '\'' && ! inDoubleQuotes && ! ( oldChar == '\\' ) ) inSingleQuotes = !inSingleQuotes;
            }
            text = comment.ToString();
            if( text.Length >= terminatorText.Length && text.Substring( text.Length - terminatorText.Length ).ToUpper( INVARIANT ).Equals( terminatorText.ToUpper( INVARIANT ) ) )
            {
                text = text.Substring( 0 , text.Length - terminatorText.Length );
            }
            else
            {
                if( ! IgnoreErrors )
                {
                    throw new HtmlParserException( failMessage , mLineNumber );
                }
            }
            return inChar;
        }

        /// <summary>
        /// Reads an HTML comment body. On entry inChar is the first character after
        /// "&lt;!--". The trailing "--" is not part of the returned text.
        /// </summary>
        /// <returns>The character following the closing '&gt;' (or END_OF_FILE)</returns>
        private int ExtractComment(TextReader reader,ref String text,int inChar)
        {
            StringBuilder comment = new StringBuilder();
            int c1=0,c2=0;
            bool terminated = false;
            while( inChar != END_OF_FILE )
            {
                if( c1 == '-' && c2 == '-' && inChar == '>' )
                {
                    terminated = true;
                    break;
                }
                comment.Append( (char) inChar );
                c1 = c2;
                c2 = inChar;
                inChar = ReadChar( reader );
            }
            if( terminated )
            {
                // The last two characters collected are the "--" of the terminator.
                text = comment.ToString( 0 , comment.Length - 2 );
                inChar = ReadChar( reader );
            }
            else
            {
                // Unterminated: keep everything that was read rather than chopping
                // two characters that are not a terminator.
                text = comment.ToString();
                if( ! IgnoreErrors )
                {
                    throw new HtmlParserException( "Comment not closed" , mLineNumber );
                }
            }
            return inChar;
        }

        /// <summary>
        /// Reads an SGML declaration up to the first '&gt;' that is not inside double quotes.
        /// </summary>
        /// <returns>The character following the closing '&gt;' (or END_OF_FILE)</returns>
        private int ExtractSGMLComment(TextReader reader,ref String text,int inChar)
        {
            StringBuilder comment = new StringBuilder();
            bool inQuotes = false;
            while( inChar != END_OF_FILE )
            {
                if( inChar == '>' && ! inQuotes )
                {
                    break;
                }
                if( inChar == '\"' )
                {
                    inQuotes = ! inQuotes;
                }
                comment.Append( (char) inChar );
                inChar = ReadChar( reader );
            }
            if( inChar == '>' )
            {
                // Goes through ReadChar so that a newline directly after the
                // declaration is counted like every other newline.
                inChar = ReadChar( reader );
            }
            else
            {
                if( ! IgnoreErrors )
                {
                    throw new HtmlParserException( "Comment not closed" , mLineNumber );
                }
            }
            text = comment.ToString();
            return inChar;
        }

        /// <summary>
        /// Reads an element or attribute name: everything up to whitespace, '=', '&gt;',
        /// '/' or end of input.
        /// </summary>
        /// <returns>The character that ended the name</returns>
        private int ExtractName(TextReader reader,ref String name,int inChar)
        {
            StringBuilder nameBuffer = new StringBuilder();
            while( ! ( char.IsWhiteSpace( (char) inChar ) ) && inChar != END_OF_FILE && inChar != '=' && inChar != '>' /*&& inChar != '<'*/ && inChar != '/' )
            {
                nameBuffer.Append( (char) inChar );
                inChar = ReadChar( reader );
            }
            name = nameBuffer.ToString();
            return inChar;
        }

        /// <summary>
        /// Reads an attribute value, either quoted (single or double) or bare.
        /// </summary>
        /// <param name="reader">The reader to read from</param>
        /// <param name="value">Receives the value without surrounding quotes</param>
        /// <param name="inChar">The first character of the value (already read)</param>
        /// <param name="isClosedWithSlash">Set to true when a bare value was directly followed by "/&gt;"</param>
        /// <returns>The character following the value</returns>
        private int ExtractValue(TextReader reader,ref String value,int inChar,ref bool isClosedWithSlash)
        {
            StringBuilder valueBuffer = new StringBuilder();
            if( inChar == '\"' || inChar == '\'' )
            {
                int terminatorChar = inChar;
                inChar = ReadChar( reader );
                while( inChar != END_OF_FILE && inChar != terminatorChar )
                {
                    valueBuffer.Append( (char) inChar );
                    inChar = ReadChar( reader );
                }
                if( inChar == terminatorChar )
                {
                    inChar = ReadChar( reader );
                }
                else
                {
                    if( ! IgnoreErrors )
                    {
                        throw new HtmlParserException( "String not terminated" , mLineNumber );
                    }
                }
            }
            else
            {
                int lastInChar = 0;
                while( ! char.IsWhiteSpace( (char) inChar ) && inChar != END_OF_FILE && inChar != '>' )
                {
                    valueBuffer.Append( (char) inChar );
                    lastInChar = inChar;
                    inChar = ReadChar( reader );
                }
                if( lastInChar == '/' && inChar == '>' )
                {
                    // The '/' belongs to the "/>" terminator, not to the value.
                    valueBuffer.Remove( valueBuffer.Length - 1 , 1 );
                    isClosedWithSlash = true;
                }
            }
            value = valueBuffer.ToString();
            return inChar;
        }

        #endregion
    }
}

--- NEW FILE: HtmlComment.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.

using System;
using System.IO;
using System.ComponentModel;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// This represents a HTML comment. If the input document contained an SGML-style
    /// comment (but wasn't a DOCTYPE), it is treated as if it were a regular HTML-style
    /// comment. i.e. <! comment > is treated the same as <!-- comment -->.
    /// All comments are output in HTML/XML using the second format.
/// </summary>
    public class HtmlComment: HtmlNode
    {
        private string mText;

        /// <summary>
        /// Creates a new comment with the given text.
        /// </summary>
        /// <param name="text">The comment text</param>
        public HtmlComment(string text)
        {
            mText = text;
        }

        /// <summary>
        /// This will write this comment out in HTML format. Be sure that the
        /// comment text does not contain comments itself as no encoding is
        /// performed here.
        /// </summary>
        /// <param name="writer">The writer on which to write this comment</param>
        public override void WriteHTML(TextWriter writer)
        {
            writer.Write( "<!--" );
            writer.Write( mText );
            writer.Write( "-->" );
        }

        /// <summary>
        /// This will write this comment out in XML format. Be sure that the
        /// comment text does not contain comments itself as no encoding is
        /// performed here.
        /// </summary>
        /// <param name="writer">The writer on which to write this comment</param>
        public override void WriteXML(TextWriter writer)
        {
            writer.Write( "<!--" );
            writer.Write( mText );
            writer.Write( "-->" );
        }

        /// <summary>
        /// Get or set the textual contents of the comment.
        /// </summary>
        [
        Category("General"),
        Description("The comment")
        ]
        public string Text
        {
            get
            {
                return mText;
            }
            set
            {
                mText = value;
            }
        }

        /// <summary>
        /// This makes a duplicate of this comment.
        /// </summary>
        /// <returns>A duplicate of this comment</returns>
        public override object Clone()
        {
            return new HtmlComment( mText );
        }
    }
}

--- NEW FILE: HtmlElement.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.

using System;
using System.IO;
using System.Text;
using System.Collections;
using System.ComponentModel;
using System.Web;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// This represents an element within the document.
    /// </summary>
    public class HtmlElement: HtmlNode
    {
        private HtmlNodeCollection mNodes;
        private HtmlAttributeCollection mAttributes;
        private string mName;
        private bool mIsClosedWithSlash;
        private bool mIsClosedWithTag;
        private bool mAllowNoClose;

        /// <summary>
        /// Creates a new empty element with the given name.
        /// </summary>
        /// <param name="name">The name of the element</param>
        public HtmlElement(string name) : this( name , false )
        {
        }

        /// <summary>
        /// Creates a new empty element with the given name.
        /// </summary>
        /// <param name="name">The name of the element</param>
        /// <param name="allowNoClosingTag">Some tags do not need to be closed at all, under those circumstances, set this to true</param>
        public HtmlElement(string name,bool allowNoClosingTag)
        {
            mNodes = new HtmlNodeCollection( this );
            mAttributes = new HtmlAttributeCollection();
            mName = name;
            mIsClosedWithSlash = false;
            mIsClosedWithTag = false;
            mAllowNoClose = allowNoClosingTag;
        }

        /// <summary>
        /// This will write this element (and any child-nodes) in HTML format.
        /// The closing tag is omitted only when the element has no children and
        /// HtmlHelper reports that it does not require closing.
        /// </summary>
        /// <param name="writer">The writer on which to write the element</param>
        public override void WriteHTML(TextWriter writer)
        {
            writer.Write( "<" );
            writer.Write( Name );
            foreach( HtmlAttribute attribute in Attributes )
            {
                writer.Write( " " );
                attribute.WriteHTML( writer );
            }
            writer.Write( ">" );
            foreach( HtmlNode node in this.Nodes )
            {
                node.WriteHTML( writer );
            }
            if( this.Nodes.Count > 0 || ! HtmlHelper.ElementDoesNotRequireClosing( Name ) )
            {
                writer.Write( "</" );
                writer.Write( Name );
                writer.Write( ">" );
            }
        }

        /// <summary>
        /// This will write this element (and any child-nodes) in XML format.
        /// The element name is written in lower case; an element without
        /// children is written in the self-closing form.
        /// </summary>
        /// <param name="writer">The writer on which to write the element</param>
        public override void WriteXML(TextWriter writer)
        {
            writer.Write( "<" );
            writer.Write( Name.ToLower() );
            foreach( HtmlAttribute attribute in Attributes )
            {
                writer.Write( " " );
                attribute.WriteXML( writer );
            }
            if( Nodes.Count == 0 )
            {
                writer.Write( "/>" );
            }
            else
            {
                writer.Write( ">" );
                foreach( HtmlNode node in this.Nodes )
                {
                    node.WriteXML( writer );
                }
                writer.Write( "</" );
                writer.Write( Name.ToLower() );
                writer.Write( ">" );
            }
        }

        /// <summary>
        /// Get or set the name of the element.
        /// </summary>
        [
        Category("General"),
        Description("The name of the element")
        ]
        public string Name
        {
            get
            {
                return mName;
            }
            set
            {
                mName = value;
            }
        }

        /// <summary>
        /// This will return all the nodes (in order) that this element contains.
        /// </summary>
        [
        Category("Navigation"),
        Description("All the child nodes of this element")
        ]
        public HtmlNodeCollection Nodes
        {
            get
            {
                return mNodes;
            }
        }

        /// <summary>
        /// This will return the collection of attribute values for this element.
        /// </summary>
        [
        Category("General"),
        Description("The attributes of this element")
        ]
        public HtmlAttributeCollection Attributes
        {
            get
            {
                return mAttributes;
            }
        }

        /// <summary>
        /// This is an internal property used by the domain tree parser to determine
        /// how this element is eventually closed (if at all).
        /// </summary>
        internal bool IsClosedWithSlash
        {
            get
            {
                return mIsClosedWithSlash;
            }
            set
            {
                mIsClosedWithSlash = value;
            }
        }

        /// <summary>
        /// This is an internal property used by the domain tree parser to determine
        /// how this element is eventually closed (if at all).
        /// </summary>
        internal bool IsClosedWithTag
        {
            get
            {
                return mIsClosedWithTag;
            }
            set
            {
                mIsClosedWithTag = value;
            }
        }

        /// <summary>
        /// This is an internal property used by the domain tree parser to determine
        /// how this element is eventually closed (if at all). An element counts as
        /// closed when it has child nodes or was closed with a slash or a tag.
        /// </summary>
        internal bool IsClosed
        {
            get
            {
                return ( mNodes.Count > 0 ) || mIsClosedWithSlash || mIsClosedWithTag;
            }
        }

        /// <summary>
        /// This will return the HTML to represent this element's opening tag
        /// (name and attributes only; child nodes are not included).
        /// </summary>
        /// <returns>The opening tag as a string</returns>
        public override string ToString()
        {
            StringBuilder output = new StringBuilder();
            output.Append( "<" );
            output.Append( Name );
            foreach( HtmlAttribute attribute in Attributes )
            {
                output.Append( " " );
                output.Append( attribute.ToString() );
            }
            if( this.IsClosedWithSlash )
            {
                output.Append( "/" );
            }
            output.Append( ">" );
            return output.ToString();
        }

        /// <summary>
        /// This makes a deep copy of this element.
        /// </summary>
        /// <returns>A copy of this element</returns>
        public override object Clone()
        {
            // Pass mAllowNoClose through so the copy keeps every flag of the original.
            HtmlElement clonedElement = new HtmlElement( mName , mAllowNoClose );
            // NOTE(review): whether the cloned child nodes end up parented to
            // clonedElement depends on HtmlNodeCollection.Clone - confirm there.
            clonedElement.mNodes = (HtmlNodeCollection) mNodes.Clone();
            clonedElement.mAttributes = (HtmlAttributeCollection) mAttributes.Clone();
            clonedElement.mIsClosedWithSlash = mIsClosedWithSlash;
            clonedElement.mIsClosedWithTag = mIsClosedWithTag;
            return clonedElement;
        }

        /// <summary>
        /// Return the HTML contained within this node - useful if you know
        /// it's just text.
        /// </summary>
        [
        Category("General"),
        Description("The HTML contents of this element")
        ]
        public string InnerHTML
        {
            get
            {
                StringWriter stringWriter = new StringWriter();
                Nodes.WriteHTML( stringWriter );
                return stringWriter.ToString();
            }
        }

        /// <summary>
        /// Return the text contained within this node: the string form of every
        /// node returned by Nodes.FindAllText( false ), concatenated in order.
        /// </summary>
        [
        Category("General"),
        Description("The text contents of this element")
        ]
        public string InnerText
        {
            get
            {
                StringWriter stringWriter = new StringWriter();
                HtmlNodeCollection textNodes = Nodes.FindAllText( false );
                foreach( HtmlNode node in textNodes )
                {
                    stringWriter.Write( node.ToString() );
                }
                return stringWriter.ToString();
            }
        }
    }

    /// <summary>
    /// This is a collection of elements - you should not directly modify this collection
    /// (use the HtmlElement.Nodes property instead). The implementation of this class
    /// will change in the future.
    /// </summary>
    public class HtmlElementCollection: HtmlNodeCollection
    {
        /// <summary>
        /// This creates a new empty collection in which to store nodes.
        /// </summary>
        internal HtmlElementCollection() : base( null )
        {
        }

        /// <summary>
        /// This is only ever used internally, and is for maintaining the parent/child
        /// relationships.
        /// </summary>
        /// <param name="parent">The owner of this collection</param>
        internal HtmlElementCollection(HtmlElement parent) : base( parent )
        {
        }

        /// <summary>
        /// Get or set the element at the given index
        /// </summary>
        public new HtmlElement this [int index]
        {
            get
            {
                return (HtmlElement) base[ index ];
            }
            set
            {
                base[ index ] = value;
            }
        }
    }
}

--- NEW FILE: HtmlNode.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.

using System;
using System.IO;
using System.Collections;
using System.ComponentModel;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// This abstract object represents an object that can appear in an HTML document.
    /// </summary>
    public abstract class HtmlNode: ICloneable
    {
        protected HtmlElement mParent;

        /// <summary>
        /// This constructor is only ever used by the child classes.
/// </summary>
        protected HtmlNode()
        {
        }

        /// <summary>
        /// Write this node in HTML format
        /// </summary>
        /// <param name="writer">The writer on which to write this node</param>
        public abstract void WriteHTML(TextWriter writer);

        /// <summary>
        /// Write this node in XML format
        /// </summary>
        /// <param name="writer">The writer on which to write this node</param>
        public abstract void WriteXML(TextWriter writer);

        /// <summary>
        /// The HTML used to build the document tree from this node.
        /// </summary>
        [
        Category("General"),
        Description("The HTML representation of the document tree from this point")
        ]
        public string HTML
        {
            get
            {
                StringWriter stringWriter = new StringWriter();
                WriteHTML( stringWriter );
                return stringWriter.ToString();
            }
        }

        /// <summary>
        /// The XML used to build the document tree from this node.
        /// </summary>
        [
        Category("General"),
        Description("The XML representation of the document tree from this point")
        ]
        public string XML
        {
            get
            {
                StringWriter stringWriter = new StringWriter();
                WriteXML( stringWriter );
                return stringWriter.ToString();
            }
        }

        /// <summary>
        /// This will return the next sibling node. If this is the last one, it will return null.
        /// </summary>
        [
        Category("Navigation"),
        Description("The next sibling node")
        ]
        public HtmlNode Next
        {
            get
            {
                // Index searches the parent's collection, so look it up once only.
                int index = Index;
                if( index == -1 )
                {
                    return null;
                }
                HtmlNodeCollection siblings = Parent.Nodes;
                if( siblings.Count > index + 1 )
                {
                    return siblings[ index + 1 ];
                }
                return null;
            }
        }

        /// <summary>
        /// This will return the previous sibling node. If this is the first one, it will return null.
        /// </summary>
        [
        Category("Navigation"),
        Description("The previous sibling node")
        ]
        public HtmlNode Previous
        {
            get
            {
                // Index searches the parent's collection, so look it up once only.
                // Both "no parent / not found" (-1) and "first child" (0) yield null.
                int index = Index;
                if( index > 0 )
                {
                    return Parent.Nodes[ index - 1 ];
                }
                return null;
            }
        }

        /// <summary>
        /// This will return the first child node. If there are no children, this
        /// will return null.
        /// </summary>
        [
        Category("Navigation"),
        Description("The first child of this node")
        ]
        public HtmlNode FirstChild
        {
            get
            {
                HtmlElement element = this as HtmlElement;
                if( element == null || element.Nodes.Count == 0 )
                {
                    return null;
                }
                return element.Nodes[ 0 ];
            }
        }

        /// <summary>
        /// This will return the last child node. If there are no children, this
        /// will return null.
        /// </summary>
        [
        Category("Navigation"),
        Description("The last child of this node")
        ]
        public HtmlNode LastChild
        {
            get
            {
                HtmlElement element = this as HtmlElement;
                if( element == null || element.Nodes.Count == 0 )
                {
                    return null;
                }
                return element.Nodes[ element.Nodes.Count - 1 ];
            }
        }

        /// <summary>
        /// This will return the index position within the parent's nodes that this one resides.
        /// If this is not in a collection, this will return -1.
        /// </summary>
        [
        Category("Navigation"),
        Description("The zero-based index of this node in the parent's nodes collection")
        ]
        public int Index
        {
            get
            {
                if( mParent == null )
                {
                    return -1;
                }
                return mParent.Nodes.IndexOf( this );
            }
        }

        /// <summary>
        /// This will return the parent of this node, or null if there is none.
        /// </summary>
        [
        Category("Navigation"),
        Description("The parent node of this one")
        ]
        public HtmlElement Parent
        {
            get
            {
                return mParent;
            }
        }

        /// <summary>
        /// This internal function is used to maintain the parent/child relationships.
        /// </summary>
        /// <param name="parent">The element that now owns this node</param>
        internal void SetParent(HtmlElement parent)
        {
            mParent = parent;
        }

        /// <summary>
        /// This will return true if this is root node (ie. has no parent)
        /// </summary>
        [
        Category("Navigation"),
        Description("Is this node a root node?")
        ]
        public bool IsRoot
        {
            get
            {
                return mParent == null;
            }
        }

        /// <summary>
        /// This will return true if this is a child node (has a parent).
        /// </summary>
        [
        Category("Navigation"),
        Description("Is this node a child of another?")
        ]
        public bool IsChild
        {
            get
            {
                return mParent != null;
            }
        }

        /// <summary>
        /// Does this node have any children? If this node is anything other than an
        /// HtmlElement, this will return false.
        /// </summary>
        [
        Category("Navigation"),
        Description("Does this node have any children?")
        ]
        public bool IsParent
        {
            get
            {
                HtmlElement element = this as HtmlElement;
                return element != null && element.Nodes.Count > 0;
            }
        }

        /// <summary>
        /// This will return true if this node is a descendent of the node passed in,
        /// i.e. the node passed in is this node's parent, grandparent (etc.).
        /// </summary>
        /// <param name="node">The node that might be the parent or grandparent (etc.)</param>
        /// <returns>True if this node is a descendent of the one passed in.</returns>
        [
        Category("Relationships")
        ]
        public bool IsDescendentOf(HtmlNode node)
        {
            // Walk up the parent chain looking for the candidate ancestor.
            for( HtmlNode ancestor = mParent ; ancestor != null ; ancestor = ancestor.Parent )
            {
                if( ancestor == node )
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// This will return true if the node passed is one of the children or grandchildren of this node.
        /// </summary>
        /// <param name="node">The node that might be a child.</param>
        /// <returns>True if this node is an ancestor of the one specified.</returns>
        [
        Category("Relationships")
        ]
        public bool IsAncestorOf(HtmlNode node)
        {
            return node.IsDescendentOf( this );
        }

        /// <summary>
        /// This will return the ancestor that is common to this node and the one specified.
        /// Either node itself counts: if one node is an ancestor of the other (or they are
        /// the same node), that node is returned.
        /// </summary>
        /// <param name="node">The possible node that is relative</param>
        /// <returns>The common ancestor, or null if there is none</returns>
        [
        Category("Relationships")
        ]
        public HtmlNode GetCommonAncestor(HtmlNode node)
        {
            // For each node on this node's chain (starting with itself), scan the
            // other node's chain (starting with itself) for the same instance.
            for( HtmlNode thisParent = this ; thisParent != null ; thisParent = thisParent.Parent )
            {
                for( HtmlNode thatParent = node ; thatParent != null ; thatParent = thatParent.Parent )
                {
                    if( thisParent == thatParent )
                    {
                        return thisParent;
                    }
                }
            }
            return null;
        }

        /// <summary>
        /// This will remove this node and all child nodes from the tree. If this
        /// is a root node, or it is no longer held in its parent's collection,
        /// this operation will do nothing.
        /// </summary>
        [
        Category("General")
        ]
        public void Remove()
        {
            if( mParent == null )
            {
                return;
            }
            // Index is -1 when the parent's collection no longer holds this node
            // (for instance after an earlier Remove); RemoveAt( -1 ) would throw.
            int index = this.Index;
            if( index != -1 )
            {
                mParent.Nodes.RemoveAt( index );
            }
            // NOTE(review): mParent is left pointing at the former parent, as before -
            // confirm whether callers rely on that before clearing it here.
        }

        /// <summary>
        /// This will take a deep copy of this node
        /// </summary>
        /// <returns>The copy</returns>
        public abstract object Clone();

        /// <summary>
        /// Create a document fragment from this node.
        /// </summary>
        /// <returns>A new document from this node</returns>
        public HtmlDocument CreateDocumentFragment()
        {
            HtmlNodeCollection collection = new HtmlNodeCollection();
            collection.Add( (HtmlNode) this.Clone() );
            HtmlDocument fragment = new HtmlDocument( collection );
            return fragment;
        }
    }

    /// <summary>
    /// This represents a collection of nodes (i.e. a portion of a document)
    /// </summary>
    public class HtmlNodeCollection: CollectionBase,ICloneable
    {
        private HtmlElement mParent;

        /// <summary>
        /// This creates a new empty collection in which to store nodes.
        /// </summary>
        public HtmlNodeCollection()
        {
            mParent = null;
        }

        /// <summary>
        /// This is only ever used internally, and is for maintaining the parent/child
        /// relationships.
        /// </summary>
        /// <param name="parent">The owner of this collection</param>
        internal HtmlNodeCollection(HtmlElement parent)
        {
            mParent = parent;
        }

        /// <summary>
        /// This will add the node to the end of this collection.
/// </summary>
        /// <param name="node">The node to add</param>
        /// <returns>The zero-based index of where the node was added</returns>
        public virtual int Add(HtmlNode node)
        {
            // Only an owned collection re-parents what is put into it.
            if( mParent != null )
            {
                node.SetParent( mParent );
            }
            return base.InnerList.Add( node );
        }

        /// <summary>
        /// Appends every node of another collection to the end of this one, in order.
        /// </summary>
        /// <param name="nodes">The collection of nodes to add</param>
        public void Add(HtmlNodeCollection nodes)
        {
            foreach( HtmlNode incoming in nodes )
            {
                if( mParent != null )
                {
                    incoming.SetParent( mParent );
                }
                base.InnerList.Add( incoming );
            }
        }

        /// <summary>
        /// Places a node at the given position in this collection.
        /// </summary>
        /// <param name="index">The index at which to insert</param>
        /// <param name="node">The node to insert</param>
        public void Insert(int index,HtmlNode node)
        {
            if( mParent != null )
            {
                node.SetParent( mParent );
            }
            base.InnerList.Insert( index , node );
        }

        /// <summary>
        /// Reads or replaces the node stored at the given index. A node assigned
        /// into an owned collection is given that owner as its parent.
        /// </summary>
        public HtmlNode this [int index]
        {
            get
            {
                return (HtmlNode) base.InnerList[ index ];
            }
            set
            {
                if( mParent != null )
                {
                    value.SetParent( mParent);
                }
                base.InnerList[ index ] = value;
            }
        }

        /// <summary>
        /// The element that owns this collection, or null when the collection
        /// is not contained in an element.
        /// </summary>
        public HtmlElement Parent
        {
            get
            {
                return mParent;
            }
        }

        /// <summary>
        /// Looks up the position of a node within this collection.
        /// </summary>
        /// <param name="node">The node to find</param>
        /// <returns>The index of the node (or -1 if it was not found)</returns>
        public int IndexOf(HtmlNode node)
        {
            return base.InnerList.IndexOf( node );
        }

        /// <summary>
        /// This will search through this collection of nodes for all elements with the
        /// specified name. If you want to search the subnodes recursively, you should
        /// pass True as the parameter in searchChildren. This search is guaranteed to
        /// return nodes in the order in which they are found in the document.
/// </summary>
        /// <param name="name">The name of the element to find</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlElementCollection FindByName(string name)
        {
            return FindByName( name , true );
        }

        /// <summary>
        /// This will search through this collection of nodes for all elements with the
        /// specified name. If you want to search the subnodes recursively, you should
        /// pass True as the parameter in searchChildren. Each element is reported before
        /// the matches found beneath it. The comparison ignores case and is
        /// culture-invariant.
        /// </summary>
        /// <param name="name">The name of the element to find. A null name matches nothing.</param>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlElementCollection FindByName(string name,bool searchChildren)
        {
            HtmlElementCollection results = new HtmlElementCollection(null);

            // Lower-case the search term once, using the invariant culture. The
            // parameterless ToLower() follows the current culture, and under Turkish
            // casing rules "TITLE" lower-cases to a dotless-i form that never equals
            // "title", so tag lookups silently failed on such machines.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string wanted = ( name == null ) ? null : name.ToLower( invariant );

            foreach( HtmlNode node in base.List )
            {
                HtmlElement element = node as HtmlElement;
                if( element == null )
                {
                    continue;
                }

                if( wanted != null && element.Name.ToLower( invariant ).Equals( wanted ) )
                {
                    results.Add( node );
                }

                if( searchChildren )
                {
                    foreach( HtmlNode matchedChild in element.Nodes.FindByName( name , searchChildren ) )
                    {
                        results.Add( matchedChild );
                    }
                }
            }
            return results;
        }

        /// <summary>
        /// This will search through this collection of nodes (and all sub-nodes) for
        /// all elements that have an attribute with the given name.
        /// </summary>
        /// <param name="attributeName">The name of the attribute to find</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlElementCollection FindByAttributeName(string attributeName)
        {
            return FindByAttributeName( attributeName , true );
        }

        /// <summary>
        /// This will search through this collection of nodes for all elements with an
        /// attribute with the given name.
/// </summary>
        /// <param name="attributeName">The name of the attribute to find. A null name matches nothing.</param>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlElementCollection FindByAttributeName(string attributeName,bool searchChildren)
        {
            HtmlElementCollection results = new HtmlElementCollection(null);

            // Lower-case the search term once with the invariant culture; the
            // parameterless ToLower() is culture-sensitive and breaks ASCII
            // attribute names containing 'I' under Turkish casing rules.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string wanted = ( attributeName == null ) ? null : attributeName.ToLower( invariant );

            foreach( HtmlNode node in base.List )
            {
                HtmlElement element = node as HtmlElement;
                if( element == null )
                {
                    continue;
                }

                foreach( HtmlAttribute attribute in element.Attributes )
                {
                    if( wanted != null && attribute.Name.ToLower( invariant ).Equals( wanted ) )
                    {
                        // One hit is enough; an element is reported at most once.
                        results.Add( node );
                        break;
                    }
                }

                if( searchChildren )
                {
                    foreach( HtmlNode matchedChild in element.Nodes.FindByAttributeName( attributeName , searchChildren ) )
                    {
                        results.Add( matchedChild );
                    }
                }
            }
            return results;
        }

        /// <summary>
        /// This will search through this collection of nodes (and all sub-nodes) for all
        /// elements that have an attribute with the given name, where that attribute
        /// has the specified value.
        /// </summary>
        /// <param name="attributeName">The name of the attribute to find</param>
        /// <param name="attributeValue">The value of the attribute to find</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlElementCollection FindByAttributeNameValue(string attributeName,string attributeValue)
        {
            return FindByAttributeNameValue( attributeName , attributeValue , true );
        }

        /// <summary>
        /// This will search through this collection of nodes for all elements with an
        /// attribute with the given name, and the attribute has the specified value.
/// </summary>
        /// <param name="attributeName">The name of the attribute to find. A null name matches nothing.</param>
        /// <param name="attributeValue">The value of the attribute to find. A null value matches nothing.</param>
        /// <param name="searchChildren">Flag indicating that all child nodes should also be recursed</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlElementCollection FindByAttributeNameValue(string attributeName,string attributeValue,bool searchChildren)
        {
            HtmlElementCollection results = new HtmlElementCollection(null);

            // Lower-case both search terms once with the invariant culture; the
            // parameterless ToLower() is culture-sensitive (e.g. Turkish dotless i)
            // and made matching depend on the machine's regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string wantedName = ( attributeName == null ) ? null : attributeName.ToLower( invariant );
            string wantedValue = ( attributeValue == null ) ? null : attributeValue.ToLower( invariant );

            foreach( HtmlNode node in base.List )
            {
                HtmlElement element = node as HtmlElement;
                if( element == null )
                {
                    continue;
                }

                foreach( HtmlAttribute attribute in element.Attributes )
                {
                    if( wantedName == null || !attribute.Name.ToLower( invariant ).Equals( wantedName ) )
                    {
                        continue;
                    }

                    // Defensive: HtmlAttribute is not visible here, so a null Value
                    // (possibly an attribute written without a value) is treated as
                    // "no match" instead of being dereferenced.
                    if( wantedValue != null
                        && attribute.Value != null
                        && attribute.Value.ToLower( invariant ).Equals( wantedValue ) )
                    {
                        results.Add( node );
                    }

                    // Only the first attribute carrying the name is examined.
                    break;
                }

                if( searchChildren )
                {
                    foreach( HtmlNode matchedChild in element.Nodes.FindByAttributeNameValue( attributeName , attributeValue , searchChildren ) )
                    {
                        results.Add( matchedChild );
                    }
                }
            }
            return results;
        }

        /// <summary>
        /// This will copy this node collection by cloning every node into a new,
        /// unowned collection.
        /// </summary>
        /// <returns>The new collection holding the cloned nodes</returns>
        public object Clone()
        {
            HtmlNodeCollection clonedCollection = new HtmlNodeCollection();
            foreach( HtmlNode node in this )
            {
                clonedCollection.Add( (HtmlNode) node.Clone() );
            }
            return clonedCollection;
        }

        /// <summary>
        /// Write the HTML of every node, in order, to the writer.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public void WriteHTML(TextWriter writer)
        {
            foreach( HtmlNode node in this )
            {
                node.WriteHTML( writer );
            }
        }

        /// <summary>
        /// Write the XML of every node, in order, to the writer.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public void WriteXML(TextWriter writer)
        {
            foreach( HtmlNode node in this )
            {
                node.WriteXML( writer );
            }
        }

        /// <summary>
        /// Get the HTML required to render these nodes.
/// </summary>
        [ Category("General"), Description("The HTML required to render these nodes") ]
        public string HTML
        {
            get
            {
                StringWriter buffer = new StringWriter();
                WriteHTML( buffer );
                return buffer.ToString();
            }
        }

        /// <summary>
        /// The XML produced by writing every node of this collection.
        /// </summary>
        [ Category("General"), Description("The XML required to render these nodes") ]
        public string XML
        {
            get
            {
                StringWriter buffer = new StringWriter();
                WriteXML( buffer );
                return buffer.ToString();
            }
        }

        /// <summary>
        /// Collects the HtmlScript nodes of this collection and, optionally, those
        /// found beneath its elements.
        /// </summary>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlNodeCollection FindAllScripts(bool searchChildren)
        {
            HtmlNodeCollection found = new HtmlNodeCollection(null);
            foreach( HtmlNode candidate in base.List )
            {
                HtmlElement element = candidate as HtmlElement;
                if( element != null )
                {
                    // Elements are never results themselves; they are only descended into.
                    if( searchChildren )
                    {
                        found.Add( element.Nodes.FindAllScripts( searchChildren ) );
                    }
                    continue;
                }

                if( candidate is HtmlScript )
                {
                    found.Add( candidate );
                }
            }
            return found;
        }

        /// <summary>
        /// This will search through this collection of nodes and will return all the
        /// HtmlComment objects.
/// </summary>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlNodeCollection FindAllComments(bool searchChildren)
        {
            HtmlNodeCollection found = new HtmlNodeCollection(null);
            foreach( HtmlNode candidate in base.List )
            {
                HtmlElement element = candidate as HtmlElement;
                if( element != null )
                {
                    // Elements are never results themselves; they are only descended into.
                    if( searchChildren )
                    {
                        found.Add( element.Nodes.FindAllComments( searchChildren ) );
                    }
                    continue;
                }

                if( candidate is HtmlComment )
                {
                    found.Add( candidate );
                }
            }
            return found;
        }

        /// <summary>
        /// Collects the HtmlStyleSheet nodes of this collection and, optionally,
        /// those found beneath its elements.
        /// </summary>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlNodeCollection FindAllStyleSheets(bool searchChildren)
        {
            HtmlNodeCollection found = new HtmlNodeCollection(null);
            foreach( HtmlNode candidate in base.List )
            {
                HtmlElement element = candidate as HtmlElement;
                if( element != null )
                {
                    if( searchChildren )
                    {
                        found.Add( element.Nodes.FindAllStyleSheets( searchChildren ) );
                    }
                    continue;
                }

                if( candidate is HtmlStyleSheet )
                {
                    found.Add( candidate );
                }
            }
            return found;
        }

        /// <summary>
        /// This will search through this collection of nodes and will return all the
        /// HtmlText objects.
/// </summary>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the nodes that match.</returns>
        public HtmlNodeCollection FindAllText(bool searchChildren)
        {
            HtmlNodeCollection found = new HtmlNodeCollection(null);
            foreach( HtmlNode candidate in base.List )
            {
                HtmlElement element = candidate as HtmlElement;
                if( element != null )
                {
                    // Elements are never results themselves; they are only descended into.
                    if( searchChildren )
                    {
                        found.Add( element.Nodes.FindAllText( searchChildren ) );
                    }
                    continue;
                }

                if( candidate is HtmlText )
                {
                    found.Add( candidate );
                }
            }
            return found;
        }

        /// <summary>
        /// Collects the HtmlElement nodes of this collection. When sub-nodes are
        /// searched, each element is followed by the elements found beneath it.
        /// </summary>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A collection of all the elements that match.</returns>
        public HtmlElementCollection FindAllElements(bool searchChildren)
        {
            HtmlElementCollection found = new HtmlElementCollection(null);
            foreach( HtmlNode candidate in base.List )
            {
                if( !( candidate is HtmlElement ) )
                {
                    continue;
                }

                found.Add( candidate );
                if( searchChildren )
                {
                    found.Add( ( (HtmlElement) candidate ).Nodes.FindAllElements( searchChildren ) );
                }
            }
            return found;
        }

        /// <summary>
        /// Builds a new document whose root nodes are clones of the nodes in this collection.
        /// </summary>
        /// <returns>A new document with these nodes</returns>
        public HtmlDocument CreateDocumentFragment()
        {
            HtmlNodeCollection copies = (HtmlNodeCollection) this.Clone();
            return new HtmlDocument( copies );
        }
    }
}
--- NEW FILE: HtmlStyleSheet.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;
using System.IO;
using System.ComponentModel;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// Summary description for HtmlStyle.
/// </summary>
    public class HtmlStyleSheet: HtmlNode
    {
        private string mText;

        /// <summary>
        /// Creates a stylesheet node holding the given definition.
        /// </summary>
        /// <param name="styleSheetSource">The definition of the stylesheet</param>
        public HtmlStyleSheet(string styleSheetSource)
        {
            mText = styleSheetSource;
        }

        /// <summary>
        /// Writes the stylesheet definition to the writer exactly as stored.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteHTML(TextWriter writer)
        {
            writer.Write( mText );
        }

        /// <summary>
        /// Writes the stylesheet definition wrapped in a CDATA section.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteXML(TextWriter writer)
        {
            writer.Write( "<![CDATA[" );
            if( mText != null )
            {
                // "]]>" inside the text would terminate the CDATA section early and
                // leave malformed XML. Splitting it across two adjacent sections keeps
                // the output well-formed; a parser reads the same characters back.
                writer.Write( mText.Replace( "]]>" , "]]]]><![CDATA[>" ) );
            }
            writer.Write( "]]>" );
        }

        /// <summary>
        /// Set or get the stylesheet definition
        /// </summary>
        [ Category("General"), Description("The definition of the stylesheet") ]
        public string Text
        {
            get
            {
                return mText;
            }
            set
            {
                mText = value;
            }
        }

        /// <summary>
        /// Creates a new stylesheet node holding the same definition.
        /// </summary>
        /// <returns>The new HtmlStyleSheet</returns>
        public override object Clone()
        {
            return new HtmlStyleSheet( mText );
        }
    }
}
--- NEW FILE: HtmlDocument.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;
using System.IO;
using System.Text;
using System.ComponentModel;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// This object represents an HTML document. This can be used to represent
    /// a complete document or just a fragment of a document.
    /// </summary>
    public class HtmlDocument
    {
        private HtmlNodeCollection mNodes;
        private string mDocumentType;

        /// <summary>
        /// Creates a new empty document.
        /// </summary>
        public HtmlDocument()
        {
            mNodes = new HtmlNodeCollection();
            mDocumentType = null;
        }

        /// <summary>
        /// Used to create instances of document fragments. The collection is stored
        /// as given (it is not copied here), so the nodes that are added should be
        /// cloned from the original document.
        /// </summary>
        /// <param name="nodes">The nodes that become the root nodes of this fragment</param>
        internal HtmlDocument(HtmlNodeCollection nodes)
        {
            mNodes = nodes;
            mDocumentType = null;
        }

        /// <summary>
        /// This will return all the root nodes of the document. In a complete
        /// document, this is just a collection containing the HTML element.
        /// </summary>
        public HtmlNodeCollection Nodes
        {
            get
            {
                return mNodes;
            }
        }

        /// <summary>
        /// The DOCTYPE for this document.
/// If no DOCTYPE is specified, this value
        /// is null. Similarly, assigning null to this will indicate that this document
        /// has no type.
        /// </summary>
        public string DocumentType
        {
            get
            {
                return mDocumentType;
            }
            set
            {
                mDocumentType = value;
            }
        }

        /// <summary>
        /// Writes the DOCTYPE declaration when a document type is set; writes
        /// nothing otherwise.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        private void WriteDocumentTypeDeclaration(TextWriter writer)
        {
            if( DocumentType == null )
            {
                return;
            }
            writer.Write( "<!DOCTYPE " );
            writer.Write( DocumentType );
            writer.Write( ">" );
        }

        /// <summary>
        /// Writes the DOCTYPE declaration (if any) followed by the HTML of every root node.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public void WriteHTML(TextWriter writer)
        {
            WriteDocumentTypeDeclaration( writer );
            Nodes.WriteHTML( writer );
        }

        /// <summary>
        /// Writes the DOCTYPE declaration (if any) followed by the XML of every root node.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public void WriteXML(TextWriter writer)
        {
            WriteDocumentTypeDeclaration( writer );
            Nodes.WriteXML( writer );
        }

        /// <summary>
        /// The text that WriteHTML produces for this document.
        /// </summary>
        [ Category("General"), Description("The HTML representation of the document tree from this point") ]
        public string HTML
        {
            get
            {
                StringWriter buffer = new StringWriter();
                WriteHTML( buffer );
                return buffer.ToString();
            }
        }

        /// <summary>
        /// The text that WriteXML produces for this document.
        /// </summary>
        [ Category("General"), Description("The XML representation of the document tree from this point") ]
        public string XML
        {
            get
            {
                StringWriter buffer = new StringWriter();
                WriteXML( buffer );
                return buffer.ToString();
            }
        }
    }
}
--- NEW FILE: HtmlHelper.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;
using System.Collections.Specialized;
using System.Globalization;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// Internal helper class for HTML processing
    /// </summary>
    internal class HtmlHelper
    {
        // Lower-case names of the elements that are written without a closing tag.
        private static readonly StringCollection mOpenEndedTags;

        /// <summary>
        /// All members are static, so the tag table is built once here.
        /// </summary>
        static HtmlHelper()
        {
            mOpenEndedTags = new StringCollection();
            // This lot is taken from the HTML4 spec
            mOpenEndedTags.AddRange( new string[] {
                "area" , "base" , "basefont" , "br" , "col" , "frame" , "hr" ,
                "img" , "input" , "isindex" , "link" , "meta" , "param"
            } );
        }

        /// <summary>
        /// Returns true if the named element should not be closed in HTML. The
        /// name is matched without regard to case.
        /// </summary>
        /// <param name="elementName">The name of the element. A null name yields false.</param>
        /// <returns>True if the element should not be closed</returns>
        internal static bool ElementDoesNotRequireClosing(string elementName)
        {
            if( elementName == null )
            {
                return false;
            }
            // Use the invariant culture: the parameterless ToLower() follows the
            // current culture, and under Turkish casing rules "IMG" or "INPUT"
            // lower-case to a dotless-i form that is not in the table.
            return mOpenEndedTags.Contains( elementName.ToLower( CultureInfo.InvariantCulture ) );
        }
    }
}
--- NEW FILE: HtmlText.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;
using System.IO;
using System.ComponentModel;
using System.Web;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// A node holding a run of text. The text is HTML-encoded whenever it is written out.
    /// </summary>
    public class HtmlText: HtmlNode
    {
        protected string mText;

        /// <summary>
        /// Creates a text node holding the given text.
        /// </summary>
        /// <param name="text">The text contained in the node</param>
        public HtmlText(string text)
        {
            mText = text;
        }

        /// <summary>
        /// Writes the text, HTML-encoded, to the writer.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteHTML(TextWriter writer)
        {
            HttpUtility.HtmlEncode( mText , writer );
        }

        /// <summary>
        /// Writes the text to the writer using the same HTML encoding as WriteHTML.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteXML(TextWriter writer)
        {
            HttpUtility.HtmlEncode( mText , writer );
        }

        /// <summary>
        /// Retrieves the text directly.
/// </summary>
        /// <returns>The text held by this node, without any encoding</returns>
        public override string ToString()
        {
            return mText;
        }

        /// <summary>
        /// Set or get the text
        /// </summary>
        [ Category("General"), Description("The text contained in the node") ]
        public string Text
        {
            get
            {
                return mText;
            }
            set
            {
                mText = value;
            }
        }

        /// <summary>
        /// Creates a new text node holding the same text.
        /// </summary>
        /// <returns>The new HtmlText</returns>
        public override object Clone()
        {
            return new HtmlText( mText );
        }
    }
}
--- NEW FILE: SgmlComment.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;
using System.IO;
using System.ComponentModel;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// A comment that is written out between "&lt;!" and "&gt;".
    /// </summary>
    public class SgmlComment: HtmlComment
    {
        /// <summary>
        /// Creates the comment from the text that appears between the delimiters.
        /// </summary>
        /// <param name="text">The text of the comment</param>
        public SgmlComment(string text) : base( text )
        {
        }

        /// <summary>
        /// Writes the opening delimiter, the comment text and the closing delimiter.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        private void WriteDelimited(TextWriter writer)
        {
            writer.Write( "<!" );
            writer.Write( Text );
            writer.Write( ">" );
        }

        /// <summary>
        /// Writes this comment in HTML form.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteHTML(TextWriter writer)
        {
            WriteDelimited( writer );
        }

        /// <summary>
        /// Writes this comment in XML form; the output is the same as the HTML form.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteXML(TextWriter writer)
        {
            WriteDelimited( writer );
        }

        /// <summary>
        /// This makes a duplicate of this comment.
        /// </summary>
        /// <returns>A duplicate of this comment</returns>
        public override object Clone()
        {
            return new SgmlComment( Text );
        }
    }
}
--- NEW FILE: HtmlScript.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;
using System.IO;
using System.ComponentModel;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// Summary description for HtmlScript.
/// </summary>
    public class HtmlScript: HtmlNode
    {
        private string mText;

        /// <summary>
        /// Creates a script node holding the given source code.
        /// </summary>
        /// <param name="source">The source code of the script</param>
        public HtmlScript(string source)
        {
            mText = source;
        }

        /// <summary>
        /// Writes the script source to the writer exactly as stored.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteHTML(TextWriter writer)
        {
            writer.Write( mText );
        }

        /// <summary>
        /// Writes the script source wrapped in a CDATA section.
        /// </summary>
        /// <param name="writer">The writer to write on</param>
        public override void WriteXML(TextWriter writer)
        {
            writer.Write( "<![CDATA[" );
            if( mText != null )
            {
                // Script code can legitimately contain "]]>" (e.g. "a[b[0]]>c"), which
                // would terminate the CDATA section early and leave malformed XML.
                // Splitting it across two adjacent sections keeps the output
                // well-formed; a parser reads the same characters back.
                writer.Write( mText.Replace( "]]>" , "]]]]><![CDATA[>" ) );
            }
            writer.Write( "]]>" );
        }

        /// <summary>
        /// Set or get the source code
        /// </summary>
        [ Category("General"), Description("The source code of the script") ]
        public string Text
        {
            get
            {
                return mText;
            }
            set
            {
                mText = value;
            }
        }

        /// <summary>
        /// Creates a new script node holding the same source code.
        /// </summary>
        /// <returns>The new HtmlScript</returns>
        public override object Clone()
        {
            return new HtmlScript( mText );
        }
    }
}
--- NEW FILE: HtmlDomainTreeParser.cs ---
// Copyright Andy Powney 2004. http://powney.demon.co.uk/milhtml.html
// Originally published under the GNU Public License. Included in this
// library under the Apache License 2.0 with permission from Andy.
using System;

namespace Adapdev.Web.Html
{
    /// <summary>
    /// This parser will build a domain tree of the document that is being
    /// parsed. The various navigation functionality within the other objects
    /// in this library depend on this being used.
    /// </summary>
    public class HtmlDomainTreeParser: HtmlParser
    {
        private bool mIgnoreUnbalancedClosing;
        private bool mIgnoreEmptyTextNodes;

        /// <summary>
        /// This will create a new domain tree parser with both flags switched off.
        /// </summary>
        public HtmlDomainTreeParser() : this( false , false )
        {
        }

        /// <summary>
        /// This will create a new domain tree parser that keeps empty text nodes.
        /// </summary>
        /// <param name="ignoreUnbalancedClosing">Initial value of IgnoreUnbalancedClosing</param>
        public HtmlDomainTreeParser(bool ignoreUnbalancedClosing) : this( ignoreUnbalancedClosing , false )
        {
        }

        /// <summary>
        /// This will create a new domain tree parser with both flags given explicitly.
        /// </summary>
        /// <param name="ignoreUnbalancedClosing">Initial value of IgnoreUnbalancedClosing</param>
        /// <param name="ignoreEmptyTextNodes">Initial value of IgnoreEmptyTextNodes</param>
        public HtmlDomainTreeParser(bool ignoreUnbalancedClosing,bool ignoreEmptyTextNodes)
        {
            mIgnoreUnbalancedClosing = ignoreUnbalancedClosing;
            mIgnoreEmptyTextNodes = ignoreEmptyTextNodes;
        }

        /// <summary>
        /// Setting this flag will cause the parser to ignore unbalanced close tags.
/// On many webpages, you will discover that close tags don't necessarily
        /// tally up with open tags. FORM-tags are notorious for this.
        /// </summary>
        public bool IgnoreUnbalancedClosing
        {
            get
            {
                return mIgnoreUnbalancedClosing;
            }
            set
            {
                mIgnoreUnbalancedClosing = value;
            }
        }

        /// <summary>
        /// When this flag is set, text nodes that consist purely of whitespace are
        /// not added to the document.
        /// </summary>
        public bool IgnoreEmptyTextNodes
        {
            get
            {
                return mIgnoreEmptyTextNodes;
            }
            set
            {
                mIgnoreEmptyTextNodes = value;
            }
        }

        /// <summary>
        /// Receives a piece of text and adds it to the document's root nodes,
        /// unless IgnoreEmptyTextNodes is set and the text trims down to nothing.
        /// </summary>
        /// <param name="document">The document being built</param>
        /// <param name="htmlText">The text that has been read</param>
        protected override void OnText(HtmlDocument document,HtmlText htmlText)
        {
            // The text is only inspected when the flag asks for filtering.
            bool skip = IgnoreEmptyTextNodes && htmlText.Text.Trim().Length == 0;
            if( !skip )
            {
                document.Nodes.Add( htmlText );
            }
        }

        /// <summary>
        /// This is called by the base parser when a piece of script has been read.
        /// </summary>
        /// <param name="document">The document being built</param>
        /// <param name="scriptSource">The script source code that has been read</param>
        prote... [truncated message content] |