[Adapdev-commits] Adapdev/src/Adapdev.Web/Html/XPath Header.cs,1.2,1.3 HtmlAttribute.cs,1.2,1.3 Html
Status: Beta
Brought to you by:
intesar66
From: Sean M. <int...@us...> - 2005-11-16 07:02:11
|
Update of /cvsroot/adapdev/Adapdev/src/Adapdev.Web/Html/XPath In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv909/src/Adapdev.Web/Html/XPath Added Files: Header.cs HtmlAttribute.cs HtmlDocument.cs HtmlEntity.cs HtmlNode.cs HtmlNodeNavigator.cs HtmlWeb.cs MixedCodeDocument.cs ParseReader.cs crc32.cs readme.txt tools.cs Log Message: --- NEW FILE: HtmlWeb.cs --- // HtmlAgilityPack V1.0 - Simon Mourier <si...@mi...> using System; using System.IO; using System.Net; using System.Xml; using System.Xml.Serialization; using System.Xml.Xsl; using Microsoft.Win32; namespace Adapdev.Web.Html.XPath { /// <summary> /// A utility class to get HTML document from HTTP. /// </summary> public class HtmlWeb { /// <summary> /// Represents the method that will handle the PreRequest event. /// </summary> public delegate bool PreRequestHandler(HttpWebRequest request); /// <summary> /// Represents the method that will handle the PostResponse event. /// </summary> public delegate void PostResponseHandler(HttpWebRequest request, HttpWebResponse response); /// <summary> /// Represents the method that will handle the PreHandleDocument event. /// </summary> public delegate void PreHandleDocumentHandler(HtmlDocument document); private int _streamBufferSize = 1024; private string _cachePath; private bool _usingCache; private bool _fromCache; private bool _cacheOnly; private bool _useCookies; private int _requestDuration; private bool _autoDetectEncoding = true; private HttpStatusCode _statusCode = HttpStatusCode.OK; private Uri _responseUri; /// <summary> /// Occurs before an HTTP request is executed. /// </summary> public PreRequestHandler PreRequest; /// <summary> /// Occurs after an HTTP request has been executed. /// </summary> public PostResponseHandler PostResponse; /// <summary> /// Occurs before an HTML document is handled. /// </summary> public PreHandleDocumentHandler PreHandleDocument; /// <summary> /// Creates an instance of an HtmlWeb class. 
/// </summary>
        public HtmlWeb()
        {
        }

        /// <summary>
        /// Gets an HTML document from an Internet resource using the GET verb and saves it to the specified file.
        /// </summary>
        /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="path">The location of the file where you want to save the document.</param>
        public void Get(string url, string path)
        {
            // Forward to the three-argument overload with the default verb.
            Get(url, path, "GET");
        }

        /// <summary>
        /// Gets an HTML document from an Internet resource and saves it to the specified file.
        /// </summary>
        /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="path">The location of the file where you want to save the document.</param>
        /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
        /// <exception cref="HtmlWebException">The URL scheme is neither http nor https.</exception>
        public void Get(string url, string path, string method)
        {
            Uri target = new Uri(url);
            string scheme = target.Scheme;

            // Only the two HTTP flavours are served by this entry point.
            bool overHttp = (scheme == Uri.UriSchemeHttps) || (scheme == Uri.UriSchemeHttp);
            if (!overHttp)
            {
                throw new HtmlWebException("Unsupported uri scheme: '" + scheme + "'.");
            }

            // No in-memory document is requested by this overload.
            Get(target, method, path, null);
        }

        /// <summary>
        /// Gets an HTML document from an Internet resource using the GET verb.
        /// </summary>
        /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <returns>A new HTML document.</returns>
        public HtmlDocument Load(string url)
        {
            return Load(url, "GET");
        }

        /// <summary>
        /// Loads an HTML document from an Internet resource.
/// </summary>
        /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
        /// <returns>A new HTML document.</returns>
        /// <exception cref="HtmlWebException">The URL scheme is not http, https or file.</exception>
        public HtmlDocument Load(string url, string method)
        {
            Uri uri = new Uri(url);
            HtmlDocument doc;
            if ((uri.Scheme == Uri.UriSchemeHttps) || (uri.Scheme == Uri.UriSchemeHttp))
            {
                doc = LoadUrl(uri, method);
            }
            else if (uri.Scheme == Uri.UriSchemeFile)
            {
                // Use the same parser options as LoadUrl. The original assigned
                // OptionAutoCloseOnEnd twice (false, then true), which looks like a
                // copy/paste slip for OptionFixNestedTags.
                doc = new HtmlDocument();
                doc.OptionAutoCloseOnEnd = false;
                doc.OptionFixNestedTags = true;
                doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
            }
            else
            {
                throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
            }

            // Give subscribers a chance to inspect or alter the document before it is returned.
            if (PreHandleDocument != null)
            {
                PreHandleDocument(doc);
            }
            return doc;
        }

        // Tells whether the file at the given path maps, through its extension, to an HTML content type.
        private bool IsCacheHtmlContent(string path)
        {
            string ct = GetContentTypeForExtension(Path.GetExtension(path), null);
            return IsHtmlContent(ct);
        }

        // Tells whether a content type starts with "text/html" (case-insensitive).
        private bool IsHtmlContent(string contentType)
        {
            // A missing content type (IsCacheHtmlContent passes null as the default)
            // is simply "not HTML" rather than a NullReferenceException.
            if (contentType == null)
            {
                return false;
            }
            return contentType.ToLower().StartsWith("text/html");
        }

        // Path of the XML file holding the cached response headers for a URL.
        private string GetCacheHeadersPath(Uri uri)
        {
            // The suffix is appended (not Path.Combine'd) so the header file sits
            // next to the cached document and shares its name.
            return GetCachePath(uri) + ".h.xml";
        }

        /// <summary>
        /// Gets the cache file path for a specified url.
        /// </summary>
        /// <param name="uri">The url for which to retrieve the cache path. May not be null.</param>
        /// <returns>The cache file path.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
        /// <exception cref="HtmlWebException">Caching is not enabled.</exception>
        public string GetCachePath(Uri uri)
        {
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }
            if (!UsingCache)
            {
                throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
            }

            // NOTE(review): the root page of every host maps to the same ".htm" file,
            // and only Host + AbsolutePath are used for other pages - confirm that
            // these collisions are acceptable.
            string cachePath;
            if (uri.AbsolutePath == "/")
            {
                cachePath = Path.Combine(_cachePath, ".htm");
            }
            else
            {
                cachePath = Path.Combine(_cachePath, (uri.Host + uri.AbsolutePath).Replace('/', '\\'));
            }
            return cachePath;
        }

        /// <summary>
        /// Gets a value indicating if the last document was retrieved from the cache.
/// </summary>
        public bool FromCache
        {
            get { return _fromCache; }
        }

        /// <summary>
        /// Gets the URI of the Internet resource that actually responded to the request.
        /// </summary>
        public Uri ResponseUri
        {
            get { return _responseUri; }
        }

        /// <summary>
        /// Gets or Sets a value indicating whether to get document only from the cache.
        /// If this is set to true and document is not found in the cache, nothing will be loaded.
        /// </summary>
        /// <exception cref="HtmlWebException">Set to true while the cache is not enabled.</exception>
        public bool CacheOnly
        {
            get { return _cacheOnly; }
            set
            {
                // Turning cache-only mode on requires a usable cache; turning it off never fails.
                if (value)
                {
                    if (!UsingCache)
                    {
                        throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
                    }
                }
                _cacheOnly = value;
            }
        }

        /// <summary>
        /// Gets or Sets a value indicating if cookies will be stored.
        /// </summary>
        public bool UseCookies
        {
            get { return _useCookies; }
            set { _useCookies = value; }
        }

        /// <summary>
        /// Gets the last request duration in milliseconds.
        /// </summary>
        public int RequestDuration
        {
            get { return _requestDuration; }
        }

        /// <summary>
        /// Gets or Sets a value indicating if document encoding must be automatically detected.
        /// </summary>
        public bool AutoDetectEncoding
        {
            get { return _autoDetectEncoding; }
            set { _autoDetectEncoding = value; }
        }

        /// <summary>
        /// Gets the last request status.
        /// </summary>
        public HttpStatusCode StatusCode
        {
            get { return _statusCode; }
        }

        /// <summary>
        /// Gets or Sets the size of the buffer used for memory operations.
/// </summary>
        /// <exception cref="ArgumentException">The assigned value is zero or negative.</exception>
        public int StreamBufferSize
        {
            get
            {
                return _streamBufferSize;
            }
            set
            {
                // Validate the incoming value, not the field it is about to replace.
                if (value <= 0)
                {
                    throw new ArgumentException("Size must be greater than zero.");
                }
                _streamBufferSize = value;
            }
        }

        // Fetches a URL into a new document and records the resulting status code.
        private HtmlDocument LoadUrl(Uri uri, string method)
        {
            HtmlDocument doc = new HtmlDocument();
            doc.OptionAutoCloseOnEnd = false;
            doc.OptionFixNestedTags = true;
            _statusCode = Get(uri, method, null, doc);
            if (_statusCode == HttpStatusCode.NotModified)
            {
                // Nothing was downloaded: read the copy (and its encoding) from the cache.
                doc.DetectEncodingAndLoad(GetCachePath(uri));
            }
            return doc;
        }

        // Copies the cached file to 'path' (when one is given) and gives the copy
        // the same last-write time as the cache entry.
        private static void CopyCachedFile(string cachePath, string path)
        {
            if (path == null)
            {
                return;
            }
            IOLibrary.CopyAlways(cachePath, path);
            File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
        }

        // Executes the request and routes the response body to the cache, to 'path'
        // and/or into 'doc'.
        //   uri    - resource to request (callers only pass http/https URIs).
        //   method - HTTP verb.
        //   path   - optional file that receives the content; may be null.
        //   doc    - optional document that receives HTML content; may be null.
        // Returns the response status. NotModified is also returned when the content
        // was served from the cache (cache-only mode, or no response could be obtained
        // but a cached copy exists); ResetContent is returned when a PreRequest handler
        // vetoed the request.
        private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc)
        {
            string cachePath = null;
            bool oldFile = false;

            HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
            req.Method = method;

            _fromCache = false;
            _requestDuration = 0;
            int tc = Environment.TickCount;

            if (UsingCache)
            {
                cachePath = GetCachePath(req.RequestUri);
                if (File.Exists(cachePath))
                {
                    // SaveStream stamps the server's Last-Modified date into the file's
                    // last-write time, so that is the date to send back. The last access
                    // time changes whenever the cache is merely read.
                    req.IfModifiedSince = File.GetLastWriteTime(cachePath);
                    oldFile = true;
                }
            }

            if (_cacheOnly)
            {
                if (!File.Exists(cachePath))
                {
                    throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
                }
                CopyCachedFile(cachePath, path);
                _fromCache = true;
                return HttpStatusCode.NotModified;
            }

            if (_useCookies)
            {
                req.CookieContainer = new CookieContainer();
            }

            if (PreRequest != null)
            {
                // allow our user to change the request at will
                if (!PreRequest(req))
                {
                    return HttpStatusCode.ResetContent;
                }
            }

            HttpWebResponse resp;
            try
            {
                resp = req.GetResponse() as HttpWebResponse;
            }
            catch (WebException we)
            {
                _requestDuration = Environment.TickCount - tc;
                resp = (HttpWebResponse)we.Response;
                if (resp == null)
                {
                    // No response at all: fall back to the cached copy if there is one.
                    if (oldFile)
                    {
                        CopyCachedFile(cachePath, path);
                        return HttpStatusCode.NotModified;
                    }
                    throw;
                }
                // Otherwise the error response is processed like any other response.
            }
            catch (Exception)
            {
                _requestDuration = Environment.TickCount - tc;
                throw;
            }

            // From here on the response is closed on every exit path. The original
            // only closed it after a body had been read.
            try
            {
                // allow our user to get some info from the response
                if (PostResponse != null)
                {
                    PostResponse(req, resp);
                }

                _requestDuration = Environment.TickCount - tc;
                _responseUri = resp.ResponseUri;

                bool html = IsHtmlContent(resp.ContentType);

                // NOTE(review): ContentEncoding carries the Content-Encoding header, which
                // is not necessarily a character set name - confirm this is intended.
                System.Text.Encoding respenc = null;
                if ((resp.ContentEncoding != null) && (resp.ContentEncoding.Length > 0))
                {
                    respenc = System.Text.Encoding.GetEncoding(resp.ContentEncoding);
                }

                // Read the status before the response is closed in the finally block.
                HttpStatusCode status = resp.StatusCode;

                if (status == HttpStatusCode.NotModified)
                {
                    if (!UsingCache)
                    {
                        // this should *never* happen...
                        throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
                    }
                    _fromCache = true;
                    CopyCachedFile(cachePath, path);
                    return status;
                }

                Stream s = resp.GetResponseStream();
                if (s != null)
                {
                    if (UsingCache)
                    {
                        // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                        SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);

                        // save headers
                        SaveCacheHeaders(req.RequestUri, resp);

                        CopyCachedFile(cachePath, path);

                        // The stream has been consumed by SaveStream, so a requested
                        // document is filled from the file just written. The original
                        // left the document empty on a fresh download.
                        if ((doc != null) && html)
                        {
                            doc.DetectEncodingAndLoad(cachePath);
                        }
                    }
                    else if (path != null)
                    {
                        // No cache to copy from: write the body straight to the target
                        // file. The original silently ignored 'path' here.
                        SaveStream(s, path, RemoveMilliseconds(resp.LastModified), _streamBufferSize);
                    }
                    else if ((doc != null) && html)
                    {
                        // try to work in-memory
                        if (respenc != null)
                        {
                            doc.Load(s, respenc);
                        }
                        else
                        {
                            doc.Load(s);
                        }
                    }
                }
                return status;
            }
            finally
            {
                resp.Close();
            }
        }

        // Reads one cached response header (matched case-insensitively on its name)
        // from the headers file written by SaveCacheHeaders; returns 'def' when absent.
        private string GetCacheHeader(Uri requestUri, string name, string def)
        {
            // note: some headers are collection (ex: www-authenticate)
            // we don't handle that here
            XmlDocument doc = new XmlDocument();
            doc.Load(GetCacheHeadersPath(requestUri));
            XmlNode node = doc.SelectSingleNode("//h[translate(@n, 'abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ')='" + name.ToUpper() + "']");
            if (node == null)
            {
                return def;
            }
            // SaveCacheHeaders stores the header name in "n" and its value in "v".
            // The original looked up an attribute named after the header itself.
            return node.Attributes["v"].Value;
        }

        // Writes the response headers as <c><h n="name" v="value"/>...</c> next to the cached document.
        private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
        {
            // we cache the original headers aside the cached document.
            string file = GetCacheHeadersPath(requestUri);
            XmlDocument doc = new XmlDocument();
            doc.LoadXml("<c></c>");
            XmlNode cache = doc.FirstChild;
            foreach (string header in resp.Headers)
            {
                XmlNode entry = doc.CreateElement("h");
                XmlAttribute att = doc.CreateAttribute("n");
                att.Value = header;
                entry.Attributes.Append(att);

                att = doc.CreateAttribute("v");
                att.Value = resp.Headers[header];
                entry.Attributes.Append(att);

                cache.AppendChild(entry);
            }
            doc.Save(file);
        }

        // Copies 'stream' to the file at 'path' in chunks of 'streamBufferSize' bytes,
        // closes both, then sets the file's last-write time to 'touchDate'.
        // Returns the number of bytes written.
        private static long SaveStream(Stream stream, string path, DateTime touchDate, int streamBufferSize)
        {
            FilePreparePath(path);
            FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write);
            BinaryReader br = null;
            BinaryWriter bw = null;
            long len;
            try
            {
                br = new BinaryReader(stream);
                bw = new BinaryWriter(fs);
                len = 0;
                byte[] buffer;
                do
                {
                    // ReadBytes returns an empty array once the stream is exhausted.
                    buffer = br.ReadBytes(streamBufferSize);
                    len += buffer.Length;
                    if (buffer.Length > 0)
                    {
                        bw.Write(buffer);
                    }
                }
                while (buffer.Length > 0);
            }
            finally
            {
                if (br != null)
                {
                    br.Close();
                }
                if (bw != null)
                {
                    bw.Flush();
                    bw.Close();
                }
                if (fs != null)
                {
                    fs.Close();
                }
            }
            File.SetLastWriteTime(path, touchDate);
            return len;
        }

        // Makes 'target' writable: clears the read-only flag of an existing file,
        // or creates the missing directory for a new one.
        private static void FilePreparePath(string target)
        {
            if (File.Exists(target))
            {
                FileAttributes atts = File.GetAttributes(target);
                File.SetAttributes(target, atts & ~FileAttributes.ReadOnly);
            }
            else
            {
                string dir = Path.GetDirectoryName(target);
                if (!Directory.Exists(dir))
                {
                    Directory.CreateDirectory(dir);
                }
            }
        }

        // Returns 't' truncated to whole seconds.
        private static DateTime RemoveMilliseconds(DateTime t)
        {
            return new DateTime(t.Year, t.Month, t.Day, t.Hour, t.Minute, t.Second, 0);
        }

        /// <summary>
        /// Gets the path extension for a given MIME content type.
/// </summary>
        /// <param name="contentType">The input MIME content type.</param>
        /// <param name="def">The default path extension to return if any error occurs.</param>
        /// <returns>The MIME content type's path extension.</returns>
        public static string GetExtensionForContentType(string contentType, string def)
        {
            if ((contentType == null) || (contentType.Length == 0))
            {
                return def;
            }

            string ext;
            RegistryKey reg = null;
            try
            {
                reg = Registry.ClassesRoot.OpenSubKey(@"MIME\Database\Content Type\" + contentType, false);
                if (reg == null)
                {
                    // Unknown content type: no need to go through an exception for this.
                    ext = def;
                }
                else
                {
                    ext = (string)reg.GetValue("Extension", def);
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: any registry or cast failure yields the default.
                ext = def;
            }
            finally
            {
                // The opened sub key holds a registry handle; release it.
                if (reg != null)
                {
                    reg.Close();
                }
            }
            return ext;
        }

        /// <summary>
        /// Gets the MIME content type for a given path extension.
        /// </summary>
        /// <param name="extension">The input path extension.</param>
        /// <param name="def">The default content type to return if any error occurs.</param>
        /// <returns>The path extension's MIME content type.</returns>
        public static string GetContentTypeForExtension(string extension, string def)
        {
            if ((extension == null) || (extension.Length == 0))
            {
                return def;
            }

            string contentType;
            RegistryKey reg = null;
            try
            {
                reg = Registry.ClassesRoot.OpenSubKey(extension, false);
                if (reg == null)
                {
                    contentType = def;
                }
                else
                {
                    // The MIME type of an extension key is stored in its "Content Type"
                    // value; the key's default value (read by the original) is the
                    // ProgID, e.g. "htmlfile".
                    contentType = (string)reg.GetValue("Content Type", def);
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: any registry or cast failure yields the default.
                contentType = def;
            }
            finally
            {
                if (reg != null)
                {
                    reg.Close();
                }
            }
            return contentType;
        }

        /// <summary>
        /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter.
        /// </summary>
        /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="writer">The XmlTextWriter to which you want to save.</param>
        public void LoadHtmlAsXml(string htmlUrl, XmlTextWriter writer)
        {
            HtmlDocument doc = Load(htmlUrl);
            doc.Save(writer);
        }

        /// <summary>
        /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
/// </summary>
        /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
        /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
        /// <param name="writer">The XmlTextWriter to which you want to save.</param>
        public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer)
        {
            LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, null);
        }

        // Sets an un-namespaced XSLT parameter, replacing any previous value.
        // AddParam alone throws when the parameter is already present, which happens
        // as soon as a caller reuses the same argument list for a second call.
        private static void SetXsltParam(XsltArgumentList args, string name, object value)
        {
            args.RemoveParam(name, "");
            args.AddParam(name, "", value);
        }

        /// <summary>
        /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
        /// The stylesheet receives three extra parameters: "url", "requestDuration" and "fromCache".
        /// </summary>
        /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". May not be null.</param>
        /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
        /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform. May be null.</param>
        /// <param name="writer">The XmlTextWriter to which you want to save.</param>
        /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
        /// <exception cref="ArgumentNullException"><paramref name="htmlUrl"/> is null.</exception>
        public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer, string xmlPath)
        {
            if (htmlUrl == null)
            {
                throw new ArgumentNullException("htmlUrl");
            }

            HtmlDocument doc = Load(htmlUrl);

            if (xmlPath != null)
            {
                // Close the debug file even when saving fails, so the handle is not leaked.
                XmlTextWriter w = new XmlTextWriter(xmlPath, doc.Encoding);
                try
                {
                    doc.Save(w);
                }
                finally
                {
                    w.Close();
                }
            }

            if (xsltArgs == null)
            {
                xsltArgs = new XsltArgumentList();
            }

            // add some useful variables to the xslt doc
            SetXsltParam(xsltArgs, "url", htmlUrl);
            SetXsltParam(xsltArgs, "requestDuration", RequestDuration);
            SetXsltParam(xsltArgs, "fromCache", FromCache);

            XslTransform xslt = new XslTransform();
            xslt.Load(xsltUrl);
            xslt.Transform(doc, xsltArgs, writer, null);
        }

        /// <summary>
        /// Creates an instance of the given type from the specified Internet resource.
        /// </summary>
        /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="type">The requested type.</param>
        /// <returns>A newly created instance.</returns>
        public object CreateInstance(string url, Type type)
        {
            return CreateInstance(url, null, null, type);
        }

        /// <summary>
        /// Creates an instance of the given type from the specified Internet resource.
        /// </summary>
        /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
        /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
        /// <param name="type">The requested type.</param>
        /// <returns>A newly created instance.</returns>
        public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type)
        {
            return CreateInstance(htmlUrl, xsltUrl, xsltArgs, type, null);
        }

        /// <summary>
        /// Creates an instance of the given type from the specified Internet resource.
/// </summary>
        /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
        /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load. When null, no transformation is applied and <paramref name="xmlPath"/> is not used.</param>
        /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
        /// <param name="type">The requested type.</param>
        /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
        /// <returns>A newly created instance.</returns>
        /// <exception cref="Exception">Deserialization failed; the message carries the XML text and the original error is the inner exception.</exception>
        public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type, string xmlPath)
        {
            // Render the (optionally transformed) document as XML text in memory.
            StringWriter sw = new StringWriter();
            XmlTextWriter writer = new XmlTextWriter(sw);
            if (xsltUrl == null)
            {
                LoadHtmlAsXml(htmlUrl, writer);
            }
            else
            {
                // A null xmlPath means "no debug copy", exactly like the four-argument overload.
                LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, xmlPath);
            }
            writer.Flush();

            // Feed that XML to the serializer for the requested type.
            StringReader sr = new StringReader(sw.ToString());
            XmlTextReader reader = new XmlTextReader(sr);
            XmlSerializer serializer = new XmlSerializer(type);
            object o = null;
            try
            {
                o = serializer.Deserialize(reader);
            }
            catch (InvalidOperationException ex)
            {
                // Same exception type and message as before, but the original error
                // is now kept as the inner exception instead of being discarded.
                throw new Exception(ex.ToString() + ", --- xml:" + sw.ToString(), ex);
            }
            return o;
        }

        /// <summary>
        /// Gets or Sets the cache path. If null, no caching mechanism will be used.
        /// </summary>
        public string CachePath
        {
            get
            {
                return _cachePath;
            }
            set
            {
                _cachePath = value;
            }
        }

        /// <summary>
        /// Gets or Sets a value indicating whether the caching mechanisms should be used or not.
        /// Always reads as false while no cache path is defined.
        /// </summary>
        /// <exception cref="HtmlWebException">Set to true while no cache path is defined.</exception>
        public bool UsingCache
        {
            get
            {
                if (_cachePath == null)
                {
                    return false;
                }
                return _usingCache;
            }
            set
            {
                if ((value) && (_cachePath == null))
                {
                    throw new HtmlWebException("You need to define a CachePath first.");
                }
                _usingCache = value;
            }
        }
    }

    /// <summary>
    /// Represents an exception thrown by the HtmlWeb utility class.
/// </summary>
    public class HtmlWebException: Exception
    {
        /// <summary>
        /// Initializes the exception with its message.
        /// </summary>
        /// <param name="message">The exception's message.</param>
        public HtmlWebException(string message)
            : base(message)
        {
        }
    }
}

--- NEW FILE: HtmlNodeNavigator.cs ---
// HtmlAgilityPack V1.0 - Simon Mourier <si...@mi...>
using System;
using System.Diagnostics;
using System.Collections.Specialized;
using System.IO;
using System.Text;
using System.Xml;
using System.Xml.XPath;

namespace Adapdev.Web.Html.XPath
{
    // Name table used by the navigator; every call is delegated to a wrapped NameTable.
    internal class HtmlNameTable: XmlNameTable
    {
        private NameTable _nametable = new NameTable();

        internal HtmlNameTable()
        {
        }

        // Returns the table's entry for the string, adding the string when it is not there yet.
        internal string GetOrAdd(string array)
        {
            string existing = Get(array);
            return (existing != null) ? existing : Add(array);
        }

        public override string Add(string array)
        {
            return _nametable.Add(array);
        }

        public override string Get(string array)
        {
            return _nametable.Get(array);
        }

        public override string Get(char[] array, int offset, int length)
        {
            return _nametable.Get(array, offset, length);
        }

        public override string Add(char[] array, int offset, int length)
        {
            return _nametable.Add(array, offset, length);
        }
    }

    /// <summary>
    /// Represents an HTML navigator on an HTML document seen as a data store.
/// </summary>
    public class HtmlNodeNavigator: XPathNavigator
    {
        private HtmlDocument _doc = new HtmlDocument();
        private HtmlNode _currentnode;
        // Index of the attribute the navigator stands on, or -1 when it stands on the node itself.
        private int _attindex;
        private HtmlNameTable _nametable = new HtmlNameTable();

        internal bool Trace = false;

        internal HtmlNodeNavigator()
        {
            Reset();
        }

        // Positions the navigator on the document node, off any attribute.
        private void Reset()
        {
            InternalTrace(null);
            _currentnode = _doc.DocumentNode;
            _attindex = -1;
        }

        // Writes one diagnostic line describing the navigator's position, tagged with
        // the name of the calling member. Calls are only compiled when TRACE is
        // defined, and nothing is written unless the Trace field is set.
        [Conditional("TRACE")]
        internal void InternalTrace(object Value)
        {
            if (!Trace)
            {
                return;
            }

            // Frame 1 is the member that called InternalTrace.
            StackFrame caller = new StackFrame(1, true);
            string name = caller.GetMethod().Name;

            string nodename = "(null)";
            string nodevalue = "(null)";
            if (_currentnode != null)
            {
                nodename = _currentnode.Name;
                switch (_currentnode.NodeType)
                {
                    case HtmlNodeType.Comment:
                        nodevalue = ((HtmlCommentNode)_currentnode).Comment;
                        break;

                    case HtmlNodeType.Document:
                        nodevalue = "";
                        break;

                    case HtmlNodeType.Text:
                        nodevalue = ((HtmlTextNode)_currentnode).Text;
                        break;

                    default:
                        // Shallow clone: only the node's own markup, not its children.
                        nodevalue = _currentnode.CloneNode(false).OuterHtml;
                        break;
                }
            }

            string message = "oid=" + GetHashCode() + ",n=" + nodename + ",a=" + _attindex + "," + ",v=" + nodevalue + "," + Value;
            System.Diagnostics.Trace.WriteLine(message, "N!"+ name);
        }

        // Creates a navigator over 'doc' positioned on 'currentNode', which must belong to 'doc'.
        internal HtmlNodeNavigator(HtmlDocument doc, HtmlNode currentNode)
        {
            if (currentNode == null)
            {
                throw new ArgumentNullException("currentNode");
            }
            if (currentNode.OwnerDocument != doc)
            {
                throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
            }

            InternalTrace(null);

            _doc = doc;
            Reset();
            _currentnode = currentNode;
        }

        // Copy constructor: the new navigator shares the document and name table of 'nav'.
        private HtmlNodeNavigator(HtmlNodeNavigator nav)
        {
            if (nav == null)
            {
                throw new ArgumentNullException("nav");
            }

            InternalTrace(null);

            _doc = nav._doc;
            _currentnode = nav._currentnode;
            _attindex = nav._attindex;
            _nametable = nav._nametable; // REVIEW: should we do this?
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream.
/// </summary>
        /// <param name="stream">The input stream.</param>
        public HtmlNodeNavigator(Stream stream)
        {
            // Load into the navigator's own document, then start at its root.
            _doc.Load(stream);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
        public HtmlNodeNavigator(Stream stream, bool detectEncodingFromByteOrderMarks)
        {
            _doc.Load(stream, detectEncodingFromByteOrderMarks);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public HtmlNodeNavigator(Stream stream, Encoding encoding)
        {
            _doc.Load(stream, encoding);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream.
        /// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
        public HtmlNodeNavigator(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
        {
            _doc.Load(stream, encoding, detectEncodingFromByteOrderMarks);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a stream.
/// </summary>
        /// <param name="stream">The input stream.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
        /// <param name="buffersize">The minimum buffer size.</param>
        public HtmlNodeNavigator(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
        {
            // Load into the navigator's own document, then start at its root.
            _doc.Load(stream, encoding, detectEncodingFromByteOrderMarks, buffersize);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a TextReader.
        /// </summary>
        /// <param name="reader">The TextReader used to feed the HTML data into the document.</param>
        public HtmlNodeNavigator(TextReader reader)
        {
            _doc.Load(reader);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        public HtmlNodeNavigator(string path)
        {
            _doc.Load(path);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        public HtmlNodeNavigator(string path, bool detectEncodingFromByteOrderMarks)
        {
            _doc.Load(path, detectEncodingFromByteOrderMarks);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file.
/// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public HtmlNodeNavigator(string path, Encoding encoding)
        {
            // Load into the navigator's own document, then start at its root.
            _doc.Load(path, encoding);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        public HtmlNodeNavigator(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
        {
            _doc.Load(path, encoding, detectEncodingFromByteOrderMarks);
            Reset();
        }

        /// <summary>
        /// Initializes a new instance of the HtmlNavigator and loads an HTML document from a file.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
        /// <param name="buffersize">The minimum buffer size.</param>
        public HtmlNodeNavigator(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
        {
            _doc.Load(path, encoding, detectEncodingFromByteOrderMarks, buffersize);
            Reset();
        }

        /// <summary>
        /// Gets the name of the current HTML node without the namespace prefix.
        /// When the navigator stands on an attribute, this is the attribute's name.
        /// </summary>
        public override string LocalName
        {
            get
            {
                bool onAttribute = (_attindex != -1);
                if (!onAttribute)
                {
                    InternalTrace("node>" + _currentnode.Name);
                    return _nametable.GetOrAdd(_currentnode.Name);
                }

                InternalTrace("att>" + _currentnode.Attributes[_attindex].Name);
                return _nametable.GetOrAdd(_currentnode.Attributes[_attindex].Name);
            }
        }

        /// <summary>
        /// Gets the namespace URI (as defined in the W3C Namespace Specification) of the current node.
/// Always returns string.Empty in the case of HtmlNavigator implementation.
        /// </summary>
        public override string NamespaceURI
        {
            get
            {
                InternalTrace(">");
                return _nametable.GetOrAdd(string.Empty);
            }
        }

        /// <summary>
        /// Gets the qualified name of the current node.
        /// When the navigator stands on an attribute, this is the attribute's name.
        /// </summary>
        public override string Name
        {
            get
            {
                // Prefix is always empty here, so the qualified name must match
                // LocalName. The original returned the owner element's name even
                // when positioned on an attribute.
                if (_attindex != -1)
                {
                    InternalTrace("att>" + _currentnode.Attributes[_attindex].Name);
                    return _nametable.GetOrAdd(_currentnode.Attributes[_attindex].Name);
                }

                InternalTrace(">" + _currentnode.Name);
                return _nametable.GetOrAdd(_currentnode.Name);
            }
        }

        /// <summary>
        /// Gets the prefix associated with the current node.
        /// Always returns string.Empty in the case of HtmlNavigator implementation.
        /// </summary>
        public override string Prefix
        {
            get
            {
                InternalTrace(null);
                return _nametable.GetOrAdd(string.Empty);
            }
        }

        /// <summary>
        /// Gets the type of the current node.
        /// </summary>
        /// <exception cref="NotImplementedException">The current HTML node type has no XPath equivalent here.</exception>
        public override XPathNodeType NodeType
        {
            get
            {
                switch (_currentnode.NodeType)
                {
                    case HtmlNodeType.Comment:
                        InternalTrace(">" + XPathNodeType.Comment);
                        return XPathNodeType.Comment;

                    case HtmlNodeType.Document:
                        InternalTrace(">" + XPathNodeType.Root);
                        return XPathNodeType.Root;

                    case HtmlNodeType.Text:
                        InternalTrace(">" + XPathNodeType.Text);
                        return XPathNodeType.Text;

                    case HtmlNodeType.Element:
                    {
                        // An element with an attribute index set stands for that attribute.
                        if (_attindex != -1)
                        {
                            InternalTrace(">" + XPathNodeType.Attribute);
                            return XPathNodeType.Attribute;
                        }
                        InternalTrace(">" + XPathNodeType.Element);
                        return XPathNodeType.Element;
                    }

                    default:
                        throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + _currentnode.NodeType);
                }
            }
        }

        /// <summary>
        /// Gets the text value of the current node.
/// </summary>
        /// <exception cref="NotImplementedException">The current HTML node type is not handled.</exception>
        public override string Value
        {
            get
            {
                InternalTrace("nt=" + _currentnode.NodeType);

                HtmlNodeType kind = _currentnode.NodeType;
                if (kind == HtmlNodeType.Comment)
                {
                    InternalTrace(">" + ((HtmlCommentNode)_currentnode).Comment);
                    return ((HtmlCommentNode)_currentnode).Comment;
                }
                if (kind == HtmlNodeType.Document)
                {
                    InternalTrace(">");
                    return "";
                }
                if (kind == HtmlNodeType.Text)
                {
                    InternalTrace(">" + ((HtmlTextNode)_currentnode).Text);
                    return ((HtmlTextNode)_currentnode).Text;
                }
                if (kind == HtmlNodeType.Element)
                {
                    // On an attribute the value is the attribute's; otherwise the element's inner text.
                    if (_attindex == -1)
                    {
                        return _currentnode.InnerText;
                    }
                    InternalTrace(">" + _currentnode.Attributes[_attindex].Value);
                    return _currentnode.Attributes[_attindex].Value;
                }

                throw new NotImplementedException("Internal error: Unhandled HtmlNodeType: " + _currentnode.NodeType);
            }
        }

        /// <summary>
        /// Gets the base URI for the current node.
        /// Always returns string.Empty in the case of HtmlNavigator implementation.
        /// </summary>
        public override string BaseURI
        {
            get
            {
                InternalTrace(">");
                return _nametable.GetOrAdd(string.Empty);
            }
        }

        /// <summary>
        /// Gets the xml:lang scope for the current node.
        /// Always returns string.Empty in the case of HtmlNavigator implementation.
        /// </summary>
        public override string XmlLang
        {
            get
            {
                InternalTrace(null);
                return _nametable.GetOrAdd(string.Empty);
            }
        }

        /// <summary>
        /// Gets a value indicating whether the current node is an empty element.
        /// Implemented as "has no children".
        /// </summary>
        public override bool IsEmptyElement
        {
            get
            {
                InternalTrace(">" + !HasChildren);
                // REVIEW: is this ok?
                return !HasChildren;
            }
        }

        /// <summary>
        /// Gets the XmlNameTable associated with this implementation.
        /// </summary>
        public override XmlNameTable NameTable
        {
            get
            {
                InternalTrace(null);
                return _nametable;
            }
        }

        /// <summary>
        /// Gets a value indicating whether the current node has any attributes.
/// </summary>
        public override bool HasAttributes
        {
            get
            {
                InternalTrace(">" + (_currentnode.Attributes.Count>0));
                return (_currentnode.Attributes.Count>0);
            }
        }

        /// <summary>
        /// Gets a value indicating whether the current node has child nodes.
        /// </summary>
        public override bool HasChildren
        {
            get
            {
                InternalTrace(">" + (_currentnode.ChildNodes.Count>0));
                return (_currentnode.ChildNodes.Count>0);
            }
        }

        /// <summary>
        /// Moves to the next sibling of the current node.
        /// </summary>
        /// <returns>true if the navigator is successful moving to the next sibling node, false if there are no more siblings or if the navigator is currently positioned on an attribute node. If false, the position of the navigator is unchanged.</returns>
        public override bool MoveToNext()
        {
            // Attributes have no siblings on this axis, as the contract above states.
            // The original moved the owner element and kept the attribute index.
            if (_attindex != -1)
            {
                InternalTrace(">false");
                return false;
            }
            if (_currentnode.NextSibling == null)
            {
                InternalTrace(">false");
                return false;
            }
            InternalTrace("_c=" + _currentnode.CloneNode(false).OuterHtml);
            InternalTrace("_n=" + _currentnode.NextSibling.CloneNode(false).OuterHtml);
            _currentnode = _currentnode.NextSibling;
            InternalTrace(">true");
            return true;
        }

        /// <summary>
        /// Moves to the previous sibling of the current node.
        /// </summary>
        /// <returns>true if the navigator is successful moving to the previous sibling node, false if there is no previous sibling or if the navigator is currently positioned on an attribute node.</returns>
        public override bool MoveToPrevious()
        {
            // Same attribute rule as MoveToNext.
            if (_attindex != -1)
            {
                InternalTrace(">false");
                return false;
            }
            if (_currentnode.PreviousSibling == null)
            {
                InternalTrace(">false");
                return false;
            }
            _currentnode = _currentnode.PreviousSibling;
            InternalTrace(">true");
            return true;
        }

        /// <summary>
        /// Moves to the first sibling of the current node.
/// </summary>
/// <returns>true if the navigator is successful moving to the first sibling node, false if there is no first sibling or if the navigator is currently positioned on an attribute node. If false, the position of the navigator is unchanged.</returns>
public override bool MoveToFirst()
{
    // Attribute positions have no element siblings (see the <returns> contract).
    if (_attindex != -1)
    {
        InternalTrace(">false");
        return false;
    }

    if (_currentnode.ParentNode == null)
    {
        InternalTrace(">false");
        return false;
    }

    if (_currentnode.ParentNode.FirstChild == null)
    {
        InternalTrace(">false");
        return false;
    }

    _currentnode = _currentnode.ParentNode.FirstChild;
    InternalTrace(">true");
    return true;
}

/// <summary>
/// Moves to the first child of the current node.
/// </summary>
/// <returns>true if there is a first child node, otherwise false. Always false while the navigator is positioned on an attribute, since attributes have no child nodes.</returns>
public override bool MoveToFirstChild()
{
    // Without this guard the navigator would descend into the owning element's
    // children while keeping an attribute index that belongs to the parent.
    if (_attindex != -1)
    {
        InternalTrace(">false");
        return false;
    }

    if (!_currentnode.HasChildNodes)
    {
        InternalTrace(">false");
        return false;
    }

    _currentnode = _currentnode.ChildNodes[0];
    InternalTrace(">true");
    return true;
}

/// <summary>
/// Moves to the parent of the current node. When the navigator is positioned
/// on an attribute, the parent is the element that owns the attribute.
/// </summary>
/// <returns>true if there is a parent node, otherwise false.</returns>
public override bool MoveToParent()
{
    if (_attindex != -1)
    {
        // Leaving the attribute axis: stay on the same HTML node, which is the
        // owning element, and drop the attribute index.
        _attindex = -1;
        InternalTrace(">true");
        return true;
    }

    if (_currentnode.ParentNode == null)
    {
        InternalTrace(">false");
        return false;
    }

    _currentnode = _currentnode.ParentNode;
    InternalTrace(">true");
    return true;
}

/// <summary>
/// Moves to the root node to which the current node belongs.
/// </summary>
public override void MoveToRoot()
{
    _currentnode = _doc.DocumentNode;
    // The root is never an attribute position; clear any stale attribute index.
    _attindex = -1;
    InternalTrace(null);
}

/// <summary>
/// Moves to the same position as the specified HtmlNavigator.
/// </summary>
/// <param name="other">The HtmlNavigator positioned on the node that you want to move to.</param>
/// <returns>true if successful, otherwise false.
/// If false, the position of the navigator is unchanged.</returns>
public override bool MoveTo(XPathNavigator other)
{
    HtmlNodeNavigator nav = other as HtmlNodeNavigator;
    if (nav == null)
    {
        InternalTrace(">false (nav is not an HtmlNodeNavigator)");
        return false;
    }

    InternalTrace("moveto oid=" + nav.GetHashCode() + ", n:" + nav._currentnode.Name + ", a:" + nav._attindex);

    // Only navigators over the same document can share a position.
    if (nav._doc == _doc)
    {
        _currentnode = nav._currentnode;
        _attindex = nav._attindex;
        InternalTrace(">true");
        return true;
    }

    // we don't know how to handle that
    InternalTrace(">false (???)");
    return false;
}

/// <summary>
/// Moves to the node that has an attribute of type ID whose value matches the specified string.
/// </summary>
/// <param name="id">A string representing the ID value of the node to which you want to move. This argument does not need to be atomized.</param>
/// <returns>true if the move was successful, otherwise false. If false, the position of the navigator is unchanged.</returns>
public override bool MoveToId(string id)
{
    InternalTrace("id=" + id);
    HtmlNode node = _doc.GetElementbyId(id);
    if (node == null)
    {
        InternalTrace(">false");
        return false;
    }

    _currentnode = node;
    // The target is the element itself. An attribute index left over from the
    // previous position would be meaningless (possibly out of range) here.
    _attindex = -1;
    InternalTrace(">true");
    return true;
}

/// <summary>
/// Determines whether the current HtmlNavigator is at the same position as the specified HtmlNavigator.
/// Two navigators share a position only when they are on the same HTML node and on the same
/// attribute of that node (or both on the node itself).
/// </summary>
/// <param name="other">The HtmlNavigator that you want to compare against.</param>
/// <returns>true if the two navigators have the same position, otherwise, false.</returns>
public override bool IsSamePosition(XPathNavigator other)
{
    HtmlNodeNavigator nav = other as HtmlNodeNavigator;
    if (nav == null)
    {
        InternalTrace(">false");
        return false;
    }

    // Comparing the node alone would report an element and one of its
    // attributes (or two different attributes) as the same position.
    bool same = (nav._currentnode == _currentnode) && (nav._attindex == _attindex);
    InternalTrace(">" + same);
    return same;
}

/// <summary>
/// Creates a new HtmlNavigator positioned at the same node as this HtmlNavigator.
/// </summary>
/// <returns>A new HtmlNavigator object positioned at the same node as the original HtmlNavigator.</returns>
public override XPathNavigator Clone()
{
    InternalTrace(null);
    XPathNavigator copy = new HtmlNodeNavigator(this);
    return copy;
}

/// <summary>
/// Looks up an attribute of the current node by local name and returns its value.
/// </summary>
/// <param name="localName">The local name of the HTML attribute.</param>
/// <param name="namespaceURI">The namespace URI of the attribute. Only traced; it plays no part in the lookup.</param>
/// <returns>The value of the matching HTML attribute, or null when the current node has no attribute with that name.</returns>
public override string GetAttribute(string localName, string namespaceURI)
{
    InternalTrace("localName=" + localName + ", namespaceURI=" + namespaceURI);

    HtmlAttribute match = _currentnode.Attributes[localName];
    if (match != null)
    {
        InternalTrace(">" + match.Value);
        return match.Value;
    }

    InternalTrace(">null");
    return null;
}

/// <summary>
/// Positions the navigator on the attribute of the current node that has the given local name.
/// </summary>
/// <param name="localName">The local name of the HTML attribute.</param>
/// <param name="namespaceURI">The namespace URI of the attribute. Only traced; it plays no part in the lookup.</param>
/// <returns>true if the HTML attribute is found, otherwise, false. If false, the position of the navigator does not change.</returns>
public override bool MoveToAttribute(string localName, string namespaceURI)
{
    InternalTrace("localName=" + localName + ", namespaceURI=" + namespaceURI);

    int found = _currentnode.Attributes.GetAttributeIndex(localName);
    bool exists = (found != -1);
    if (exists)
    {
        _attindex = found;
        InternalTrace(">true");
        return true;
    }

    InternalTrace(">false");
    return false;
}

/// <summary>
/// Moves to the first HTML attribute.
/// </summary>
/// <returns>true if the navigator is successful moving to the first HTML attribute, otherwise, false.</returns>
public override bool MoveToFirstAttribute()
{
    if (HasAttributes)
    {
        _attindex = 0;
        InternalTrace(">true");
        return true;
    }

    InternalTrace(">false");
    return false;
}

/// <summary>
/// Advances the attribute index by one.
/// </summary>
/// <returns>true if the attribute index was advanced; false if it already refers to the last attribute of the current node (or the node has no attributes), in which case the position is unchanged.</returns>
public override bool MoveToNextAttribute()
{
    InternalTrace(null);

    int lastIndex = _currentnode.Attributes.Count - 1;
    if (_attindex < lastIndex)
    {
        _attindex++;
        InternalTrace(">true");
        return true;
    }

    InternalTrace(">false");
    return false;
}

/// <summary>
/// Returns the value of the namespace node corresponding to the specified local name.
/// Namespaces are not supported: the result is always string.Empty.
/// </summary>
/// <param name="name">The local name of the namespace node.</param>
/// <returns>Always string.Empty.</returns>
public override string GetNamespace(string name)
{
    InternalTrace("name=" + name);
    string noNamespace = string.Empty;
    return noNamespace;
}

/// <summary>
/// Moves the XPathNavigator to the namespace node with the specified local name.
/// Namespaces are not supported: the move always fails.
/// </summary>
/// <param name="name">The local name of the namespace node.</param>
/// <returns>Always false.</returns>
public override bool MoveToNamespace(string name)
{
    InternalTrace("name=" + name);
    return false;
}

/// <summary>
/// Moves the XPathNavigator to the first namespace node of the current element.
/// Namespaces are not supported: the move always fails.
/// </summary>
/// <param name="scope">An XPathNamespaceScope value describing the namespace scope.</param>
/// <returns>Always false.</returns>
public override bool MoveToFirstNamespace(XPathNamespaceScope scope)
{
    InternalTrace(null);
    return false;
}

/// <summary>
/// Moves the XPathNavigator to the next namespace node.
/// Always returns false for the HtmlNavigator implementation. /// </summary> /// <param name="scope">An XPathNamespaceScope value describing the namespace scope.</param> /// <returns>Always returns false for the HtmlNavigator implementation.</returns> public override bool MoveToNextNamespace(XPathNamespaceScope scope) { InternalTrace(null); return false; } /// <summary> /// Gets the current HTML node. /// </summary> public HtmlNode CurrentNode { get { return _currentnode; } } /// <summary> /// Gets the current HTML document. /// </summary> public HtmlDocument CurrentDocument { get { return _doc; } } } } --- NEW FILE: HtmlNode.cs --- // HtmlAgilityPack V1.0 - Simon Mourier <si...@mi...> using System; using System.Collections; using System.IO; using System.Xml; using System.Xml.XPath; namespace Adapdev.Web.Html.XPath { /// <summary> /// Flags that describe the behavior of an Element node. /// </summary> public enum HtmlElementFlag { /// <summary> /// The node is a CDATA node. /// </summary> CData = 1, [...2220 lines suppressed...] /// Gets or sets the comment text of the node. /// </summary> public string Comment { get { if (_comment == null) { return base.InnerHtml; } return _comment; } set { _comment = value; } } } } --- NEW FILE: HtmlAttribute.cs --- // HtmlAgilityPack V1.0 - Simon Mourier <si...@mi...> using System; using System.Collections; namespace Adapdev.Web.Html.XPath { /// <summary> /// Represents an HTML attribute.
/// </summary>
public class HtmlAttribute: IComparable
{
    // Position of the attribute in the parsed document.
    internal int _line = 0;
    internal int _lineposition = 0;
    internal int _streamposition = 0;

    // Offsets into the owner document's text; Name and Value are read
    // from these on first access.
    internal int _namestartindex = 0;
    internal int _namelength = 0;
    internal int _valuestartindex = 0;
    internal int _valuelength = 0;

    internal HtmlDocument _ownerdocument; // attribute can exists without a node
    internal HtmlNode _ownernode;
    internal string _name;
    internal string _value;

    internal HtmlAttribute(HtmlDocument ownerdocument)
    {
        _ownerdocument = ownerdocument;
    }

    /// <summary>
    /// Creates a duplicate of this attribute. The copy shares the owner document,
    /// name and value, but is not attached to any node.
    /// </summary>
    /// <returns>The cloned attribute.</returns>
    public HtmlAttribute Clone()
    {
        HtmlAttribute att = new HtmlAttribute(_ownerdocument);
        att.Name = Name;
        att.Value = Value;
        return att;
    }

    /// <summary>
    /// Compares the current instance with another attribute. Comparison is based on attributes' name.
    /// </summary>
    /// <param name="obj">An attribute to compare with this instance.</param>
    /// <returns>A 32-bit signed integer that indicates the relative order of the names comparison.</returns>
    /// <exception cref="ArgumentException"><paramref name="obj"/> is null or is not an HtmlAttribute.</exception>
    public int CompareTo(object obj)
    {
        HtmlAttribute att = obj as HtmlAttribute;
        if (att == null)
        {
            // ArgumentException(message, paramName): the single-argument overload
            // would have used "obj" as the message instead of the parameter name.
            throw new ArgumentException("Object must be of type HtmlAttribute.", "obj");
        }
        return Name.CompareTo(att.Name);
    }

    internal string XmlName
    {
        get
        {
            return HtmlDocument.GetXmlName(Name);
        }
    }

    internal string XmlValue
    {
        get
        {
            return Value;
        }
    }

    /// <summary>
    /// Gets or sets the qualified name of the attribute. The name is always stored in lower case.
    /// Setting it marks the owner node (if any) as changed.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public string Name
    {
        get
        {
            if (_name == null)
            {
                // First access: cut the name out of the owner document's text.
                _name = _ownerdocument._text.Substring(_namestartindex, _namelength).ToLower();
            }
            return _name;
        }
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }
            _name = value.ToLower();
            if (_ownernode != null)
            {
                _ownernode._innerchanged = true;
                _ownernode._outerchanged = true;
            }
        }
    }

    /// <summary>
    /// Gets or sets the value of the attribute.
/// </summary>
public string Value
{
    get
    {
        if (_value != null)
        {
            return _value;
        }

        // First access: cut the value out of the owner document's text.
        _value = _ownerdocument._text.Substring(_valuestartindex, _valuelength);
        return _value;
    }
    set
    {
        _value = value;
        if (_ownernode == null)
        {
            return;
        }

        // Flag the owner node as modified.
        _ownernode._innerchanged = true;
        _ownernode._outerchanged = true;
    }
}

/// <summary>
/// Gets the line number of this attribute in the document.
/// </summary>
public int Line
{
    get { return _line; }
}

/// <summary>
/// Gets the column number of this attribute in the document.
/// </summary>
public int LinePosition
{
    get { return _lineposition; }
}

/// <summary>
/// Gets the stream position of this attribute in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get { return _streamposition; }
}

/// <summary>
/// Gets the HTML node to which this attribute belongs.
/// </summary>
public HtmlNode OwnerNode
{
    get { return _ownernode; }
}

/// <summary>
/// Gets the HTML document to which this attribute belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get { return _ownerdocument; }
}
} // end of class HtmlAttribute

/// <summary>
/// Represents a combined list and name-keyed collection of HTML attributes.
/// </summary>
public class HtmlAttributeCollection: IEnumerable
{
    // Attributes keyed by name.
    internal Hashtable _hashitems = new Hashtable();
    // Attributes in list order.
    private ArrayList _items = new ArrayList();
    private HtmlNode _ownernode;

    internal HtmlAttributeCollection(HtmlNode ownernode)
    {
        _ownernode = ownernode;
    }

    /// <summary>
    /// Inserts the specified attribute as the last attribute in the collection.
    /// </summary>
    /// <param name="newAttribute">The attribute to insert.
/// May not be null.</param>
/// <returns>The appended attribute.</returns>
public HtmlAttribute Append(HtmlAttribute newAttribute)
{
    if (newAttribute == null)
    {
        throw new ArgumentNullException("newAttribute");
    }

    // Register by name, attach to the owner node, then add at the end of the list.
    string key = newAttribute.Name;
    _hashitems[key] = newAttribute;
    newAttribute._ownernode = _ownernode;
    _items.Add(newAttribute);

    // Flag the owner node as modified.
    _ownernode._innerchanged = true;
    _ownernode._outerchanged = true;
    return newAttribute;
}

/// <summary>
/// Creates a new attribute through the owner document and appends it to the collection.
/// </summary>
/// <param name="name">The name of the attribute to insert.</param>
/// <returns>The appended attribute.</returns>
public HtmlAttribute Append(string name)
{
    return Append(_ownernode._ownerdocument.CreateAttribute(name));
}

/// <summary>
/// Creates a new attribute with a value through the owner document and appends it to the collection.
/// </summary>
/// <param name="name">The name of the attribute to insert.</param>
/// <param name="value">The value of the attribute to insert.</param>
/// <returns>The appended attribute.</returns>
public HtmlAttribute Append(string name, string value)
{
    return Append(_ownernode._ownerdocument.CreateAttribute(name, value));
}

/// <summary>
/// Inserts the specified attribute as the first attribute in the collection.
/// </summary>
/// <param name="newAttribute">The attribute to insert. May not be null.</param>
/// <returns>The prepended attribute.</returns>
public HtmlAttribute Prepend(HtmlAttribute newAttribute)
{
    if (newAttribute == null)
    {
        throw new ArgumentNullException("newAttribute");
    }

    // Register by name, attach to the owner node, then insert at the head of the list.
    string key = newAttribute.Name;
    _hashitems[key] = newAttribute;
    newAttribute._ownernode = _ownernode;
    _items.Insert(0, newAttribute);

    // Flag the owner node as modified.
    _ownernode._innerchanged = true;
    _ownernode._outerchanged = true;
    return newAttribute;
}

/// <summary>
/// Removes the attribute at the specified index.
/// </summary>
/// <param name="index">The index of the attribute to remove.</param>
public void RemoveAt(int index)
{
    HtmlAttribute att = (HtmlAttribute)_items[index];
    _hashitems.Remove(att.Name);
    _items.RemoveAt(index);

    // Flag the owner node as modified.
    _ownernode._innerchanged = true;
    _ownernode._outerchanged = true;
}

/// <summary>
/// Removes a given attribute from the list.
/// </summary>
/// <param name="attribute">The attribute to remove. May not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="attribute"/> is null.</exception>
/// <exception cref="IndexOutOfRangeException">The attribute is not part of this collection.</exception>
public void Remove(HtmlAttribute attribute)
{
    if (attribute == null)
    {
        throw new ArgumentNullException("attribute");
    }
    int index = GetAttributeIndex(attribute);
    if (index == -1)
    {
        throw new IndexOutOfRangeException();
    }
    RemoveAt(index);
}

/// <summary>
/// Removes an attribute from the list, using its name. If there are more than one attributes with this name, they will all be removed.
/// </summary>
/// <param name="name">The attribute's name. May not be null. The match is made against the lower-cased name.</param>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
public void Remove(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    string lname = name.ToLower();

    // Walk backwards: RemoveAt shifts every later item down by one, so a
    // forward loop would skip the item that follows each removal and leave
    // adjacent duplicates behind.
    for(int i=_items.Count-1;i>=0;i--)
    {
        HtmlAttribute att = (HtmlAttribute)_items[i];
        if (att.Name == lname)
        {
            RemoveAt(i);
        }
    }
}

/// <summary>
/// Remove all attributes in the list.
/// </summary>
public void RemoveAll()
{
    _hashitems.Clear();
    _items.Clear();

    // Flag the owner node as modified.
    _ownernode._innerchanged = true;
    _ownernode._outerchanged = true;
}

/// <summary>
/// Gets the number of elements actually contained in the list.
/// </summary>
public int Count
{
    get { return _items.Count; }
}

// Returns the list position of the given attribute instance (compared by
// reference), or -1 when it is not in the list.
internal int GetAttributeIndex(HtmlAttribute attribute)
{
    if (attribute == null)
    {
        throw new ArgumentNullException("attribute");
    }

    int position = 0;
    while (position < _items.Count)
    {
        if (((HtmlAttribute)_items[position]) == attribute)
        {
            return position;
        }
        position++;
    }
    return -1;
}

// Returns the list position of the first attribute whose name equals the
// lower-cased argument, or -1 when there is none.
internal int GetAttributeIndex(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    string lname = name.ToLower();
    int position = 0;
    while (position < _items.Count)
    {
        HtmlAttribute candidate = (HtmlAttribute)_items[position];
        if (candidate.Name == lname)
        {
            return position;
        }
        position++;
    }
    return -1;
}

/// <summary>
/// Gets a given attribute from the list using its name.
/// The lookup uses the lower-cased name and yields null when no attribute is registered under it.
/// </summary>
public HtmlAttribute this[string name]
{
    get
    {
        if (name == null)
        {
            throw new ArgumentNullException("name");
        }

        string key = name.ToLower();
        return _hashitems[key] as HtmlAttribute;
    }
}

/// <summary>
/// Gets the attribute at the specified index.
/// </summary>
public HtmlAttribute this[int index]
{
    get
    {
        object item = _items[index];
        return item as HtmlAttribute;
    }
}

// Empties both the name lookup and the list without touching the owner node.
internal void Clear()
{
    _hashitems.Clear();
    _items.Clear();
}

/// <summary>
/// Returns an enumerator that can iterate through the list.
/// </summary>
/// <returns>An enumerator over the attributes in list order.</returns>
public HtmlAttributeEnumerator GetEnumerator()
{
    HtmlAttributeEnumerator enumerator = new HtmlAttributeEnumerator(_items);
    return enumerator;
}

IEnumerator IEnumerable.GetEnumerator()
{
    return GetEnumerator();
}

/// <summary>
/// Represents an enumerator that can iterate through the list.
/// </summary>
public class HtmlAttributeEnumerator: IEnumerator
{
    int _index;
    ArrayList _items;

    internal HtmlAttributeEnumerator(ArrayList items)
    {
        _index = -1;
        _items = items;
    }

    /// <summary>
    /// Sets the enumerator to its initial position, which is before the first element in the collection.
    /// </summary>
    public void Reset()
    {
        _index = -1;
    }

    /// <summary>
    /// Advances the enumerator to the next element of the collection.
/// </summary> /// <returns>true if the enumerator was successfully advanced to the next element, false if the enumerator has passed the end of the collection.</returns> public bool MoveNext() { _index++; return (_index<_items.Count); } /// <summary> /// Gets the current element in the collection. /// </summary> public HtmlAttribute Current { get { return (HtmlAttribute)(_items[_index]); } } /// <summary> /// Gets the current element in the collection. /// </summary> object IEnumerator.Current { get { return (Current); } } } } } --- NEW FILE: HtmlDocument.cs --- // HtmlAgilityPack V1.0 - Simon Mourier <si...@mi...> using System; using System.IO; using System.Text; using System.Diagnostics; using System.Collections; using System.Text.RegularExpressions; using System.Xml; using System.Xml.XPath; namespace Adapdev.Web.Html.XPath { /// <summary> /// Represents the type of parsing error. /// </summary> public enum HtmlParseErrorCode { /// <summary> /// A tag was not closed. [...2040 lines suppressed...] } internal class EncodingFoundException: Exception { private Encoding _encoding; internal EncodingFoundException(Encoding encoding) { _encoding = encoding; } internal Encoding Encoding { get { return _encoding; } } } } --- NEW FILE: HtmlEntity.cs --- // HtmlAgilityPack V1.0 - Simon Mourier <si...@mi...> using System; using System.Collections; using System.Text; namespace Adapdev.Web.Html.XPath { /// <summary> /// A utility class to replace special characters by entities and vice-versa. 
/// Follows HTML 4.0 specification found at http://www.w3.org/TR/html4/sgml/entities.html /// </summary> public class HtmlEntity { private static Hashtable _entityName; private static Hashtable _entityValue; private static readonly int _maxEntitySize; private HtmlEntity() { } static HtmlEntity() { _entityName = new Hashtable(); _entityValue = new Hashtable(); #region Entities Definition _entityValue.Add("nbsp", 160); // no-break space = non-breaking space, U+00A0 ISOnum _entityName.Add(160, "nbsp"); _entityValue.Add("iexcl", 161); // inverted exclamation mark, U+00A1 ISOnum _entityName.Add(161, "iexcl"); _entityValue.Add("cent", 162); // cent sign, U+00A2 ISOnum _entityName.Add(162, "cent"); _entityValue.Add("pound", 163); // pound sign, U+00A3 ISOnum _entityName.Add(163, "pound"); _entityValue.Add("curren", 164); // currency sign, U+00A4 ISOnum _entityName.Add(164, "curren"); _entityValue.Add("yen", 165); // yen sign = yuan sign, U+00A5 ISOnum _entityName.Add(165, "yen"); _entityValue.Add("brvbar", 166); // broken bar = broken vertical bar, U+00A6 ISOnum _entityName.Add(166, "brvbar"); _entityValue.Add("sect", 167); // section sign, U+00A7 ISOnum _entityName.Add(167, "sect"); _entityValue.Add("uml", 168); // diaeresis = spacing diaeresis, U+00A8 ISOdia _entityName.Add(168, "uml"); _entityValue.Add("copy", 169); // copyright sign, U+00A9 ISOnum _entityName.Add(169, "copy"); _entityValue.Add("ordf", 170); // feminine ordinal indicator, U+00AA ISOnum _entityName.Add(170, "ordf"); _entityValue.Add("laquo", 171); // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum _entityName.Add(171, "laquo"); _entityValue.Add("not", 172); // not sign, U+00AC ISOnum _entityName.Add(172, "not"); _entityValue.Add("shy", 173); // soft hyphen = discretionary hyphen, U+00AD ISOnum _entityName.Add(173, "shy"); _entityValue.Add("reg", 174); // registered sign = registered trade mark sign, U+00AE ISOnum _entityName.Add(174, "reg"); _entityValue.Add("macr", 
175); // macron = spacing macron = overline = APL overbar, U+00AF ISOdia _entityName.Add(175, "macr"); _entityValue.Add("deg", 176); // degree sign, U+00B0 ISOnum _entityName.Add(176, "deg"); _entityValue.Add("plusmn", 177); // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum _entityName.Add(177, "plusmn"); _entityValue.Add("sup2", 178); // superscript two = superscript digit two = squared, U+00B2 ISOnum _entityName.Add(178, "sup2"); _entityValue.Add("sup3", 179); // superscript three = superscript digit three = cubed, U+00B3 ISOnum _entityName.Add(179, "sup3"); _entityValue.Add("acute", 180); // acute accent = spacing acute, U+00B4 ISOdia _entityName.Add(180, "acute"); _entityValue.Add("micro", 181); // micro sign, U+00B5 ISOnum _entityName.Add(181, "micro"); _entityValue.Add("para", 1... [truncated message content] |