From: <pb...@us...> - 2007-03-27 10:49:53
|
Revision: 235 http://mp-plugins.svn.sourceforge.net/mp-plugins/?rev=235&view=rev Author: pbb72 Date: 2007-03-27 03:49:51 -0700 (Tue, 27 Mar 2007) Log Message: ----------- more simplification and identifier renaming, fixed redirects, replaced substring conversion by regex conversion, added skin file Modified Paths: -------------- trunk/plugins/Documentation plugin/Documentation.cs trunk/plugins/Documentation plugin/DocumentationPage.cs Added Paths: ----------- trunk/plugins/Documentation plugin/skin/ trunk/plugins/Documentation plugin/skin/Documentation.xml Property Changed: ---------------- trunk/plugins/Documentation plugin/ Property changes on: trunk/plugins/Documentation plugin ___________________________________________________________________ Name: svn:ignore + *.bak Modified: trunk/plugins/Documentation plugin/Documentation.cs =================================================================== --- trunk/plugins/Documentation plugin/Documentation.cs 2007-03-27 10:35:10 UTC (rev 234) +++ trunk/plugins/Documentation plugin/Documentation.cs 2007-03-27 10:49:51 UTC (rev 235) @@ -46,7 +46,7 @@ #region SkinControls // map skin controls to member variables [SkinControlAttribute(10)] - protected GUIButtonControl buttonSearch = null; + protected GUIButtonControl buttonOpen = null; [SkinControlAttribute(14)] protected GUIButtonControl buttonBack = null; [SkinControlAttribute(12)] @@ -154,7 +154,13 @@ return Load(GUIGraphicsContext.Skin + @"\Documentation.xml"); } - protected override void OnClicked(int controlId, GUIControl control, + protected override void OnPageLoad() + { + GetAndDisplayArticle(String.Empty); // display start page + base.OnPageLoad(); + } + + protected override void OnClicked(int controlId, GUIControl control, MediaPortal.GUI.Library.Action.ActionType actionType) { // we don't want the user to start another search while one is already active @@ -162,7 +168,7 @@ return; // Here we want to open the OSD Keyboard to enter the searchstring - if (control == buttonSearch) + if (control == buttonOpen) { // If the search Button was clicked we need to bring up the search keyboard. VirtualKeyboard keyboard = (VirtualKeyboard)GUIWindowManager.GetWindow( @@ -181,23 +187,9 @@ searchterm = keyboard.Text; // If there was a string entered try getting the article. - if (searchterm != "") - { - Log.Info(_PluginName + ": Searchterm gotten from OSD keyboard: {0}", - searchterm); - GetAndDisplayArticle(searchterm); - } - // Else display an error dialog. - else - { - GUIDialogOK dlg = (GUIDialogOK)GUIWindowManager.GetWindow( - (int)GUIWindow.Window.WINDOW_DIALOG_OK); - dlg.SetHeading(GUILocalizeStrings.Get(257)); // Error - dlg.SetLine(1, GUILocalizeStrings.Get(2500)); // No searchterm entered! - dlg.SetLine(2, String.Empty); - dlg.SetLine(3, GUILocalizeStrings.Get(2501)); // Please enter valid searchterm! - dlg.DoModal(GUIWindowManager.ActiveWindow); - } + Log.Info(_PluginName + ": Searchterm gotten from OSD keyboard: {0}", + searchterm); + GetAndDisplayArticle(searchterm); } // The Button holding the Links to other articles if (control == buttonLinks) @@ -322,7 +314,7 @@ } } - // The main function. + // The core function. void GetAndDisplayArticle(string searchterm) { BackgroundWorker worker = new BackgroundWorker(); @@ -358,8 +350,8 @@ searchtermLabel.Label = e.Argument.ToString(); DocumentationPage page = new DocumentationPage(e.Argument.ToString()); - _pageText = page.GetText(); - _linkArray = page.GetLinkArray(); + _pageText = page.text; + _linkArray = page.linkArray(); _imageNameArray = page.GetImageArray(); _imageDescArray = page.GetImagedescArray(); Modified: trunk/plugins/Documentation plugin/DocumentationPage.cs =================================================================== --- trunk/plugins/Documentation plugin/DocumentationPage.cs 2007-03-27 10:35:10 UTC (rev 234) +++ trunk/plugins/Documentation plugin/DocumentationPage.cs 2007-03-27 10:49:51 UTC (rev 235) @@ -29,6 +29,7 @@ using System.Net; using System.IO; using System.Text; +using System.Text.RegularExpressions; using System.Threading; using System.Collections; using MediaPortal.GUI.Library; @@ -44,72 +45,30 @@ public class DocumentationPage { #region vars - private string _WikiURL = "http://wiki.team-mediaportal.com/{0}?action=raw"; + public string text = string.Empty; + public ArrayList linkArray = new ArrayList(); + public ArrayList imageArray = new ArrayList(); + public ArrayList imageDescArray = new ArrayList(); + private string _WikiURL = "http://wiki.team-mediaportal.com/{0}?action=raw"; private string _imagePattern = "attachment:"; - private string _title = string.Empty; - private string _unparsedPage = string.Empty; - private string _parsedPage = string.Empty; - private ArrayList _linkArray = new ArrayList(); - private ArrayList _imageArray = new ArrayList(); - private ArrayList _imageDescArray = new ArrayList(); #endregion #region constructors /// <summary>This constructor creates a new WikipediaArticle</summary> - /// <summary>Searchterm and language need to be given</summary> - /// <param name="title">The article's title</param> - /// <param name="language">Language of the Wikipedia page</param> - public DocumentationPage(string title) + /// <param name="pageName">The article's page name</param> + public DocumentationPage(string pageName) { - this._title = title; - GetWikipediaXML(); + GetWikiPage(pageName); ParseWikipediaArticle(); ParseLinksAndImages(); } #endregion - /// <summary>Returns the parsed article text.</summary> - /// <returns>String: parsed article</returns> - public string GetText() + /// <summary>Downloads the raw Wiki code.</summary> + private void GetWikiPage(string pageName) { - return _parsedPage; - } - - /// <summary>Returns the title of the article. Can differ from the passed parameter on - /// redirects for example.</summary> - /// <returns>String: title of the article</returns> - public string GetTitle() - { - return _title; - } - - /// <summary>Returns all names of images.</summary> - /// <returns>StringArray: images used in this article</returns> - public ArrayList GetImageArray() - { - return _imageArray; - } - - /// <summary>Returns all descriptions of images.</summary> - /// <returns>StringArray: images used in this article</returns> - public ArrayList GetImagedescArray() - { - return _imageDescArray; - } - - /// <summary>Returns the titles of all linked articles.</summary> - /// <returns>StringArray: titles of linked (internal) Wikipedia articles</returns> - public ArrayList GetLinkArray() - { - return _linkArray; - } - - /// <summary>Downloads the xml content from Wikipedia and cuts metadata like version - /// info.</summary> - private void GetWikipediaXML() - { // Build the URL to the Wikipedia page - System.Uri url = new System.Uri(String.Format(_WikiURL, this._title)); + System.Uri url = new System.Uri(String.Format(_WikiURL, this._pageName)); Log.Info("Wikipedia: Trying to get following URL: {0}", url.ToString()); // Here we get the content from the web and put it to a string @@ -120,7 +79,8 @@ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)"); Stream data = client.OpenRead(url); StreamReader reader = new StreamReader(data); - this._unparsedPage = reader.ReadToEnd(); + //this._unparsedPage = reader.ReadToEnd(); + this.text = reader.ReadToEnd(); reader.Close(); Log.Info("Wikipedia: Success! Downloaded all data."); } @@ -134,227 +94,83 @@ /// <summary>Cuts all special wiki syntax from the article to display plain text</summary> private void ParseWikipediaArticle() { - string tempParsedArticle = this._unparsedPage; + //string tempParsedPage = this._unparsedPage; + string tempParsedPage = this.text; - // Check if the article is empty, if so do not parse. - if (tempParsedArticle == string.Empty) + if (this.text.Length > 10 && this.text.Substring(0, 10) == "#REDIRECT ") { - Log.Info("Wikipedia: Empty article found. Try another Searchterm."); - this._unparsedPage = string.Empty; - } - // Here we check if there is only a redirect as article to handle it as a special - // article type - else if (tempParsedArticle.IndexOf("#REDIRECT") == 0) - { Log.Info("Wikipedia: #REDIRECT found."); - int iStart = tempParsedArticle.IndexOf("[[") + 2; - int iEnd = tempParsedArticle.IndexOf("]]", iStart); - // Extract the Text - string keyword = tempParsedArticle.Substring(iStart, iEnd - iStart); - this._unparsedPage = string.Empty; - this._title = keyword; - GetWikipediaXML(); + GetWikiPage(this.text.Substring(10)); ParseWikipediaArticle(); } - // Finally a well-formed article ;-) else { Log.Info("Wikipedia: Starting parsing."); - StringBuilder builder = new StringBuilder(tempParsedArticle); - int iStart = 0; - int iEnd = 0; - // Remove HTML comments - Log.Debug("Wikipedia: Remove HTML comments."); - while (tempParsedArticle.IndexOf("<!--") >= 0) - { - builder = new StringBuilder(tempParsedArticle); - iStart = tempParsedArticle.IndexOf("<!--"); - iEnd = tempParsedArticle.IndexOf("-->", iStart) + 6; + // remove control codes & remarks (#blah) + this.text = Regex.Replace(this.text, @"^#.+\n", String.Empty, + RegexOptions.Multiline); - try - { - builder.Remove(iStart, iEnd - iStart); - } - catch (Exception e) - { - Log.Error(e.ToString()); - Log.Error(builder.ToString()); - } - tempParsedArticle = builder.ToString(); - } + // TODO: no interpretation in headers (not even ``) + + // explicit hyperlinks ([:pagename:link text]) + this.text = Regex.Replace(this.text, @"\[:[^:]+:(?: |\t)*([^\]]+)\]", + "[$1]"); - // surrounded by {{ and }} is (atm) unusable stuff. - //Log.Debug("Wikipedia: Remove stuff between {{ and }}."); - while (tempParsedArticle.IndexOf("{{") >= 0) - { - builder = new StringBuilder(tempParsedArticle); - iStart = tempParsedArticle.IndexOf("{{"); - int iStart2 = iStart; - iEnd = tempParsedArticle.IndexOf("}}") + 2; + // TODO: [:page name] + // TODO: ["page name"] + + // camelcase hyperlinks that are not in headers (PageName) + // TODO: prevent camelcase links inside explicit hyperlinks + this.text = Regex.Replace(this.text, @"(?<!^=(?: )+|\w)((?:[A-Z][a-z0-9]+){2,})(?!\w)", + "[$1]", RegexOptions.Multiline); - // Between {{ and }} we can again have inner sets of {{ and }} - while (tempParsedArticle.IndexOf("{{", iStart2 + 2) >= 0 && - tempParsedArticle.IndexOf("{{", iStart2 + 2) < iEnd) - { - iStart2 = tempParsedArticle.IndexOf("{{", iStart2 + 2); - iEnd = tempParsedArticle.IndexOf("}}", iStart2) + 2; - iEnd = tempParsedArticle.IndexOf("}}", iEnd) + 2; - } + // de-linked camelcase (!PageName) + this.text = Regex.Replace(this.text, @"(?<=\W)!((?:[A-Z][a-z0-9]+){2,})(?=\W)", + "$1"); - try - { - builder.Remove(iStart, iEnd - iStart); - } - catch (Exception e) - { - Log.Error(e.ToString()); - Log.Error(builder.ToString()); - } + // TODO: /SubPage, SomePage/SomeSubPage + + // TODO: Wiki:WikiPage, MoinMoin:SomePage + + // enclosed external hyperlinks ([http://example.com]) + this.text = Regex.Replace(this.text, @"\[(http://[^ \]]+)\]", "$1"); - tempParsedArticle = builder.ToString(); - } + // external hyperlinks with label ([http://example.com an example]) + this.text = Regex.Replace(this.text, @"\[(http://[^ \]]+) ([^\]]+)\]", "$2 ($1)"); - // surrounded by {| and |} is (atm) unusable stuff. - //Log.Debug("Wikipedia: Remove stuff between {| and |}."); - while (tempParsedArticle.IndexOf("{|") >= 0) - { - builder = new StringBuilder(tempParsedArticle); - iStart = tempParsedArticle.IndexOf("{|"); - iEnd = tempParsedArticle.IndexOf("|}") + 2; + // images (attachment:file.gif) + this.text = Regex.Replace(this.text, @"attachment:([^\s\|]+)(?:\.gif|\.jpg)", + "[img:$1]"); - try - { - builder.Remove(iStart, iEnd - iStart); - } - catch (Exception e) - { - Log.Error(e.ToString()); - Log.Error(builder.ToString()); - } + // TODO: attachment:file.zip + + // TODO: bold & italic ('''text''' -> *text* & /text/) - tempParsedArticle = builder.ToString(); - } + // TODO: lists + + // tables (|| cell 1 || cell 2 ||) + this.text = Regex.Replace(this.text, @"\|{2,}<[^>]*>", "|"); - // Remove audio links. - Log.Debug("Wikipedia: Remove audio links."); - while (tempParsedArticle.IndexOf("<span") >= 0) - { - builder = new StringBuilder(tempParsedArticle); - iStart = tempParsedArticle.IndexOf("<span"); - iEnd = tempParsedArticle.IndexOf("</span>") + 13; + // single linebreaks in code, except ones around headers and tables + this.text = Regex.Replace(this.text, @"(?<!\r\n|=|\|)\r\n(?!\r\n|=|\|)", + " "); - try - { - builder.Remove(iStart, iEnd - iStart); - } - catch (Exception e) - { - Log.Error(e.ToString()); - Log.Error(builder.ToString()); - } + // paragraphs with excessive linebreaks + this.text = Regex.Replace(this.text, @"(?:\r\n){2,}", + Environment.NewLine + Environment.NewLine); - tempParsedArticle = builder.ToString(); - } + // whitespace around titles + this.text = Regex.Replace(this.text, @"(?:\r\n)+(=.+=)(?:\r\n)+", + Environment.NewLine + Environment.NewLine + "$1" + Environment.NewLine); - // Remove web references. - Log.Debug("Wikipedia: Remove web references."); - while (tempParsedArticle.IndexOf("<ref>") >= 0) - { - builder = new StringBuilder(tempParsedArticle); - iStart = tempParsedArticle.IndexOf("<ref"); - iEnd = tempParsedArticle.IndexOf("</ref>") + 12; + // forced linebreaks ([[BR]]) + this.text = Regex.Replace(this.text, @"\[\[BR\]\]", Environment.NewLine); - try - { - builder.Remove(iStart, iEnd - iStart); - } - catch (Exception e) - { - Log.Error(e.ToString()); - Log.Error(builder.ToString()); - } + // quote escaping (``) + this.text = Regex.Replace(this.text, @"``", String.Empty); - tempParsedArticle = builder.ToString(); - } - - // Remove <br /> - Log.Debug("Wikipedia: Remove <br />."); - builder.Replace("<br />", "\n"); - builder.Replace("<br style="clear:both"/>", "\n"); - builder.Replace("<br style="clear:left"/>", "\n"); - builder.Replace("<br style="clear:right"/>", "\n"); - - // Remove <sup> - Log.Debug("Wikipedia: Remove <sup>."); - builder.Replace("<sup>", "^"); - builder.Replace("</sup>", ""); - - // surrounded by ''' and ''' is bold text, atm also unusable. - Log.Debug("Wikipedia: Remove \'\'\'."); - builder.Replace("'''", ""); - - // surrounded by '' and '' is italic text, atm also unusable. - Log.Debug("Wikipedia: Remove \'\'."); - builder.Replace("''", ""); - - // Display === as newlines (meaning new line for every ===). - Log.Debug("Wikipedia: Display === as 1 newlines."); - builder.Replace("===", "\n"); - - // Display == as newlines (meaning new line for every ==). - Log.Debug("Wikipedia: Display == as 1 newline."); - builder.Replace("==", "\n"); - - // Display * as list (meaning new line for every *). - Log.Debug("Wikipedia: Display * as list."); - builder.Replace("*", "\n +"); - - // Remove HTML whitespace. - Log.Debug("Wikipedia: Remove HTML whitespace."); - builder.Replace(" ", " "); - - // Display " as ". - Log.Debug("Wikipedia: Remove Quotations."); - builder.Replace(""", "\""); - - // Display — as -. - Log.Debug("Wikipedia: Remove —."); - builder.Replace("—", "-"); - - // Remove gallery tags. - Log.Debug("Wikipedia: Remove gallery tags."); - builder.Replace("<gallery>", ""); - builder.Replace("</gallery>", ""); - - // Remove gallery tags. - Log.Debug("Wikipedia: Remove &."); - builder.Replace("&", "&"); - - // Remove (too many) newlines - Log.Debug("Wikipedia: Remove (too many) newlines."); - builder.Replace("\n\n\n\n", "\n"); - builder.Replace("\n\n\n", "\n"); - builder.Replace("\n\n", "\n"); - - // Remove (too many) newlines - Log.Debug("Wikipedia: Remove (too many) whitespaces."); - builder.Replace(" ", " "); - builder.Replace(" ", " "); - builder.Replace(" ", " "); - - tempParsedArticle = builder.ToString(); - - // The text shouldn't start with a newline. - if (tempParsedArticle.IndexOf("\n") == 0) - tempParsedArticle.Remove(0, 2); - - // For Debug purposes it is nice to see how the whole article text is parsed until - // here - //Log.Debug(tempParsedArticle); - Log.Info("Wikipedia: Finished parsing."); - this._unparsedPage = tempParsedArticle; } } @@ -363,8 +179,9 @@ private void ParseLinksAndImages() { Log.Info("Wikipedia: Starting parsing of links and images."); - string tempParsedArticle = this._unparsedPage; - int iStart = 0, iEnd = 0, iPipe = 0; + //string tempParsedArticle = this._unparsedPage; + string tempParsedArticle = this.text; + int iStart = 0, iEnd = 0, iPipe = 0; // Surrounded by [[IMAGEPATTERN: and ]] are the links to IMAGES. // We need to check for the localized image keyword but also for the English as this @@ -421,8 +238,8 @@ int iEnddesc = keyword.LastIndexOf("]]"); string imagedesc = keyword.Substring(iStartdesc, iEnddesc - iStartdesc); ; - this._imageArray.Add(imagename); - this._imageDescArray.Add(imagedesc); + this.imageArray.Add(imagename); + this.imageDescArray.Add(imagedesc); Log.Debug("Wikipedia: Image added: {0}, {1}", imagedesc, imagename); tempParsedArticle = tempParsedArticle.Substring(0, iStart) + @@ -447,9 +264,9 @@ { parsedKeyword = keyword.Substring(iPipe + 1, keyword.Length - iPipe - 3); parsedLink = keyword.Substring(2, iPipe - 2); - if (!this._linkArray.Contains(parsedLink)) + if (!this.linkArray.Contains(parsedLink)) { - this._linkArray.Add(parsedLink); + this.linkArray.Add(parsedLink); //Log.Debug("Wikipedia: Link added: {0}, {1}", parsedLink, parsedKeyword); } } @@ -463,9 +280,9 @@ { // for the 3rd the article and displayed text are equal [[article]]. parsedKeyword = keyword.Substring(2, keyword.Length - 4); - if (!this._linkArray.Contains(parsedKeyword)) + if (!this.linkArray.Contains(parsedKeyword)) { - this._linkArray.Add(parsedKeyword); + this.linkArray.Add(parsedKeyword); //Log.Debug("Wikipedia: Link added: {0}", parsedKeyword); } } @@ -481,7 +298,7 @@ Log.Error("Wikipedia: {0}", e.ToString()); Log.Error("Wikipedia: tempArticle: {0}", tempParsedArticle); } - Log.Debug("Wikipedia: Finished Link parsing: {0} Links added.", _linkArray.Count); + Log.Debug("Wikipedia: Finished Link parsing: {0} Links added.", linkArray.Count); // surrounded by [ and ] are external Links. Need to be removed. Log.Debug("Wikipedia: Removing external links"); @@ -505,7 +322,7 @@ } Log.Info("Wikipedia: Finished parsing of links and images."); - this._parsedPage = tempParsedArticle; + //this._parsedPage = tempParsedArticle; } } } \ No newline at end of file Added: trunk/plugins/Documentation plugin/skin/Documentation.xml =================================================================== --- trunk/plugins/Documentation plugin/skin/Documentation.xml (rev 0) +++ trunk/plugins/Documentation plugin/skin/Documentation.xml 2007-03-27 10:49:51 UTC (rev 235) @@ -0,0 +1,106 @@ +<?xml version="1.0" encoding="utf-8" standalone="yes"?> +<window> + <id>9999</id> + <defaultcontrol>10</defaultcontrol> + <allowoverlay>yes</allowoverlay> + <define>#header.label:Documentation</define> + <define>#header.image:wiki.team-mediaportal.com.png</define> + <define>#header.hover:hover_wikipedia.png</define> + <controls> + <import>common.window.xml</import> + <control> + <type>group</type> + <description>group element</description> + <animation effect="fade" time="250">WindowOpen</animation> + <animation effect="fade" time="500">WindowClose</animation> + <animation effect="slide" time="250" start="-300,0">WindowOpen</animation> + <animation effect="slide" time="500" end="0,-300">WindowClose</animation> + <layout>StackLayout</layout> + <posX>60</posX> + <posY>97</posY> + <control> + <description>Page name entry</description> + <type>button</type> + <id>10</id> + <onup>17</onup> + <label>Go to...</label> + </control> + <control> + <description>Back to the text (when viewing an image)</description> + <type>button</type> + <id>14</id> + <label>2514</label> + <visible>no</visible> + </control> + <control> + <description>Links Button</description> + <type>button</type> + <id>12</id> + <label>2511</label> + </control> + <control> + <description>Images Button</description> + <type>button</type> + <id>13</id> + <ondown>99</ondown> + <label>2515</label> + </control> + </control> + <control> + <animation effect="fade" time="250">WindowOpen</animation> + <animation effect="fade" time="500">WindowClose</animation> + <description>Article title/searchterm</description> + <type>label</type> + <id>4</id> + <posX>280</posX> + <posY>70</posY> + <visible>no</visible> + <label>Article title/searchterm</label> + <font>font16</font> + <align>left</align> + </control> + <control> + <animation effect="fade" time="250">WindowOpen</animation> + <animation effect="fade" time="500">WindowClose</animation> + <description>Image description</description> + <type>label</type> + <id>5</id> + <posX>264</posX> + <posY>70</posY> + <visible>no</visible> + <label>Image description</label> + <font>font16</font> + <align>left</align> + </control> + <control> + <animation effect="fade" time="250">WindowOpen</animation> + <animation effect="fade" time="500">WindowClose</animation> + <description>Here is the article text displayed</description> + <type>textbox</type> + <id>20</id> + <posX>280</posX> + <posY>92</posY> + <visible>no</visible> + <width>380</width> + <height>444</height> + <text /> + </control> + <control> + <animation effect="fade" time="250">WindowOpen</animation> + <animation effect="fade" time="500">WindowClose</animation> + <description>Article Image</description> + <type>image</type> + <id>25</id> + <posX>264</posX> + <posY>72</posY> + <visible>no</visible> + <width>440</width> + <height>466</height> + <texture /> + <colorkey>00000000</colorkey> + <colordiffuse>White</colordiffuse> + <centered>yes</centered> + <keepaspectratio>yes</keepaspectratio> + </control> + </controls> +</window> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |