From: <jbo...@li...> - 2005-08-15 22:53:56
|
Author: ral...@jb... Date: 2005-08-15 18:52:40 -0400 (Mon, 15 Aug 2005) New Revision: 863 Modified: trunk/forge/portal-extensions/forge-wiki/src/java/org/jboss/wiki/plugins/HTMLTranslator.java Log: HTML translator with most of the formatting done Modified: trunk/forge/portal-extensions/forge-wiki/src/java/org/jboss/wiki/plugins/HTMLTranslator.java =================================================================== --- trunk/forge/portal-extensions/forge-wiki/src/java/org/jboss/wiki/plugins/HTMLTranslator.java 2005-08-15 20:34:06 UTC (rev 862) +++ trunk/forge/portal-extensions/forge-wiki/src/java/org/jboss/wiki/plugins/HTMLTranslator.java 2005-08-15 22:52:40 UTC (rev 863) @@ -5,221 +5,836 @@ * * */ +import java.io.*; import java.util.regex.*; import java.util.ArrayList; import java.util.Collection; -import java.util.Date; +//import java.util.Date; import java.util.StringTokenizer; -import org.jboss.wiki.Credentials; +//import org.jboss.wiki.Credentials; import org.jboss.wiki.WikiPage; import org.jboss.wiki.WikiPlugin; public class HTMLTranslator implements WikiPlugin { - private Pattern myPattern; + private Pattern myPattern; - private Matcher myMatcher; + private Matcher myMatcher; - /* URL components, later to be configurable */ - private String portalHome = "http://forge.sicore.org:8080/portal/"; + /* URL components, later to be configurable */ + private String portalHome = "http://forge.sicore.org:8080/portal/"; - private String wikiHome = "index.html?ctrl:id=window.default.WikiPortletWindow"; + private String wikiHome = "index.html?ctrl:id=window.default.WikiPortletWindow"; - private String actionType = "&ctrl:type="; + private String actionType = "&ctrl:type="; - private String page = "&page="; + private String page = "&page="; - public ArrayList mediaPatterns; + public ArrayList mediaPatterns; - public ArrayList linkPatterns; + public ArrayList linkPatterns; - public String[] textlinks; + public String[] textlinks; - public String[] notextlinks; + public String[] notextlinks; - private String[] mediaFormats = { "*.png", "*.jpeg", "*.gif" }; + private String[] mediaFormats = { "*.png", "*.jpeg", "*.gif" }; + private boolean m_allowHTML = false; - public HTMLTranslator() { - } + private boolean m_isbold = false; - /* - * compile all links to wiki pages or external pages of the form [...] or - * [...|...] - */ - public String parseLinks(String text, String actionURL) { - String translatedContent = text; - String link;// = ""; - String href1regex = "\\[.+\\|.+\\]"; - String href2regex = "\\[.+\\]"; - Pattern tLinks = Pattern.compile(href1regex); - Pattern links = Pattern.compile(href2regex); - Matcher textlinks = tLinks.matcher(text); - Matcher nontextlinks = links.matcher(text); + private boolean m_isitalic = false; - Matcher match; + private boolean m_isTypedText = false; - while ((match = tLinks.matcher(translatedContent)).find()) { - link = (translatedContent.substring(translatedContent.indexOf("|", - match.start() + 1) + 1, match.end() - 1)).trim(); - if (isExternalLink(link)) // outside link - { - translatedContent = match.replaceFirst("<a href=\"" - + link - + "\">" - + translatedContent.substring(match.start() + 1, - translatedContent.indexOf("|", - match.start() + 1)) + "</a>"); - } else - // need to implement pageExists - translatedContent = match.replaceFirst("<a href=\"" - + portalHome - + wikiHome - + actionType - + "action" - + page - + cleanLink(link) - + "\">" - + translatedContent.substring(match.start() + 1, - translatedContent.indexOf("|", - match.start() + 1)) + "</a>"); - } + private boolean m_istable = false; - while ((match = links.matcher(translatedContent)).find()) { - link = (translatedContent.substring(match.start() + 1, - match.end() - 1)); - if (isExternalLink(link)) // outside link - { - translatedContent = match.replaceFirst("<a href=\"" + link - + "\">" + link + "</a>"); - } else - translatedContent = match.replaceFirst("<a href=\"" - + portalHome + wikiHome + actionType + "action" + page - + cleanLink(link) + "\">" + link + "</a>"); - } + private boolean m_isPre = false; - translatedContent = translatedContent.replace("\n", "<BR>\n"); + private boolean m_isdefinition = false; - return translatedContent; - } + private int m_listlevel = 0; - /* a collection of all media formats allowed on wiki */ - private Collection getImagePatterns() { - ArrayList patterns = new ArrayList(); - for (int i = 0; i < mediaFormats.length; i++) { - patterns.add(mediaFormats[i]); - } + private int m_numlistlevel = 0; - return patterns; - } + /** Tag that gets closed at EOL. */ + private String m_closeTag = null; - /** - * Figures out if a link is an off-site link. This recognizes the most - * common protocols by checking how it starts. - */ - private boolean isExternalLink(String link) { - return link.startsWith("http:") || link.startsWith("ftp:") - || link.startsWith("https:") || link.startsWith("mailto:") - || link.startsWith("news:") || link.startsWith("file:"); - } + /** Allow this many characters to be pushed back in the stream. */ + private static final int PUSHBACK_BUFFER_SIZE = 8; - /** - * Cleans a Wiki name. - * <P>[ This is a link ] -> ThisIsALink - * - * @param link - * Link to be cleared. Null is safe, and causes this to return - * null. - * @return A cleaned link. - * - * @since 2.0 - */ - public String cleanLink(String link) { - StringBuffer clean = new StringBuffer(); + private PushbackReader m_in; - if (link == null) - return null; + public HTMLTranslator() { + + } - // - // Compress away all whitespace and capitalize - // all words in between. - // + /** + * Push back any character to the current input. Does not push back a read + * EOF, though. + */ + private void pushBack(int c) throws IOException { + if (c != -1) { + m_in.unread(c); + } + } - StringTokenizer st = new StringTokenizer(link, " -"); + /* + * compile all links to wiki pages or external pages of the form [...] or + * [...|...] + */ + public String parseLinks(String page) throws IOException { + m_in = new PushbackReader(new StringReader(page), PUSHBACK_BUFFER_SIZE); + String translatedContent = "";// = prepareText(text); + StringBuffer buf = new StringBuffer(); + StringBuffer word = null; + int previousCh = -2; + int start = 0; - while (st.hasMoreTokens()) { - StringBuffer component = new StringBuffer(st.nextToken()); + boolean quitReading = false; + boolean newLine = true; // FIXME: not true if reading starts in middle + // of buffer - component.setCharAt(0, Character.toUpperCase(component.charAt(0))); + while (!quitReading) { + int ch = nextToken(); + String s = null; + if (m_isPre) { + if (ch == '}') { + buf.append(handleClosebrace()); + } else if (ch == '<') { + buf.append("<"); + } else if (ch == '>') { + buf.append(">"); + } else if (ch == -1) { + quitReading = true; + } else { + buf.append((char) ch); + } - // - // We must do this, because otherwise compiling on JDK 1.4 causes - // a downwards incompatibility to JDK 1.3. - // - clean.append(component.toString()); - } + continue; + } + // + // Check if any lists need closing down. + // - // - // Remove non-alphanumeric characters that should not - // be put inside WikiNames. Note that all valid - // Unicode letters are considered okay for WikiNames. - // It is the problem of the WikiPageProvider to take - // care of actually storing that information. - // + if( newLine && ch != '*' && ch != ' ' && m_listlevel > 0 ) + { + buf.append("</LI>\n"); + for( ; m_listlevel > 0; m_listlevel-- ) + { + buf.append("</UL>\n"); + } + } - for (int i = 0; i < clean.length(); i++) { - if (!(Character.isLetterOrDigit(clean.charAt(i)) - || clean.charAt(i) == '_' || clean.charAt(i) == '.')) { - clean.deleteCharAt(i); - --i; // We just shortened this buffer. - } - } + if( newLine && ch != '#' && ch != ' ' && m_numlistlevel > 0 ) + { + buf.append("</LI>\n"); + for( ; m_numlistlevel > 0; m_numlistlevel-- ) + { + buf.append("</OL>\n"); + } + } - return clean.toString(); - } + if( newLine && ch != '|' && m_istable ) + { + buf.append("</TABLE>\n"); + m_istable = false; + m_closeTag = null; + } - public WikiPage process(WikiPage wikiPage) { - WikiPage newPage = null; - try { - newPage = (WikiPage) wikiPage.clone(); - } catch (CloneNotSupportedException cnse) { - System.out.println("cannot clone the page: " + cnse); - } + switch (ch) { + case '\r': + // DOS linefeeds we forget + s = null; + break; - String newContent = parseLinks(newPage.getContent(), "dupa"); + case '\n': + // + // Close things like headings, etc. + // + if( m_closeTag != null ) + { + buf.append( m_closeTag ); + m_closeTag = null; + } - newPage.setPageContent(newContent); + m_isdefinition = false; - return newPage; - } + if( newLine ) + { + // Paragraph change. - public void next() { - // TODO Auto-generated method stub + buf.append("<P>\n"); + } + else + { + buf.append("\n"); + newLine = true; + } - } + break; + case '\\': + s = handleBackslash(); + break; - // public static void main(String[] args) { - // String wikitext = "__Forge Portal__FORGE USER HOW TO:\\\\\n" - // + "[Add a new project and modify its - // data|http://www.NewProject.com]\\\\\n" - // + "[Add and manage project downloads|Project Downloads]\\\\\n" - // + "[Add and manage project freezone|Project *Freezone]\\\\\n" - // + "[Project Freezone]\\\\\n" - // + "[http://www.google.com]\\\\\n"; - // WikiContext wc = new WikiContext(); - // - // String elements = parseLinks(wikitext, wc); - // - // System.out.println(elements); - // } - - public static void main (String[] args) { - HTMLTranslator ht = new HTMLTranslator(); - - WikiPage wp = new WikiPage("name", new Credentials("tomek"), "content", 1, new Date()); - - wp = ht.process(wp); - } + case '_': + s = handleUnderscore(); + break; + + case '\'': + s = handleApostrophe(); + break; + case '{': + s = handleOpenbrace(); + break; + + case '}': + s = handleClosebrace(); + break; + + case '-': + s = handleDash(); + break; + + case '!': + if( newLine ) + { + s = handleHeading(); + } + else + { + s = "!"; + } + break; + + case ';': + if( newLine ) + { + s = handleDefinitionList(); + } + else + { + s = ";"; + } + break; + + case ':': + if( m_isdefinition ) + { + s = "</DT><DD>"; + m_isdefinition = false; + } + else + { + s = ":"; + } + break; + + case '[': + s = handleOpenbracket(); + break; + + case '*': + if( newLine ) + { + s = handleUnorderedList(); + } + else + { + s = "*"; + } + break; + + case '#': + if( newLine ) + { + s = handleOrderedList(); + } + else + { + s = "#"; + } + break; + + case '|': + s = handleBar( newLine ); + break; + + case '<': + s = m_allowHTML ? "<" : "<"; + break; + + case '>': + s = m_allowHTML ? ">" : ">"; + break; + + case '\"': + s = m_allowHTML ? "\"" : """; + break; + + /* + case '&': + s = "&"; + break; + */ + case '~': + s = handleTilde(); + break; + case -1: + quitReading = true; + break; + + default: + buf.append((char) ch); + newLine = false; + break; + } + if (s != null) { + buf.append(s); + + newLine = false; + } + + } + translatedContent = buf.toString(); + // translatedContent = translatedContent.replace("\n", "<BR>\n"); + // System.out.println("final string : "+ translatedContent); + + return translatedContent; + } + + /* a collection of all media formats allowed on wiki */ + private Collection getImagePatterns() { + ArrayList patterns = new ArrayList(); + for (int i = 0; i < mediaFormats.length; i++) { + patterns.add(mediaFormats[i]); + } + + return patterns; + } + + /** + * Generic escape of next character or entity. + */ + private String handleTilde() + throws IOException + { + int ch = nextToken(); + + if( ch == '|' ) + return "|"; + + if( Character.isUpperCase( (char) ch ) ) + { + return String.valueOf( (char)ch ); + } + + // No escape. + pushBack( ch ); + + return "~"; + } + + private String handleUnderscore() throws IOException { + int ch = nextToken(); + String res = "_"; + + if (ch == '_') { + res = m_isbold ? "</B>" : "<B>"; + m_isbold = !m_isbold; + } else { + pushBack(ch); + } + + return res; + } + + /** + * For example: italics. + */ + private String handleApostrophe() throws IOException { + int ch = nextToken(); + String res = "'"; + + if (ch == '\'') { + res = m_isitalic ? "</I>" : "<I>"; + m_isitalic = !m_isitalic; + } else { + m_in.unread(ch); + } + + return res; + } + + private String handleDash() throws IOException { + int ch = nextToken(); + + if (ch == '-') { + int ch2 = nextToken(); + + if (ch2 == '-') { + int ch3 = nextToken(); + + if (ch3 == '-') { + // Empty away all the rest of the dashes. + // Do not forget to return the first non-match back. + while ((ch = nextToken()) == '-') + ; + + pushBack(ch); + return "<HR />"; + } + + pushBack(ch3); + } + pushBack(ch2); + } + + pushBack(ch); + + return "-"; + } + + private String handleHeading() throws IOException { + StringBuffer buf = new StringBuffer(); + + int ch = nextToken(); + + if (ch == '!') { + int ch2 = nextToken(); + + if (ch2 == '!') { + buf.append("<H2>"); + m_closeTag = "</H2>"; + } else { + buf.append("<H3>"); + m_closeTag = "</H3>"; + pushBack(ch2); + } + } else { + buf.append("<H4>"); + m_closeTag = "</H4>"; + pushBack(ch); + } + + return buf.toString(); + } + + private String handleUnorderedList() throws IOException { + StringBuffer buf = new StringBuffer(); + + if (m_listlevel > 0) { + buf.append("</LI>\n"); + } + + int numBullets = countChars(m_in, '*') + 1; + + if (numBullets > m_listlevel) { + for (; m_listlevel < numBullets; m_listlevel++) + buf.append("<UL>\n"); + } else if (numBullets < m_listlevel) { + for (; m_listlevel > numBullets; m_listlevel--) + buf.append("</UL>\n"); + } + + buf.append("<LI>"); + + return buf.toString(); + } + + private String handleOrderedList() throws IOException { + StringBuffer buf = new StringBuffer(); + + if (m_numlistlevel > 0) { + buf.append("</LI>\n"); + } + + int numBullets = countChars(m_in, '#') + 1; + + if (numBullets > m_numlistlevel) { + for (; m_numlistlevel < numBullets; m_numlistlevel++) + buf.append("<OL>\n"); + } else if (numBullets < m_numlistlevel) { + for (; m_numlistlevel > numBullets; m_numlistlevel--) + buf.append("</OL>\n"); + } + + buf.append("<LI>"); + + return buf.toString(); + + } + + private int countChars(PushbackReader in, char c) throws IOException { + int count = 0; + int ch; + + while ((ch = in.read()) != -1) { + if ((char) ch == c) { + count++; + } else { + in.unread(ch); + break; + } + } + + return count; + } + + private String handleBar( boolean newLine ) + throws IOException +{ + StringBuffer sb = new StringBuffer(); + + if( !m_istable && !newLine ) + { + return "|"; + } + + if( newLine ) + { + if( !m_istable ) + { + sb.append("<TABLE CLASS=\"wikitable\" BORDER=\"1\">\n"); + m_istable = true; + } + + sb.append("<TR>"); + m_closeTag = "</TD></TR>"; + } + + int ch = nextToken(); + + if( ch == '|' ) + { + if( !newLine ) + { + sb.append("</TH>"); + } + sb.append("<TH>"); + m_closeTag = "</TH></TR>"; + } + else + { + if( !newLine ) + { + sb.append("</TD>"); + } + sb.append("<TD>"); + pushBack( ch ); + } + + return sb.toString(); +} + + + private String handleDefinitionList() throws IOException { + if (!m_isdefinition) { + m_isdefinition = true; + + m_closeTag = "</DD>\n</DL>"; + + return "<DL>\n<DT>"; + } + + return ";"; + } + + private String handleBackslash() throws IOException { + int ch = nextToken(); + + if (ch == '\\') { + int ch2 = nextToken(); + + if (ch2 == '\\') { + return "<BR clear=\"all\" />"; + } + + pushBack(ch2); + + return "<BR />"; + } + + pushBack(ch); + + return "\\"; + } + + private String handleOpenbracket() throws IOException { + StringBuffer sb = new StringBuffer(); + int ch; + boolean isPlugin = false; + + while ((ch = nextToken()) == '[') { + sb.append((char) ch); + } + + if (sb.length() > 0) { + return sb.toString();//not a link + } + + // + // Find end of hyperlink + // + pushBack(ch); + ch = nextToken(); + + while (ch != -1) { + if (ch == ']')//&& (!isPlugin || sb.charAt( sb.length()-1 ) == '}' + // ) ) + { + break; + } + + sb.append((char) ch); + + ch = nextToken(); + } + + if (ch == -1) { + + return sb.toString(); + } + + return handleHyperlinks(sb.toString()); + } + + private String handleOpenbrace() throws IOException { + int ch = nextToken(); + String res = "{"; + + if (ch == '{') { + int ch2 = nextToken(); + + if (ch2 == '{') { + res = "<PRE>"; + m_isPre = true; + } else { + pushBack(ch2); + + res = "<TT>"; + m_isTypedText = true; + } + } else { + pushBack(ch); + } + + return res; + } + + /** + * Handles both }} and }}} + */ + private String handleClosebrace() throws IOException { + String res = "}"; + + int ch2 = nextToken(); + + if (ch2 == '}') { + int ch3 = nextToken(); + + if (ch3 == '}') { + if (m_isPre) { + m_isPre = false; + res = "</PRE>"; + } else { + res = "}}}"; + } + } else { + pushBack(ch3); + + if (!m_isPre) { + res = "</TT>"; + m_isTypedText = false; + } else { + pushBack(ch2); + } + } + } else { + pushBack(ch2); + } + + return res; + } + + /** + * @param string + * @return + */ + private String handleHyperlinks(String link) { + String result = ""; + int border = link.indexOf('|'); + if (border != -1) { + if (isExternalLink(link.substring(border + 1)))//case + // [...|externallink] + { + result = "<a href=\"" + link.substring(border + 1) + "\">" + + link.substring(0, border - 1) + "</a>"; + } else //case [...|wikipagelink] + { + result = "<a href=\"" + portalHome + wikiHome + actionType + + "action" + page + + cleanLink(link.substring(border + 1)) + "\">" + + link.substring(0, border - 1) + "</a>"; + } + + } else //[...] case + { + if (isExternalLink(link)) { + result = "<a href=\"" + link + "\">" + link + "</a>"; + } else { + result = "<a href=\"" + portalHome + wikiHome + actionType + + "action" + page + cleanLink(link) + "\">" + link + + "</a>"; + } + } + + return result; + } + + /** + * Figures out if a link is an off-site link. This recognizes the most + * common protocols by checking how it starts. + */ + private boolean isExternalLink(String link) { + return link.startsWith("http:") || link.startsWith("ftp:") + || link.startsWith("https:") || link.startsWith("mailto:") + || link.startsWith("news:") || link.startsWith("file:"); + } + + public String prepareText(String text) { + int index; + + { + index = text.indexOf('$', 0); + } + System.out.println("index of $ " + index); + + if (index != -1) + return text.substring(0, index - 1) + '\\' + "$" + + prepareText(text.substring(index + 1)); + else { + + return text; + } + } + + private int nextToken() throws IOException { + return m_in.read(); + } + + /** + * Push back any character to the current input. Does not push back a read + * EOF, though. + */ + + /** + * Cleans a Wiki name. + * <P>[ This is a link ] -> ThisIsALink + * + * @param link + * Link to be cleared. Null is safe, and causes this to return + * null. + * @return A cleaned link. + * + * @since 2.0 + */ + public String cleanLink(String link) { + StringBuffer clean = new StringBuffer(); + + if (link == null) + return null; + + // + // Compress away all whitespace and capitalize + // all words in between. + // + + StringTokenizer st = new StringTokenizer(link, " -"); + + while (st.hasMoreTokens()) { + StringBuffer component = new StringBuffer(st.nextToken()); + + component.setCharAt(0, Character.toUpperCase(component.charAt(0))); + + // + // We must do this, because otherwise compiling on JDK 1.4 causes + // a downwards incompatibility to JDK 1.3. + // + clean.append(component.toString()); + } + + // + // Remove non-alphanumeric characters that should not + // be put inside WikiNames. Note that all valid + // Unicode letters are considered okay for WikiNames. + // It is the problem of the WikiPageProvider to take + // care of actually storing that information. + // + + for (int i = 0; i < clean.length(); i++) { + if (!(Character.isLetterOrDigit(clean.charAt(i)) + || clean.charAt(i) == '_' || clean.charAt(i) == '.')) { + clean.deleteCharAt(i); + --i; // We just shortened this buffer. + } + } + + return clean.toString(); + } + + public WikiPage process(WikiPage wikiPage) { + WikiPage newPage = null; + + try { + newPage = (WikiPage) wikiPage.clone(); + } catch (CloneNotSupportedException cnse) { + System.out.println("cannot clone the page: " + cnse); + } + + try { + String newContent = parseLinks(newPage.getContent()); + + newPage.setPageContent(newContent); + + return newPage; + } catch (IOException e) { + System.out.println(e); + newPage.setPageContent("error on page"); + } + return newPage; + } + + public void next() { + // TODO Auto-generated method stub + + } + + // public static void main(String[] args) { + // String wikitext = "__Forge Portal__FORGE USER HOW TO:\\\\\n" + // + "[Add a new project and modify its + // data|http://www.NewProject.com]\\\\\n" + // + "[Add and manage project downloads|Project Downloads]\\\\\n" + // + "[Add and manage project freezone|Project *Freezone]\\\\\n" + // + "[Project Freezone]\\\\\n" + // + "[http://www.google.com]\\\\\n"; + // WikiContext wc = new WikiContext(); + // + // String elements = parseLinks(wikitext, wc); + // + // System.out.println(elements); + // } + + // public static void main(String[] args) { + // HTMLTranslator ht = new HTMLTranslator(); + // + // WikiPage wp = new WikiPage("name", new Credentials("tomek"), "content", + // 1, new Date()); + // + // wp = ht.process(wp); + // } + } \ No newline at end of file |