|
From: <jbo...@li...> - 2005-09-20 17:32:41
|
Author: adamw
Date: 2005-09-20 13:32:30 -0400 (Tue, 20 Sep 2005)
New Revision: 1157
Modified:
trunk/forge/portal-extensions/forge-freezone/project.xml
trunk/forge/portal-extensions/forge-freezone/src/java/org/jboss/forge/projects/freezone/Freezone.java
Log:
Idiot-proof freezone parser ver. alpha
Modified: trunk/forge/portal-extensions/forge-freezone/project.xml
===================================================================
--- trunk/forge/portal-extensions/forge-freezone/project.xml 2005-09-20 17:06:52 UTC (rev 1156)
+++ trunk/forge/portal-extensions/forge-freezone/project.xml 2005-09-20 17:32:30 UTC (rev 1157)
@@ -42,6 +42,6 @@
<artifactId>javax.servlet</artifactId>
<version>1.0</version>
<jar>javax.servlet.jar</jar>
- </dependency>
+ </dependency>
</dependencies>
</project>
Modified: trunk/forge/portal-extensions/forge-freezone/src/java/org/jboss/forge/projects/freezone/Freezone.java
===================================================================
--- trunk/forge/portal-extensions/forge-freezone/src/java/org/jboss/forge/projects/freezone/Freezone.java 2005-09-20 17:06:52 UTC (rev 1156)
+++ trunk/forge/portal-extensions/forge-freezone/src/java/org/jboss/forge/projects/freezone/Freezone.java 2005-09-20 17:32:30 UTC (rev 1157)
@@ -8,9 +8,9 @@
*****************************************/
package org.jboss.forge.projects.freezone;
-import java.io.BufferedReader;
-import java.io.FileReader;
import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.jboss.forge.common.content.ContentManager;
import org.jboss.forge.common.ForgeHelper;
@@ -31,6 +31,13 @@
*/
private final static String DEFAULT_PAGE = "index.html";
+ private final static String ANCHOR_PATTERN = "<a[^<]*href[^>]*>";
+ private final static String IMG_PATTERN = "<img[^<]*src[^>]*>";
+
+ private final static String ATTRIBUTE_PATTERN = " *= *[\"']? ?[^ \"'>]* ?[\"']?";
+ private final static String HREF_PATTERN = "href" + ATTRIBUTE_PATTERN;
+ private final static String SRC_PATTERN = "src" + ATTRIBUTE_PATTERN;
+
private ContentManager cm;
public void init() {
@@ -38,44 +45,7 @@
}
/**
- * In the given content, finds a first string of the form: <link ... href=,
- * and returns the text inside the reference.
- *
- * @param content
- * String in which to look for the reference.
- * @param refName
- * Name of the reference.
- * @return Text contained in the reference or null if no reference is found.
- */
- private String findNextReference(String content, String refName) {
- int h;
- int start;
- if (refName.equals("link")) {
- h = content.indexOf(" href="); // search for one of the link types
- if (h == -1) {
- return null;
- } else
- start = h;
- int begin = content.substring(start).indexOf('"');
- int end = content.substring(start + begin + 1).indexOf('"');
- begin += start;
- end += begin;
- return content.substring(begin + 1, end + 1);
- } else if (refName.equals("image")) {
- start = content.indexOf("<img src=");
- if (start == -1)
- return null;
- int begin = content.substring(start).indexOf('"');
- int end = content.substring(start + begin + 1).indexOf('"');
- begin += start;
- end += begin;
- return content.substring(begin + 1, end + 1);
- } else
- return null;
- }
-
- /**
- * Figures out if a link is qualified for changing: this is the case when
+ * Figures out if a link shouldn't be changed: this is the case when
* either it is an off-site link, or it starts with '/'. This recognizes the most
* common protocols by checking how it starts (clears all white space in
* case the link starts with '\n' or [space]).
@@ -83,12 +53,12 @@
* @author Janne Jalkanen
* @author adamw
*/
- private boolean qualifyLink(String link) {
+ private boolean isOutsideLink(String link) {
link = link.trim();
return link.startsWith("http:") || link.startsWith("ftp:")
- || link.startsWith("https:") || link.startsWith("mailto:")
- || link.startsWith("news:") || link.startsWith("file:")
- || link.startsWith("/");
+ || link.startsWith("https:") || link.startsWith("mailto:")
+ || link.startsWith("news:") || link.startsWith("file:")
+ || link.startsWith("/");
}
// modifies path to page/resource so folder structure can be supported
@@ -142,6 +112,31 @@
return mainPath + '/' + element;
}
+ /**
+ * Prepares a link for transforming - cuts off unnecessary spaces and
+ * removes any ' or " around it.
+ * @param link Link to prepare.
+ * @return A trimmed link with ' and " cut off from the beginning/end.
+ */
+ private String prepareLink(String link) {
+ link = link.trim();
+
+ // Cutting off the ' or " from the beginning and end of
+ // the link.
+ if (('\'' == link.charAt(0)) || ('\"' == link.charAt(0)))
+ link = link.substring(1);
+
+ int linkLength = link.length();
+ if ((linkLength >= 1) &&
+ (('\'' == link.charAt(linkLength-1)) ||
+ ('\"' == link.charAt(linkLength-1))))
+ link = link.substring(0, linkLength-1);
+
+ link = link.trim();
+
+ return link;
+ }
+
public void doView(JBossRenderRequest request, JBossRenderResponse response)
throws IOException {
response.setContentType("text/html");
@@ -172,205 +167,130 @@
if (pageContent == null)
throw new Exception();
- String nextRef;
- String nextRef2;
+ int patternFlags =
+ Pattern.CASE_INSENSITIVE |
+ Pattern.DOTALL |
+ Pattern.MULTILINE;
+
+ Pattern anchor = Pattern.compile(ANCHOR_PATTERN, patternFlags);
+ Pattern href = Pattern.compile(HREF_PATTERN, patternFlags);
+ Pattern img = Pattern.compile(IMG_PATTERN, patternFlags);
+ Pattern src = Pattern.compile(SRC_PATTERN, patternFlags);
+
+ // Prefix for every link that has to be served through
+ // file-access (stylesheets, images and other media).
String fileAccessPrepend = "/file-access/" + portalName + "/"
- + ProjectsHelper.MEMBERS_DIR + "/" + projectId + "/"
- + ProjectsHelper.FREEZONE_DIR + "/";
- while (pageContent.indexOf("HREF=") != -1) {
- pageContent = ForgeHelper
- .replace(pageContent, "HREF=", "href=");
- }
+ + ProjectsHelper.MEMBERS_DIR + "/" + projectId + "/"
+ + ProjectsHelper.FREEZONE_DIR + "/";
- while (pageContent.indexOf("<IMG SRC=") != -1) {
- pageContent = ForgeHelper.replace(pageContent, "<IMG SRC=",
- "<img src=");
- }
+ // REPLACING ANCHORS
+ Matcher anchorMatcher = anchor.matcher(pageContent);
+
+ // Here we will build the freezone-parsed page content.
+ StringBuffer replacedPageContent = new StringBuffer();
- // Replacing links
- while ((nextRef = findNextReference(pageContent, "link")) != null) {
- if (!qualifyLink(nextRef)) {
- // it would be relative not outside link
- nextRef2 = modifyPath(pagePath, nextRef);
- if (nextRef2.endsWith(".png")) {
- nextRef2 = fileAccessPrepend + nextRef2
- + "\" target=\"_blank\"";
- pageContent = ForgeHelper
- .replace(pageContent, "href=" + '"' + nextRef
- + '"', "*ref=" + '"' + nextRef2);
- } if (nextRef2.endsWith(".css")) {
- nextRef2 = fileAccessPrepend + nextRef2 + "\"";
- pageContent = ForgeHelper
- .replace(pageContent, "href=" + '"' + nextRef
- + '"', "*ref=" + '"' + nextRef2);
- } else
- pageContent = ForgeHelper.replace(pageContent, "href="
- + '"' + nextRef + '"', ProjectsHelper
- .createFreezonePageLink(portalName, projectId,
- nextRef2 + '"'));
+ // First we iterate over all anchors. (<a ...>)
+ while (anchorMatcher.find()) {
+ String nextAnchor = pageContent.substring(
+ anchorMatcher.start(), anchorMatcher.end());
+
+ StringBuffer nextAnchorReplacement;
+
+ // Parsing the anchor to get the href element.
+ Matcher hrefMatcher = href.matcher(nextAnchor);
+ if (hrefMatcher.find()) {
+ // Now we have the href attribute. We must get the
+ // link from it and, unless it is an off-site link or
+ // starts with '/', transform it into a freezone one.
+ String nextHref = nextAnchor.substring(
+ hrefMatcher.start(), hrefMatcher.end());
+
+ // Getting the proper link from the href.
+ String link = nextHref.substring(nextHref.indexOf('=')+1);
+
+ link = prepareLink(link);
+
+ // Prepending necessary things to the link.
+ if (!isOutsideLink(link)) {
+ link = modifyPath(pagePath, link);
+ if (link.endsWith(".css")) {
+ link = fileAccessPrepend + link;
+ } else if (!((link.contains(".html"))
+ || (link.contains(".htm")))) {
+ // Must be an image or other media.
+ link = fileAccessPrepend + link
+ + "\" target=\"_blank";
+ } else
+ // Must be an HTML page.
+ link = ProjectsHelper
+ .createFreezonePageLink(portalName, projectId,
+ link);
+ }
+
+ // Replacing the old link with a new one.
+ nextAnchorReplacement = new StringBuffer();
+ hrefMatcher.appendReplacement(nextAnchorReplacement,
+ "href=\"" + link + "\"");
+ hrefMatcher.appendTail(nextAnchorReplacement);
} else {
- pageContent = ForgeHelper.replace(pageContent, "href="
- + '"' + nextRef, "$" + nextRef + '"');
+ nextAnchorReplacement = new StringBuffer(nextAnchor);
}
+
+ anchorMatcher.appendReplacement(replacedPageContent,
+ nextAnchorReplacement.toString());
}
- // Replacing images
- while ((nextRef = findNextReference(pageContent, "image")) != null) {
- if (!qualifyLink(nextRef)) {
- nextRef2 = modifyPath(pagePath, nextRef);
-
- pageContent = ForgeHelper.replace(pageContent, "<img src="
- + '"' + nextRef + '"', "*img src=" + '"' + fileAccessPrepend
- + nextRef2 + '"');
- } else
- pageContent = ForgeHelper.replace(pageContent, "<img src="
- + '"' + nextRef + '"', "*img src=" + '"'
- + nextRef + '"');
- }
+ anchorMatcher.appendTail(replacedPageContent);
- // fix back internal links
- while (pageContent.indexOf("*ref=") != -1)
- pageContent = ForgeHelper.replace(pageContent, "*ref=",
- " href=");
+ // REPLACING IMAGES
+ pageContent = replacedPageContent.toString();
+ Matcher imgMatcher = img.matcher(pageContent);
- while (pageContent.indexOf(" /portal/index.html?ctrl:id") != -1)
- pageContent = ForgeHelper.replace(pageContent,
- " /portal/index.html?ctrl:id", " href=" + '"'
- + "/portal/index.html?ctrl:id");
+ replacedPageContent = new StringBuffer();
- // fix back external links
-
- while (pageContent.indexOf("$http:") != -1
- || pageContent.indexOf("$ftp:") != -1
- || pageContent.indexOf("$https:") != -1
- || pageContent.indexOf("$file:") != -1
- || pageContent.indexOf("$mailto:") != -1
- || pageContent.indexOf("$news:") != -1) {
- pageContent = ForgeHelper.replace(pageContent, "$",
- " href=" + '"');
- }
- // fix back image links
- while (pageContent.indexOf("*img src=") != -1) {
- pageContent = ForgeHelper.replace(pageContent, "*img src=",
- "<img src=");
- }
-
- response.getWriter().write(pageContent);
- } catch (Exception e) {
- e.printStackTrace();
- response.getWriter().write(
- "The page you requested cannot be accessed ");
- }
- }
-
- //
- //
- //
- //
- //
-
- public void test(String portalName, String projectId, String pagePath,
- String pageContent) {
- try {
- String nextRef;
- String nextRef2;
- String fileAccessPrepend = "/file-access/" + portalName + "/"
- + ProjectsHelper.MEMBERS_DIR + "/" + projectId + "/"
- + ProjectsHelper.FREEZONE_DIR + "/";
- while (pageContent.indexOf("HREF=") != -1) {
- pageContent = pageContent.replace("HREF=", "href=");
- }
-
- while (pageContent.indexOf("<IMG SRC=") != -1) {
- pageContent = pageContent.replace("<IMG SRC=",
- "<img src=");
- }
-
- // Replacing links
- while ((nextRef = findNextReference(pageContent, "link")) != null) {
- if (!qualifyLink(nextRef)) {
- // it would be relative not outside link
- nextRef2 = modifyPath(pagePath, nextRef);
- if (nextRef2.endsWith(".png")) {
- nextRef2 = fileAccessPrepend + nextRef2
- + "\" target=\"_blank\"";
- pageContent = ForgeHelper
- .replace(pageContent, "href=" + '"' + nextRef
- + '"', "*ref=" + '"' + nextRef2);
- } if (nextRef2.endsWith(".css")) {
- nextRef2 = fileAccessPrepend + nextRef2 + "\"";
- pageContent = pageContent.replace("href=" + '"' + nextRef
- + '"', "*ref=" + '"' + nextRef2);
- } else
- pageContent = pageContent.replace("href="
- + '"' + nextRef + '"', "");
+ // Iterating over all image references (<img ...>).
+ while (imgMatcher.find()) {
+ String nextImg = pageContent.substring(
+ imgMatcher.start(), imgMatcher.end());
+
+ StringBuffer nextImgReplacement;
+
+ // Parsing the image to get the src element.
+ Matcher srcMatcher = src.matcher(nextImg);
+ if (srcMatcher.find()) {
+ String nextSrc = nextImg.substring(
+ srcMatcher.start(), srcMatcher.end());
+
+ // Getting the proper link from the src.
+ String link = nextSrc.substring(nextSrc.indexOf('=')+1);
+
+ link = prepareLink(link);
+
+ // Modifying the link.
+ if (!isOutsideLink(link)) {
+ link = modifyPath(pagePath, link);
+ link = fileAccessPrepend + link;
+ }
+
+ // Replacing the old link with a new one.
+ nextImgReplacement = new StringBuffer();
+ srcMatcher.appendReplacement(nextImgReplacement,
+ "src=\"" + link + "\"");
+ srcMatcher.appendTail(nextImgReplacement);
} else {
- pageContent = pageContent.replace("href="
- + '"' + nextRef, "$" + nextRef + '"');
+ nextImgReplacement = new StringBuffer(nextImg);
}
+
+ imgMatcher.appendReplacement(replacedPageContent,
+ nextImgReplacement.toString());
}
- // Replacing images
- while ((nextRef = findNextReference(pageContent, "image")) != null) {
- if (!qualifyLink(nextRef)) {
- nextRef2 = modifyPath(pagePath, nextRef);
-
- pageContent = pageContent.replace("<img src="
- + '"' + nextRef + '"', "*img src=" + '"' + fileAccessPrepend
- + nextRef2 + '"');
- } else
- pageContent = pageContent.replace("<img src="
- + '"' + nextRef + '"', "*img src=" + '"'
- + nextRef + '"');
- }
+ imgMatcher.appendTail(replacedPageContent);
- // fix back internal links
- while (pageContent.indexOf("*ref=") != -1)
- pageContent = pageContent.replace("*ref=",
- " href=");
-
- while (pageContent.indexOf(" /portal/index.html?ctrl:id") != -1)
- pageContent = pageContent.replace(
- " /portal/index.html?ctrl:id", " href=" + '"'
- + "/portal/index.html?ctrl:id");
-
- // fix back external links
-
- while (pageContent.indexOf("$http:") != -1
- || pageContent.indexOf("$ftp:") != -1
- || pageContent.indexOf("$https:") != -1
- || pageContent.indexOf("$file:") != -1
- || pageContent.indexOf("$mailto:") != -1
- || pageContent.indexOf("$news:") != -1) {
- pageContent = pageContent.replace("$",
- " href=" + '"');
- }
- // fix back image links
- while (pageContent.indexOf("*img src=") != -1) {
- pageContent = pageContent.replace("*img src=",
- "<img src=");
- }
+ response.getWriter().write(replacedPageContent.toString());
} catch (Exception e) {
e.printStackTrace();
+ response.getWriter().write("The page you requested cannot be accessed");
}
}
-
- public static void main(String[] argv) throws IOException {
- BufferedReader bf = new BufferedReader(new FileReader(
- "/home/adamw/portal-content/default/members/jbossejb3/freezone/index.html"));
-
- StringBuffer sf = new StringBuffer();
- while (true) {
- String line = bf.readLine();
- if (line == null)
- break;
- sf.append(line);
- sf.append('\n');
- }
-
- String pageContent = sf.toString();
- bf.close();
-
- new Freezone().test("default", "jbossejb3", "index.html", pageContent);
- }
}
|