[Practicalxml-commits] SF.net SVN: practicalxml:[76] trunk
Brought to you by:
kdgregory
|
From: Auto-Generated S. C. M. <pra...@li...> - 2009-04-25 12:24:50
|
Revision: 76
http://practicalxml.svn.sourceforge.net/practicalxml/?rev=76&view=rev
Author: kdgregory
Date: 2009-04-25 12:24:37 +0000 (Sat, 25 Apr 2009)
Log Message:
-----------
Add XmlUtil.escape, XmlUtil.unescape
Modified Paths:
--------------
trunk/pom.xml
trunk/src/main/java/net/sf/practicalxml/XmlUtil.java
trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java
Modified: trunk/pom.xml
===================================================================
--- trunk/pom.xml 2009-01-14 14:14:44 UTC (rev 75)
+++ trunk/pom.xml 2009-04-25 12:24:37 UTC (rev 76)
@@ -5,7 +5,7 @@
<groupId>net.sf.practicalxml</groupId>
<artifactId>practicalxml</artifactId>
<packaging>jar</packaging>
- <version>1.0.0</version>
+ <version>1.0.1</version>
<name>practicalxml</name>
<url>http://sourceforge.net/projects/practicalxml/</url>
Modified: trunk/src/main/java/net/sf/practicalxml/XmlUtil.java
===================================================================
--- trunk/src/main/java/net/sf/practicalxml/XmlUtil.java 2009-01-14 14:14:44 UTC (rev 75)
+++ trunk/src/main/java/net/sf/practicalxml/XmlUtil.java 2009-04-25 12:24:37 UTC (rev 76)
@@ -122,6 +122,95 @@
}
+ /**
+ * Escapes the passed string, converting the five reserved XML characters
+ * into their entities: &amp;, &lt;, &gt;, &apos;, and
+ * &quot;. If the string does not contain any of these characters, it
+ * will be returned unchanged. If passed <code>null</code>, returns an
+ * empty string.
+ * <p>
+ * Yes, this method is available elsewhere, eg Jakarta Commons. I'm trying
+ * to minimize external dependencies from this library, so am reinventing
+ * a few small wheels (but they're round!).
+ */
+ public static String escape(String s)
+ {
+ if (s == null)
+ return "";
+
+ StringBuilder buf = new StringBuilder(s.length());
+ boolean wasEscaped = false;
+
+ for (int ii = 0 ; ii < s.length() ; ii++)
+ {
+ char c = s.charAt(ii);
+ switch (c)
+ {
+ case '&' :
+ buf.append("&amp;");
+ wasEscaped = true;
+ break;
+ case '<' :
+ buf.append("&lt;");
+ wasEscaped = true;
+ break;
+ case '>' :
+ buf.append("&gt;");
+ wasEscaped = true;
+ break;
+ case '\'' :
+ buf.append("&apos;");
+ wasEscaped = true;
+ break;
+ case '"' :
+ buf.append("&quot;");
+ wasEscaped = true;
+ break;
+ default :
+ buf.append(c);
+ }
+ }
+
+ return wasEscaped ? buf.toString() : s;
+ }
+
+
+ /**
+ * Unescapes the passed string, converting the five XML entities
+ * (&amp;, &lt;, &gt;, &apos;, and &quot;) into
+ * their corresponding characters. Also converts any numeric entities
+ * into their characters. If the string does not contain any convertible
+ * entities, it will be returned unchanged. If passed <code>null</code>,
+ * returns an empty string.
+ * <p>
+ * Yes, this method is available elsewhere, eg Jakarta Commons.
+ */
+ public static String unescape(String s)
+ {
+ if (s == null)
+ return "";
+
+ StringBuilder buf = new StringBuilder(s.length() + 20);
+ boolean wasEscaped = false;
+
+ for (int ii = 0 ; ii < s.length() ; ii++)
+ {
+ char c = s.charAt(ii);
+ switch (c)
+ {
+ case '&' :
+ ii = unescapeHelper(s, ii, buf);
+ wasEscaped = true;
+ break;
+ default :
+ buf.append(c);
+ }
+ }
+
+ return wasEscaped ? buf.toString() : s;
+ }
+
+
//----------------------------------------------------------------------------
// Internals
//----------------------------------------------------------------------------
@@ -234,4 +323,125 @@
: "GMT";
cal.setTimeZone(TimeZone.getTimeZone(tz));
}
+
+
+ /**
+ * Attempts to recognize an entity in the passed string, appending the
+ * corresponding character to the passed buffer. If unable to recognize
+ * an entity, appends the current character (an ampersand) to the buffer.
+ * Returns the updated string index (position of the trailing semi-colon).
+ */
+ private static int unescapeHelper(String s, int curPos, StringBuilder buf)
+ {
+ // the case of a malformed entity at the end of the string should be
+ // all but nonexistent in the real world, so rather than clutter the
+ // code with index tests, I'll just catch the exception
+ try
+ {
+ if (s.startsWith("&amp;", curPos))
+ {
+ buf.append("&");
+ return curPos + 4;
+ }
+ else if (s.startsWith("&apos;", curPos))
+ {
+ buf.append("'");
+ return curPos + 5;
+ }
+ else if (s.startsWith("&quot;", curPos))
+ {
+ buf.append('"');
+ return curPos + 5;
+ }
+ else if (s.startsWith("&lt;", curPos))
+ {
+ buf.append("<");
+ return curPos + 3;
+ }
+ else if (s.startsWith("&gt;", curPos))
+ {
+ buf.append(">");
+ return curPos + 3;
+ }
+ else if (s.startsWith("&#", curPos))
+ {
+ char c = numericEntityHelper(s, curPos);
+ if (c != '\0')
+ {
+ buf.append(c);
+ return s.indexOf(';', curPos);
+ }
+ }
+ }
+ catch (StringIndexOutOfBoundsException ignored)
+ {
+ // fall through to default handler
+ }
+
+ // it's not an entity that we know how to process, so just copy the
+ // ampersand and let the rest of the string process
+ buf.append('&');
+ return curPos;
+ }
+
+
+ /**
+ * Attempts to decode a numeric character entity starting at the current
+ * position within the string. If able, returns the corresponding character.
+ * If unable, returns NUL (which is disallowed by both XML 1.0 and XML 1.1).
+ * <p>
+ * Limited to values that fit in a single <code>char</code> (at most 65535).
+ */
+ private static char numericEntityHelper(String s, int curPos)
+ {
+ int value = 0;
+
+ // caller has checked &#, so skip them
+ curPos += 2;
+
+ boolean isHex = false;
+ int multiplier = 10;
+ if (s.charAt(curPos) == 'x')
+ {
+ isHex = true;
+ multiplier = 16;
+ curPos++;
+ }
+
+ // XML is limited to Unicode plane 0, so 4 hex or 5 decimal digits
+ // ... don't index through entire string looking for semi-colon
+ for (int ii = 0 ; ii < 6 ; ii++)
+ {
+ char c = s.charAt(curPos + ii);
+ if (c == ';')
+ break;
+ int cVal = convertDigit(c, isHex);
+ if (cVal < 0)
+ return '\0';
+ value = value * multiplier + cVal;
+ }
+
+ if (value > 65535)
+ return '\0';
+
+ return (char)value;
+ }
+
+
+ // FIXME - refactor this into a common method
+ /**
+ * Verifies that the passed character is a digit, and converts it to its
+ * numeric value if yes. Returns -1 if not a legal digit.
+ */
+ private static int convertDigit(char c, boolean allowHex)
+ {
+ if ((c >= '0') && (c <= '9'))
+ return c - '0';
+ if (allowHex && (c >= 'a') && (c <= 'f'))
+ return c - 'a' + 10;
+ if (allowHex && (c >= 'A') && (c <= 'F'))
+ return c - 'A' + 10;
+ return -1;
+ }
+
}
Modified: trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java
===================================================================
--- trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java 2009-01-14 14:14:44 UTC (rev 75)
+++ trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java 2009-04-25 12:24:37 UTC (rev 76)
@@ -75,4 +75,60 @@
assertEquals(expected.getTime(), XmlUtil.parseXsdDatetime("2004-10-28T09:10:11.123"));
assertEquals(expected.getTime(), XmlUtil.parseXsdDatetime("2004-10-28T04:10:11.123-05:00"));
}
+
+
+ public void testEscape() throws Exception
+ {
+ assertEquals("", XmlUtil.escape(null));
+ assertEquals("", XmlUtil.escape(""));
+
+ String s1 = new String("this has nothing to escape");
+ assertSame(s1, XmlUtil.escape(s1));
+
+ assertEquals("this &amp; &lt;string&gt; does &quot;&apos;",
+ XmlUtil.escape("this & <string> does \"'"));
+ }
+
+
+ public void testUnescape() throws Exception
+ {
+ assertEquals("", XmlUtil.unescape(null));
+ assertEquals("", XmlUtil.unescape(""));
+
+ String s1 = new String("this has nothing to escape");
+ assertSame(s1, XmlUtil.unescape(s1));
+
+ assertEquals("this string'\"does<&>",
+ XmlUtil.unescape("this string&apos;&quot;does&lt;&amp;&gt;"));
+
+ assertEquals("this is an &unknown; entity",
+ XmlUtil.unescape("this is an &unknown; entity"));
+ }
+
+
+ public void testUnescapeWithInvalidNumericEntity() throws Exception
+ {
+ assertEquals("&#99999;",
+ XmlUtil.unescape("&#99999;"));
+ assertEquals("&#x12345;",
+ XmlUtil.unescape("&#x12345;"));
+ assertEquals("cAA;",
+ XmlUtil.unescape("cAA;"));
+
+ assertEquals("&#;",
+ XmlUtil.unescape("&#;"));
+
+ assertEquals("&#this is not really an entity",
+ XmlUtil.unescape("&#this is not really an entity"));
+ }
+
+
+ public void testUnescapeAtEndOfString() throws Exception
+ {
+ assertEquals("&",
+ XmlUtil.unescape("&"));
+ assertEquals("&am",
+ XmlUtil.unescape("&am"));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|