[Practicalxml-commits] SF.net SVN: practicalxml:[76] trunk
Brought to you by:
kdgregory
From: Auto-Generated S. C. M. <pra...@li...> - 2009-04-25 12:24:50
|
Revision: 76 http://practicalxml.svn.sourceforge.net/practicalxml/?rev=76&view=rev Author: kdgregory Date: 2009-04-25 12:24:37 +0000 (Sat, 25 Apr 2009) Log Message: ----------- Add XmlUtil.escape, XmlUtil.unescape Modified Paths: -------------- trunk/pom.xml trunk/src/main/java/net/sf/practicalxml/XmlUtil.java trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2009-01-14 14:14:44 UTC (rev 75) +++ trunk/pom.xml 2009-04-25 12:24:37 UTC (rev 76) @@ -5,7 +5,7 @@ <groupId>net.sf.practicalxml</groupId> <artifactId>practicalxml</artifactId> <packaging>jar</packaging> - <version>1.0.0</version> + <version>1.0.1</version> <name>practicalxml</name> <url>http://sourceforge.net/projects/practicalxml/</url> Modified: trunk/src/main/java/net/sf/practicalxml/XmlUtil.java =================================================================== --- trunk/src/main/java/net/sf/practicalxml/XmlUtil.java 2009-01-14 14:14:44 UTC (rev 75) +++ trunk/src/main/java/net/sf/practicalxml/XmlUtil.java 2009-04-25 12:24:37 UTC (rev 76) @@ -122,6 +122,95 @@ } + /** + * Escapes the passed string, converting the five reserved XML characters + * into their entities: &amp;, &lt;, &gt;, &apos;, and + * &quot;. If the string does not contain any of these characters, it + * will be returned unchanged. If passed <code>null</code>, returns an + * empty string. + * <p> + * Yes, this method is available elsewhere, eg Jakarta Commons. I'm trying + * to minimize external dependencies from this library, so am reinventing + * a few small wheels (but they're round!). 
+ */ + public static String escape(String s) + { + if (s == null) + return ""; + + StringBuilder buf = new StringBuilder(s.length()); + boolean wasEscaped = false; + + for (int ii = 0 ; ii < s.length() ; ii++) + { + char c = s.charAt(ii); + switch (c) + { + case '&' : + buf.append("&amp;"); + wasEscaped = true; + break; + case '<' : + buf.append("&lt;"); + wasEscaped = true; + break; + case '>' : + buf.append("&gt;"); + wasEscaped = true; + break; + case '\'' : + buf.append("&apos;"); + wasEscaped = true; + break; + case '"' : + buf.append("&quot;"); + wasEscaped = true; + break; + default : + buf.append(c); + } + } + + return wasEscaped ? buf.toString() : s; + } + + + /** + * Unescapes the passed string, converting the five XML entities + * (&amp;, &lt;, &gt;, &apos;, and &quot;) into + * their correspinding characters. Also converts any numeric entities + * into their characters. If the string does not contain any convertable + * entities, it will be returned unchanged. If passed <code>null</code>, + * returns an empty string. + * <p> + * Yes, this method is available elsewhere, eg Jakarta Commons. + */ + public static String unescape(String s) + { + if (s == null) + return ""; + + StringBuilder buf = new StringBuilder(s.length() + 20); + boolean wasEscaped = false; + + for (int ii = 0 ; ii < s.length() ; ii++) + { + char c = s.charAt(ii); + switch (c) + { + case '&' : + ii = unescapeHelper(s, ii, buf); + wasEscaped = true; + break; + default : + buf.append(c); + } + } + + return wasEscaped ? buf.toString() : s; + } + + //---------------------------------------------------------------------------- // Internals //---------------------------------------------------------------------------- @@ -234,4 +323,125 @@ : "GMT"; cal.setTimeZone(TimeZone.getTimeZone(tz)); } + + + /** + * Attempts to recognize an entity in the passed string, appending the + * corresponding character to the passed buffer. 
If unable to recognize + * an entity, appends the current character (an ampersand) to the buffer. + * Returns the updated string index (position of the trailing semi-colon). + */ + private static int unescapeHelper(String s, int curPos, StringBuilder buf) + { + // the case of a malformed entity at the end of the string should be + // all but nonexistent in the real world, so rather than clutter the + // code with index tests, I'll just catch the exception + try + { + if (s.startsWith("&amp;", curPos)) + { + buf.append("&"); + return curPos + 4; + } + else if (s.startsWith("&apos;", curPos)) + { + buf.append("'"); + return curPos + 5; + } + else if (s.startsWith("&quot;", curPos)) + { + buf.append('"'); + return curPos + 5; + } + else if (s.startsWith("&lt;", curPos)) + { + buf.append("<"); + return curPos + 3; + } + else if (s.startsWith("&gt;", curPos)) + { + buf.append(">"); + return curPos + 3; + } + else if (s.startsWith("&#", curPos)) + { + char c = numericEntityHelper(s, curPos); + if (c != '\0') + { + buf.append(c); + return s.indexOf(';', curPos); + } + } + } + catch (StringIndexOutOfBoundsException ignored) + { + // fall through to default handler + } + + // it's not an entity that we know how to process, so just copy the + // ampersand and let the rest of the string process + buf.append('&'); + return curPos; + } + + + /** + * Attempts to decode a numeric character entity starting at the current + * position within the string. If able, returns the corresponding character. + * If unable, returns NUL (which is disallowed by both XML 1.0 and XML 1.1). + * <p> + * Limited to + */ + private static char numericEntityHelper(String s, int curPos) + { + int value = 0; + + // caller has checked &#, so skip them + curPos += 2; + + boolean isHex = false; + int multiplier = 10; + if (s.charAt(curPos) == 'x') + { + isHex = true; + multiplier = 16; + curPos++; + } + + // XML is limited to Unicode plane 0, so 4 hex or 5 decimal digits + // ... 
don't index through entire string looking for semi-colon + for (int ii = 0 ; ii < 6 ; ii++) + { + char c = s.charAt(curPos + ii); + if (c == ';') + break; + int cVal = convertDigit(c, isHex); + if (cVal < 0) + return '\0'; + value = value * multiplier + cVal; + } + + if (value > 65535) + return '\0'; + + return (char)value; + } + + + // FIXME - refactor this into a common method + /** + * Verifies that the passed character is a digit, and converts it to its + * numeric value if yes. Returns -1 if not a legal digit. + */ + private static int convertDigit(char c, boolean allowHex) + { + if ((c >= '0') && (c <= '9')) + return c - '0'; + if (allowHex && (c >= 'a') && (c <= 'f')) + return c - 'a' + 10; + if (allowHex && (c >= 'A') && (c <= 'F')) + return c - 'A' + 10; + return -1; + } + } Modified: trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java =================================================================== --- trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java 2009-01-14 14:14:44 UTC (rev 75) +++ trunk/src/test/java/net/sf/practicalxml/TestXmlUtil.java 2009-04-25 12:24:37 UTC (rev 76) @@ -75,4 +75,60 @@ assertEquals(expected.getTime(), XmlUtil.parseXsdDatetime("2004-10-28T09:10:11.123")); assertEquals(expected.getTime(), XmlUtil.parseXsdDatetime("2004-10-28T04:10:11.123-05:00")); } + + + public void testEscape() throws Exception + { + assertEquals("", XmlUtil.escape(null)); + assertEquals("", XmlUtil.escape("")); + + String s1 = new String("this has nothing to escape"); + assertSame(s1, XmlUtil.escape(s1)); + + assertEquals("this &amp; &lt;string&gt; does &quot;&apos;", + XmlUtil.escape("this & <string> does \"'")); + } + + + public void testUnescape() throws Exception + { + assertEquals("", XmlUtil.unescape(null)); + assertEquals("", XmlUtil.unescape("")); + + String s1 = new String("this has nothing to escape"); + assertSame(s1, XmlUtil.unescape(s1)); + + assertEquals("this string'\"does<&>", + XmlUtil.unescape("this string&apos;&quot;does&lt;&amp;&gt;")); + + assertEquals("this is an 
&unknown; entity", + XmlUtil.unescape("this is an &unknown; entity")); + } + + + public void testUnescapeWithInvalidNumericEntity() throws Exception + { + assertEquals("&#99999;", + XmlUtil.unescape("&#99999;")); + assertEquals("&#x12345;", + XmlUtil.unescape("&#x12345;")); + assertEquals("&#99AA;", + XmlUtil.unescape("&#99AA;")); + + assertEquals("&#;", + XmlUtil.unescape("&#;")); + + assertEquals("&#this is not really an entity", + XmlUtil.unescape("&#this is not really an entity")); + } + + + public void testUnescapeAtEndOfString() throws Exception + { + assertEquals("&", + XmlUtil.unescape("&")); + assertEquals("&am", + XmlUtil.unescape("&am")); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |