From: Michael S. <sta...@us...> - 2005-10-15 01:19:04
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9411/src/java/org/archive/access/nutch Modified Files: NutchwaxOpenSearchServlet.java Log Message: * src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java Use same code as new version 2 patch that I put up into NUTCH-110. Index: NutchwaxOpenSearchServlet.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** NutchwaxOpenSearchServlet.java 13 Oct 2005 15:53:36 -0000 1.4 --- NutchwaxOpenSearchServlet.java 15 Oct 2005 01:18:56 -0000 1.5 *************** *** 268,272 **** String name, String text) { Element child = doc.createElement(name); ! child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } --- 268,272 ---- String name, String text) { Element child = doc.createElement(name); ! child.appendChild(doc.createTextNode(toValidXmlText(text))); parent.appendChild(child); } *************** *** 275,279 **** String ns, String name, String text) { Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name); ! child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } --- 275,279 ---- String ns, String name, String text) { Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name); ! child.appendChild(doc.createTextNode(toValidXmlText(text))); parent.appendChild(child); } *************** *** 282,330 **** String name, String value) { Attr attribute = doc.createAttribute(name); ! attribute.setValue(getLegalXml(value)); node.getAttributes().setNamedItem(attribute); } ! /* ! * Ensure string is legal xml. ! * First look to see if string has illegal characters. If it doesn't, ! * just return it. 
Otherwise, create new string with illegal characters ! * @param text String to verify. ! * @return Passed <code>text</code> or a new string with illegal ! * characters removed if any found in <code>text</code>. ! * @see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char */ ! private static String getLegalXml(final String text) { ! if (text == null) { ! return null; ! } ! boolean allLegal = true; ! for (int i = 0; i < text.length(); i++) { ! if (!isLegalXml(text.charAt(i))) { ! allLegal = false; ! break; ! } ! } ! return allLegal? text: createLegalXml(text); } ! private static String createLegalXml(final String text) { ! if (text == null) { ! return null; ! } ! StringBuffer buffer = new StringBuffer(text.length()); ! for (int i = 0; i < text.length(); i++) { ! char c = text.charAt(i); ! if (isLegalXml(c)) { ! buffer.append(c); ! } } ! return buffer.toString(); ! } ! ! private static boolean isLegalXml(final char c) { ! return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff) ! || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff); } } - --- 282,427 ---- String name, String value) { Attr attribute = doc.createAttribute(name); ! attribute.setValue(value); node.getAttributes().setNamedItem(attribute); } ! /** ! * Escapes a string so that it can be safely put into an XML text node. ! * Please note that some characters cannot be serialized into an XML text ! * (Such characters are dropped from the String returned). Refer to ! * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#charsets">XML ! * specification</a> for more information. ! * ! * @param str The string to be escaped. ! * <code>IllegalArgumentException</code> is thrown when an unescapable ! * sequence of characters is encountered. Otherwise, the offending ! * characters will be omitted in the output. ! * @return A string that is safe to use in an XML element or attribute. The ! * xml 5 'special characters' are entity encoded if present and characters ! 
* outside of the legal range for xml documents will have been removed. ! * @author Dawid Weiss */ ! public static String toValidXmlText(final String str) ! { ! return toValidXmlText(str, false); } ! /** ! * Escapes a string so that it can be safely put into an XML text node. ! * Please note that some characters cannot be serialized into an XML text. ! * Refer to <a href="http://www.w3.org/TR/2000/REC-xml-20001006#charsets">XML ! * specification</a> for more information. ! * ! * @param str The string to be escaped. ! * @param exceptionOnUnescapable If true, ! * <code>IllegalArgumentException</code> is thrown when an unescapable ! * sequence of characters is encountered. Otherwise, the offending ! * characters will be omitted in the output. ! * @return A string that is safe to use in an XML element or attribute. The ! * xml 5 'special characters' are entity encoded if present and characters ! * outside of the legal range for xml documents will have been removed ! * (if <code>exceptionOnUnescapable</code> is true. ! * @author Dawid Weiss ! */ ! public static String toValidXmlText(final String str, ! final boolean exceptionOnUnescapable) ! { ! StringBuffer buffer = null; ! ! for (int i = 0; i < str.length(); i++) ! { ! char ch = str.charAt(i); ! String entity; ! ! switch (ch) ! { ! case '<': // '<' ! entity = "&lt;"; ! ! break; ! ! case '>': // '>' ! entity = "&gt;"; ! ! break; ! ! case '&': // '&' ! entity = "&amp;"; ! ! break; ! ! case '\'': ! entity = "&apos;"; ! ! break; ! ! case '"': ! entity = "&quot;"; ! ! break; ! ! case 0x09: // valid xml characters ! case 0x0a: ! case 0x0d: ! entity = null; ! ! break; ! ! default: ! ! // check if valid XML characters ! if ( ! ((ch >= 0x20) && (ch <= 0xD7FF)) || ! ((ch >= 0xe000) && (ch <= 0xfffd)) || ! ((ch >= 0x10000) && (ch <= 0x10ffff)) ! ) ! { ! entity = null; ! ! break; ! } ! else ! { ! if (exceptionOnUnescapable) ! { ! throw new IllegalArgumentException( ! "Character is not within valid XML characters " + ! 
"(code: 0x" + Integer.toHexString(ch) + ! ", position: " + i + ")." ! ); ! } ! else ! { ! // replace the character with an empty string. ! entity = ""; ! ! break; ! } ! } ! } ! ! if (buffer == null) ! { ! if (entity != null) ! { ! buffer = new StringBuffer(str.length() + 20); ! buffer.append(str.substring(0, i)); ! buffer.append(entity); ! } ! } ! else ! { ! if (entity == null) ! { ! buffer.append(ch); ! } ! else ! { ! buffer.append(entity); ! } ! } } ! ! return (buffer != null) ? buffer.toString() : str; } } |