From: <bo...@us...> - 2010-09-17 11:50:37
|
Revision: 481 http://xmlunit.svn.sourceforge.net/xmlunit/?rev=481&view=rev Author: bodewig Date: 2010-09-17 11:50:31 +0000 (Fri, 17 Sep 2010) Log Message: ----------- implement whitespace nomalization Modified Paths: -------------- trunk/xmlunit/src/main/java-core/net/sf/xmlunit/util/Nodes.java trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/Diff.java trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/NewDifferenceEngine.java trunk/xmlunit/src/main/net-core/util/Nodes.cs trunk/xmlunit/src/tests/java-core/net/sf/xmlunit/util/NodesTest.java trunk/xmlunit/src/tests/net-core/util/NodesTest.cs Added Paths: ----------- trunk/xmlunit/src/main/java-core/net/sf/xmlunit/input/WhitespaceNormalizedSource.java trunk/xmlunit/src/main/net-core/input/WhitespaceNormalizedSource.cs Copied: trunk/xmlunit/src/main/java-core/net/sf/xmlunit/input/WhitespaceNormalizedSource.java (from rev 479, trunk/xmlunit/src/main/java-core/net/sf/xmlunit/input/WhitespaceStrippedSource.java) =================================================================== --- trunk/xmlunit/src/main/java-core/net/sf/xmlunit/input/WhitespaceNormalizedSource.java (rev 0) +++ trunk/xmlunit/src/main/java-core/net/sf/xmlunit/input/WhitespaceNormalizedSource.java 2010-09-17 11:50:31 UTC (rev 481) @@ -0,0 +1,35 @@ +/* + This file is licensed to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +package net.sf.xmlunit.input; + +import javax.xml.transform.Source; +import javax.xml.transform.dom.DOMSource; +import net.sf.xmlunit.util.Convert; +import net.sf.xmlunit.util.Nodes; + +/** + * A source that is obtained from a different source by removing all + * empty text nodes and normalizing the non-empty ones. + * + * <p>"normalized" in this context means all whitespace characters + * are replaced by space characters and consecutive whitespace + * characters are collapsed.</p> + */ +public class WhitespaceNormalizedSource extends DOMSource { + + public WhitespaceNormalizedSource(Source originalSource) { + super(Nodes.normalizeWhitespace(Convert.toDocument(originalSource))); + setSystemId(originalSource.getSystemId()); + } +} Modified: trunk/xmlunit/src/main/java-core/net/sf/xmlunit/util/Nodes.java =================================================================== --- trunk/xmlunit/src/main/java-core/net/sf/xmlunit/util/Nodes.java 2010-09-16 15:13:39 UTC (rev 480) +++ trunk/xmlunit/src/main/java-core/net/sf/xmlunit/util/Nodes.java 2010-09-17 11:50:31 UTC (rev 481) @@ -91,21 +91,43 @@ public static Node stripWhitespace(Node original) { Node cloned = original.cloneNode(true); cloned.normalize(); - stripWsRec(cloned); + handleWsRec(cloned, false); return cloned; } /** + * Creates a new Node (of the same type as the original node) that + * is similar to the original but doesn't contain any empty text or + * CDATA nodes and where all textual content including attribute + * values or comments are trimmed and normalized. 
+ * + * <p>"normalized" in this context means all whitespace characters + * are replaced by space characters and consecutive whitespace + * characters are collapsed.</p> + */ + public static Node normalizeWhitespace(Node original) { + Node cloned = original.cloneNode(true); + cloned.normalize(); + handleWsRec(cloned, true); + return cloned; + } + + /** * Trims textual content of this node, removes empty text and * CDATA children, recurses into its child nodes. + * @param normalize whether to normalize whitespace as well */ - private static void stripWsRec(Node n) { + private static void handleWsRec(Node n, boolean normalize) { if (n instanceof CharacterData || n instanceof ProcessingInstruction) { - n.setNodeValue(n.getNodeValue().trim()); + String s = n.getNodeValue().trim(); + if (normalize) { + s = normalize(s); + } + n.setNodeValue(s); } List<Node> toRemove = new LinkedList<Node>(); for (Node child : new IterableNodeList(n.getChildNodes())) { - stripWsRec(child); + handleWsRec(child, normalize); if (!(n instanceof Attr) && (child instanceof Text || child instanceof CDATASection) && child.getNodeValue().length() == 0) { @@ -119,8 +141,40 @@ if (attrs != null) { final int len = attrs.getLength(); for (int i = 0; i < len; i++) { - stripWsRec(attrs.item(i)); + handleWsRec(attrs.item(i), normalize); } } } + + private static final char SPACE = ' '; + + /** + * Normalize a string. 
+ * + * <p>"normalized" in this context means all whitespace characters + * are replaced by space characters and consecutive whitespace + * characters are collapsed.</p> + */ + static String normalize(String s) { + StringBuilder sb = new StringBuilder(); + boolean changed = false; + boolean lastCharWasWS = false; + final int len = s.length(); + for (int i = 0; i < len; i++) { + char c = s.charAt(i); + if (Character.isWhitespace(c)) { + if (!lastCharWasWS) { + sb.append(SPACE); + changed |= (c != SPACE); + } else { + changed = true; + } + lastCharWasWS = true; + } else { + sb.append(c); + lastCharWasWS = false; + } + } + return changed ? sb.toString() : s; + } } Modified: trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/Diff.java =================================================================== --- trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/Diff.java 2010-09-16 15:13:39 UTC (rev 480) +++ trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/Diff.java 2010-09-17 11:50:31 UTC (rev 481) @@ -423,8 +423,6 @@ if ( XMLUnit.getIgnoreAttributeOrder() && - !XMLUnit.getNormalizeWhitespace() - && (!usesUnknownElementQualifier() || XMLUnit.getCompareUnmatched()) ) { Modified: trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/NewDifferenceEngine.java =================================================================== --- trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/NewDifferenceEngine.java 2010-09-16 15:13:39 UTC (rev 480) +++ trunk/xmlunit/src/main/java-legacy/org/custommonkey/xmlunit/NewDifferenceEngine.java 2010-09-17 11:50:31 UTC (rev 481) @@ -57,6 +57,7 @@ import net.sf.xmlunit.diff.ElementSelectors; import net.sf.xmlunit.diff.NodeMatcher; import net.sf.xmlunit.input.CommentLessSource; +import net.sf.xmlunit.input.WhitespaceNormalizedSource; import net.sf.xmlunit.input.WhitespaceStrippedSource; import net.sf.xmlunit.util.Linqy; import org.custommonkey.xmlunit.examples.RecursiveElementNameAndTextQualifier; @@ 
-200,7 +201,10 @@ ctrlSource = new CommentLessSource(ctrlSource); tstSource = new CommentLessSource(tstSource); } - if (XMLUnit.getIgnoreWhitespace()) { + if (XMLUnit.getNormalizeWhitespace()) { + ctrlSource = new WhitespaceNormalizedSource(ctrlSource); + tstSource = new WhitespaceNormalizedSource(tstSource); + } else if (XMLUnit.getIgnoreWhitespace()) { ctrlSource = new WhitespaceStrippedSource(ctrlSource); tstSource = new WhitespaceStrippedSource(tstSource); } Copied: trunk/xmlunit/src/main/net-core/input/WhitespaceNormalizedSource.cs (from rev 479, trunk/xmlunit/src/main/net-core/input/WhitespaceStrippedSource.cs) =================================================================== --- trunk/xmlunit/src/main/net-core/input/WhitespaceNormalizedSource.cs (rev 0) +++ trunk/xmlunit/src/main/net-core/input/WhitespaceNormalizedSource.cs 2010-09-17 11:50:31 UTC (rev 481) @@ -0,0 +1,35 @@ +/* + This file is licensed to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +using net.sf.xmlunit.util; + +namespace net.sf.xmlunit.input { + + /// <summary> + /// A source that is obtained from a different source by removing + /// all empty text nodes and normalizing the non-empty ones. + /// </summary> + /// <remarks> + /// "normalized" in this context means all whitespace characters + /// are replaced by space characters and consecutive whitespace + /// characters are collapsed. 
+ /// </remarks> + public class WhitespaceNormalizedSource : DOMSource { + public WhitespaceNormalizedSource(ISource originalSource) : + base(Nodes.NormalizeWhitespace(Convert.ToDocument(originalSource))) + { + SystemId = originalSource.SystemId; + } + } +} Modified: trunk/xmlunit/src/main/net-core/util/Nodes.cs =================================================================== --- trunk/xmlunit/src/main/net-core/util/Nodes.cs 2010-09-16 15:13:39 UTC (rev 480) +++ trunk/xmlunit/src/main/net-core/util/Nodes.cs 2010-09-17 11:50:31 UTC (rev 481) @@ -74,21 +74,45 @@ public static XmlNode StripWhitespace(XmlNode original) { XmlNode cloned = original.CloneNode(true); cloned.Normalize(); - StripWsRec(cloned); + HandleWsRec(cloned, false); return cloned; } /// <summary> + /// Creates a new Node (of the same type as the original node) + /// that is similar to the original but doesn't contain any + /// empty text or CDATA nodes and where all textual content + /// including attribute values or comments are normalized. + /// </summary> + /// <remarks> + /// "normalized" in this context means all whitespace + /// characters are replaced by space characters and + /// consecutive whitespace characters are collapsed. + /// </remarks> + public static XmlNode NormalizeWhitespace(XmlNode original) { + XmlNode cloned = original.CloneNode(true); + cloned.Normalize(); + HandleWsRec(cloned, true); + return cloned; + } + + /// <summary> /// Trims textual content of this node, removes empty text and /// CDATA children, recurses into its child nodes. 
/// </summary> - private static void StripWsRec(XmlNode n) { + /// <param name="normalize">whether to normalize + /// whitespace as well</param> + private static void HandleWsRec(XmlNode n, bool normalize) { if (n is XmlCharacterData || n is XmlProcessingInstruction) { - n.Value = n.Value.Trim(); + string s = n.Value.Trim(); + if (normalize) { + s = Normalize(s); + } + n.Value = s; } LinkedList<XmlNode> toRemove = new LinkedList<XmlNode>(); foreach (XmlNode child in n.ChildNodes) { - StripWsRec(child); + HandleWsRec(child, normalize); if (!(n is XmlAttribute) && (child is XmlText || child is XmlCDataSection) && child.Value.Length == 0) { @@ -101,10 +125,40 @@ XmlNamedNodeMap attrs = n.Attributes; if (attrs != null) { foreach (XmlAttribute a in attrs) { - StripWsRec(a); + HandleWsRec(a, normalize); } } } + private const char SPACE = ' '; + + /// <summary> + /// Normalize a string. + /// </summary> + /// <remarks> + /// "normalized" in this context means all whitespace + /// characters are replaced by space characters and + /// consecutive whitespace characters are collapsed. + /// </remarks> + internal static string Normalize(string s) { + StringBuilder sb = new StringBuilder(); + bool changed = false; + bool lastCharWasWS = false; + foreach (char c in s) { + if (char.IsWhiteSpace(c)) { + if (!lastCharWasWS) { + sb.Append(SPACE); + changed |= (c != SPACE); + } else { + changed = true; + } + lastCharWasWS = true; + } else { + sb.Append(c); + lastCharWasWS = false; + } + } + return changed ? 
sb.ToString() : s; + } } } Modified: trunk/xmlunit/src/tests/java-core/net/sf/xmlunit/util/NodesTest.java =================================================================== --- trunk/xmlunit/src/tests/java-core/net/sf/xmlunit/util/NodesTest.java 2010-09-16 15:13:39 UTC (rev 480) +++ trunk/xmlunit/src/tests/java-core/net/sf/xmlunit/util/NodesTest.java 2010-09-17 11:50:31 UTC (rev 481) @@ -144,16 +144,20 @@ assertEquals(BAR, m.get(new QName(SOME_URI, FOO, BAR))); } - private Map.Entry<Document, Node> stripWsSetup() { - final Document toTest = Convert.toDocument(Input.fromMemory( + private Document handleWsSetup() { + return Convert.toDocument(Input.fromMemory( "<root>\n" - + "<!-- trim me -->\n" + + "<!-- trim\tme -->\n" + "<child attr=' trim me ' attr2='not me'>\n" + " trim me \n" + "</child><![CDATA[ trim me ]]>\n" + "<?target trim me ?>\n" + "<![CDATA[ ]]>\n" + "</root>").build()); + } + + private Map.Entry<Document, Node> stripWsSetup() { + final Document toTest = handleWsSetup(); final Node stripped = Nodes.stripWhitespace(toTest); return new Map.Entry<Document, Node>() { public Document getKey() { @@ -168,8 +172,32 @@ }; } + private Map.Entry<Document, Node> normalizeWsSetup() { + final Document toTest = handleWsSetup(); + final Node stripped = Nodes.normalizeWhitespace(toTest); + return new Map.Entry<Document, Node>() { + public Document getKey() { + return toTest; + } + public Node getValue() { + return stripped; + } + public Node setValue(Node n) { + throw new UnsupportedOperationException(); + } + }; + } + @Test public void stripWhitespaceWorks() { - Map.Entry<Document, Node> s = stripWsSetup(); + handleWsWorks(stripWsSetup(), "trim\tme"); + } + + @Test public void normalizeWhitespaceWorks() { + handleWsWorks(normalizeWsSetup(), "trim me"); + } + + private void handleWsWorks(Map.Entry<Document, Node> s, + String commentContent) { assertTrue(s.getValue() instanceof Document); NodeList top = s.getValue().getChildNodes(); assertEquals(1, top.getLength()); @@ 
-179,7 +207,8 @@ assertEquals(4, rootsChildren.getLength()); assertTrue("should be comment, is " + rootsChildren.item(0).getClass(), rootsChildren.item(0) instanceof Comment); - assertEquals("trim me", ((Comment) rootsChildren.item(0)).getData()); + assertEquals(commentContent, + ((Comment) rootsChildren.item(0)).getData()); assertTrue("should be element, is " + rootsChildren.item(1).getClass(), rootsChildren.item(1) instanceof Element); assertEquals("child", rootsChildren.item(1).getNodeName()); @@ -206,7 +235,14 @@ } @Test public void stripWhitespaceDoesntAlterOriginal() { - Map.Entry<Document, Node> s = stripWsSetup(); + handleWsDoesntAlterOriginal(stripWsSetup()); + } + + @Test public void normalizeWhitespaceDoesntAlterOriginal() { + handleWsDoesntAlterOriginal(normalizeWsSetup()); + } + + private void handleWsDoesntAlterOriginal(Map.Entry<Document, Node> s) { NodeList top = s.getKey().getChildNodes(); assertEquals(1, top.getLength()); assertTrue(top.item(0) instanceof Element); @@ -216,7 +252,7 @@ assertNewlineTextNode(rootsChildren.item(0)); assertTrue("should be comment, is " + rootsChildren.item(1).getClass(), rootsChildren.item(1) instanceof Comment); - assertEquals(" trim me ", ((Comment) rootsChildren.item(1)).getData()); + assertEquals(" trim\tme ", ((Comment) rootsChildren.item(1)).getData()); assertNewlineTextNode(rootsChildren.item(2)); assertTrue("should be element, is " + rootsChildren.item(3).getClass(), rootsChildren.item(3) instanceof Element); @@ -255,4 +291,11 @@ n instanceof Text); assertEquals("\n", ((Text) n).getData()); } + + @Test public void normalize() { + assertSame("foo", Nodes.normalize("foo")); + assertSame("foo bar", Nodes.normalize("foo bar")); + assertEquals("foo bar", Nodes.normalize("foo\nbar")); + assertEquals("foo bar", Nodes.normalize("foo \r\n\t bar")); + } } Modified: trunk/xmlunit/src/tests/net-core/util/NodesTest.cs =================================================================== --- 
trunk/xmlunit/src/tests/net-core/util/NodesTest.cs 2010-09-16 15:13:39 UTC (rev 480) +++ trunk/xmlunit/src/tests/net-core/util/NodesTest.cs 2010-09-17 11:50:31 UTC (rev 481) @@ -123,23 +123,42 @@ Assert.AreEqual(BAR, m[new XmlQualifiedName(FOO, SOME_URI)]); } - private KeyValuePair<XmlDocument, XmlNode> StripWsSetup() { - XmlDocument toTest = Convert.ToDocument(Input.FromMemory( + private XmlDocument HandleWsSetup() { + return Convert.ToDocument(Input.FromMemory( "<root>\n" - + "<!-- trim me -->\n" + + "<!-- trim\tme -->\n" + "<child attr=' trim me ' attr2='not me'>\n" + " trim me \n" + "</child><![CDATA[ trim me ]]>\n" + "<?target trim me ?>\n" + "<![CDATA[ ]]>\n" + "</root>").Build()); + } + + private KeyValuePair<XmlDocument, XmlNode> StripWsSetup() { + XmlDocument toTest = HandleWsSetup(); return new KeyValuePair<XmlDocument, XmlNode>(toTest, Nodes.StripWhitespace(toTest)); } + private KeyValuePair<XmlDocument, XmlNode> NormalizeWsSetup() { + XmlDocument toTest = HandleWsSetup(); + return new KeyValuePair<XmlDocument, + XmlNode>(toTest, Nodes.NormalizeWhitespace(toTest)); + } + [Test] public void StripWhitespaceWorks() { - KeyValuePair<XmlDocument, XmlNode> s = StripWsSetup(); + HandleWsWorks(StripWsSetup(), "trim\tme"); + } + + [Test] + public void NormalizeWhitespaceWorks() { + HandleWsWorks(NormalizeWsSetup(), "trim me"); + } + + private void HandleWsWorks(KeyValuePair<XmlDocument, XmlNode> s, + string commentContent) { Assert.IsTrue(s.Value is XmlDocument); XmlNodeList top = s.Value.ChildNodes; Assert.AreEqual(1, top.Count); @@ -149,7 +168,7 @@ Assert.AreEqual(4, rootsChildren.Count); Assert.IsTrue(rootsChildren[0] is XmlComment, "should be comment, is " + rootsChildren[0].GetType()); - Assert.AreEqual("trim me", + Assert.AreEqual(commentContent, ((XmlComment) rootsChildren[0]).Data); Assert.IsTrue(rootsChildren[1] is XmlElement, "should be element, is " + rootsChildren[1].GetType()); @@ -178,7 +197,16 @@ [Test] public void 
StripWhitespaceDoesntAlterOriginal() { - KeyValuePair<XmlDocument, XmlNode> s = StripWsSetup(); + HandleWsDoesntAlterOriginal(StripWsSetup()); + } + + [Test] + public void NormalizeWhitespaceDoesntAlterOriginal() { + HandleWsDoesntAlterOriginal(NormalizeWsSetup()); + } + + private void HandleWsDoesntAlterOriginal(KeyValuePair<XmlDocument, + XmlNode> s) { XmlNodeList top = s.Key.ChildNodes; Assert.AreEqual(1, top.Count); Assert.IsTrue(top[0] is XmlElement); @@ -187,7 +215,7 @@ Assert.AreEqual(5, rootsChildren.Count); Assert.IsTrue(rootsChildren[0] is XmlComment, "should be comment, is " + rootsChildren[0].GetType()); - Assert.AreEqual(" trim me ", + Assert.AreEqual(" trim\tme ", ((XmlComment) rootsChildren[0]).Data); Assert.IsTrue(rootsChildren[1] is XmlElement, "should be element, is " + rootsChildren[1].GetType()); @@ -217,5 +245,13 @@ XmlAttribute a2 = (XmlAttribute) attrs.GetNamedItem("attr2"); Assert.AreEqual("not me", a2.Value); } + + [Test] + public void Normalize() { + Assert.AreSame("foo", Nodes.Normalize("foo")); + Assert.AreSame("foo bar", Nodes.Normalize("foo bar")); + Assert.AreEqual("foo bar", Nodes.Normalize("foo\nbar")); + Assert.AreEqual("foo bar", Nodes.Normalize("foo \r\n\t bar")); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |