[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/utilTests CharacterTranslationTest.java,1.38,1.39
Brought to you by:
derrickoswald
From: <der...@us...> - 2004-01-14 02:54:01
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1:/tmp/cvs-serv28098/src/org/htmlparser/tests/utilTests Modified Files: CharacterTranslationTest.java Log Message: Index: CharacterTranslationTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** CharacterTranslationTest.java 2 Jan 2004 16:24:57 -0000 1.38 --- CharacterTranslationTest.java 14 Jan 2004 02:53:47 -0000 1.39 *************** *** 27,30 **** --- 27,37 ---- package org.htmlparser.tests.utilTests; + import java.io.ByteArrayInputStream; + import java.io.ByteArrayOutputStream; + import java.io.IOException; + import java.io.InputStream; + import java.io.PrintStream; + import java.net.URL; + import java.net.URLConnection; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.Translate; *************** *** 63,67 **** "character entity reference without a semicolon at start of string doesn't work", "\u00f7 is the division sign.", ! Translate.decode ("÷ is the division sign.")); } --- 70,74 ---- "character entity reference without a semicolon at start of string doesn't work", "\u00f7 is the division sign.", ! Translate.decode ("÷ is the division sign.")); } *************** *** 71,75 **** "numeric character reference without a semicolon at start of string doesn't work", "\u00f7 is the division sign.", ! Translate.decode ("÷ is the division sign.")); } --- 78,82 ---- "numeric character reference without a semicolon at start of string doesn't work", "\u00f7 is the division sign.", ! 
Translate.decode ("÷ is the division sign.")); } *************** *** 145,148 **** --- 152,241 ---- Translate.encode ("<a href=\"http://www.w3.org/TR/REC-html40/sgml/entities.html\">http://www.w3.org/TR/REC-html40/sgml/entities.html</a>")); } + + public byte[] encodedecode (byte[] bytes) + throws + IOException + { + InputStream in; + ByteArrayOutputStream out; + + // encode + in = new ByteArrayInputStream (bytes); + out = new ByteArrayOutputStream (); + Translate.encode (in, new PrintStream (out)); + in.close (); + out.close (); + + // decode + in = new ByteArrayInputStream (out.toByteArray ()); + out = new ByteArrayOutputStream (); + Translate.decode (in, new PrintStream (out)); + in.close (); + out.close (); + + return (out.toByteArray ()); + } + + public void check (byte[] reference, byte[] result) + throws + IOException + { + InputStream ref; + InputStream in; + int i; + int i1; + int i2; + + ref = new ByteArrayInputStream (reference); + in = new ByteArrayInputStream (result); + i = 0; + do + { + i1 = ref.read (); + i2 = in.read (); + if (i1 != i2) + fail ("byte difference detected at offset " + i); + i++; + } + while (-1 != i1); + ref.close (); + in.close (); + } + + // public void testInitialCharacterEntityReferenceCodec () + // throws + // IOException + // { + // byte[] data = "\u00f7 is the division sign.".getBytes (); + // check (data, encodedecode (data)); + // } + + public void testEncodeDecodePage () throws IOException + { + URL url; + URLConnection connection; + InputStream in; + ByteArrayOutputStream out; + byte[] bytes; + byte[] result; + int c; + + // get some bytes + url = new URL ("http://sourceforge.net/projects/htmlparser"); + connection = url.openConnection (); + in = connection.getInputStream (); + out = new ByteArrayOutputStream (); + while (-1 != (c = in.read ())) + out.write (c); + in.close (); + out.close (); + bytes = out.toByteArray (); + + // run it through + result = encodedecode (bytes); + + // check + check (bytes, result); + } } |