Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/lexerTests SourceTests.java,1.16,1.17
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-07-03 13:56:31
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14744/tests/lexerTests Modified Files: SourceTests.java Log Message: Further fix to bug #973137, "Double-bytes characters are messed after parsing". Created a proper String-based source with the encoding only optionally specified. A string is no longer converted to a byte array and then back to characters. Index: SourceTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** SourceTests.java 14 Jan 2004 02:53:47 -0000 1.16 --- SourceTests.java 3 Jul 2004 13:56:08 -0000 1.17 *************** *** 34,40 **** --- 34,42 ---- import java.net.URL; import java.net.URLConnection; + import org.htmlparser.lexer.InputStreamSource; import org.htmlparser.lexer.Stream; import org.htmlparser.lexer.Source; + import org.htmlparser.lexer.StringSource; import org.htmlparser.tests.ParserTestCase; *************** *** 65,95 **** * Test initialization with a null value. */ ! public void testNull () throws IOException { Source source; ! source = new Source (null); assertTrue ("erroneous character", -1 == source.read ()); } /** ! * Test initialization with a null charset name. */ ! public void testEmpty () throws IOException { Source source; ! source = new Source (new Stream (new ByteArrayInputStream (new byte[0])), null); assertTrue ("erroneous character", -1 == source.read ()); } /** ! * Test initialization with an input stream having only one byte. */ ! public void testOneByte () throws IOException { Source source; ! 
source = new Source (new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42 })), null); assertTrue ("erroneous character", 'B' == source.read ()); assertTrue ("extra character", -1 == source.read ()); --- 67,97 ---- * Test initialization with a null value. */ ! public void testInputStreamSourceNull () throws IOException { Source source; ! source = new InputStreamSource (null); assertTrue ("erroneous character", -1 == source.read ()); } /** ! * Test initialization of a InputStreamSource with a zero length byte array. */ ! public void testInputStreamSourceEmpty () throws IOException { Source source; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (new byte[0])), null); assertTrue ("erroneous character", -1 == source.read ()); } /** ! * Test initialization of a InputStreamSource with an input stream having only one byte. */ ! public void testInputStreamSourceOneByte () throws IOException { Source source; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42 })), null); assertTrue ("erroneous character", 'B' == source.read ()); assertTrue ("extra character", -1 == source.read ()); *************** *** 97,107 **** /** ! * Test close. */ ! public void testClose () throws IOException { Source source; ! source = new Source (new Stream (new ByteArrayInputStream ("hello word".getBytes ())), null); assertTrue ("no character", -1 != source.read ()); source.destroy (); --- 99,109 ---- /** ! * Test closing a InputStreamSource. */ ! public void testInputStreamSourceClose () throws IOException { Source source; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream ("hello word".getBytes ())), null); assertTrue ("no character", -1 != source.read ()); source.destroy (); *************** *** 118,124 **** /** ! * Test reset. */ ! public void testReset () throws IOException { String reference; --- 120,126 ---- /** ! * Test resetting a InputStreamSource. */ ! 
public void testInputStreamSourceReset () throws IOException { String reference; *************** *** 128,132 **** reference = "Now is the time for all good men to come to the aid of the party"; ! source = new Source (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new StringBuffer (reference.length ()); while (-1 != (c = source.read ())) --- 130,134 ---- reference = "Now is the time for all good men to come to the aid of the party"; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new StringBuffer (reference.length ()); while (-1 != (c = source.read ())) *************** *** 142,148 **** /** ! * Test reset in the middle of reading. */ ! public void testMidReset () throws IOException { String reference; --- 144,150 ---- /** ! * Test resetting a InputStreamSource in the middle of reading. */ ! public void testInputStreamSourceMidReset () throws IOException { String reference; *************** *** 152,156 **** reference = "Now is the time for all good men to come to the aid of the party"; ! source = new Source (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < 25; i++) --- 154,158 ---- reference = "Now is the time for all good men to come to the aid of the party"; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < 25; i++) *************** *** 166,172 **** /** ! * Test mark/reset in the middle of reading. */ ! public void testMarkReset () throws IOException { String reference; --- 168,174 ---- /** ! * Test mark/reset of a InputStreamSource in the middle of reading. */ ! 
public void testInputStreamSourceMarkReset () throws IOException { String reference; *************** *** 176,180 **** reference = "Now is the time for all good men to come to the aid of the party"; ! source = new Source (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); assertTrue ("not markable", source.markSupported ()); buffer = new StringBuffer (reference.length ()); --- 178,182 ---- reference = "Now is the time for all good men to come to the aid of the party"; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); assertTrue ("not markable", source.markSupported ()); buffer = new StringBuffer (reference.length ()); *************** *** 192,198 **** /** ! * Test skip. */ ! public void testSkip () throws IOException { String part1; --- 194,200 ---- /** ! * Test skipping a InputStreamSource. */ ! public void testInputStreamSourceSkip () throws IOException { String part1; *************** *** 208,212 **** part3 = "to come to the aid of the party"; reference = part1 + part2 + part3; ! source = new Source (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < part1.length (); i++) --- 210,214 ---- part3 = "to come to the aid of the party"; reference = part1 + part2 + part3; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < part1.length (); i++) *************** *** 220,226 **** /** ! * Test multi-byte read. */ ! public void testMultByte () throws IOException { String reference; --- 222,228 ---- /** ! * Test multi-byte read with a InputStreamSource. */ ! 
public void testInputStreamSourceMultByte () throws IOException { String reference; *************** *** 229,233 **** reference = "Now is the time for all good men to come to the aid of the party"; ! source = new Source (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new char[reference.length ()]; source.read (buffer, 0, buffer.length); --- 231,235 ---- reference = "Now is the time for all good men to come to the aid of the party"; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new char[reference.length ()]; source.read (buffer, 0, buffer.length); *************** *** 238,244 **** /** ! * Test positioned multi-byte read. */ ! public void testPositionedMultByte () throws IOException { String part1; --- 240,246 ---- /** ! * Test positioned multi-byte read with a InputStreamSource. */ ! public void testInputStreamSourcePositionedMultByte () throws IOException { String part1; *************** *** 255,259 **** part3 = "to come to the aid of the party"; reference = part1 + part2 + part3; ! source = new Source (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new char[reference.length ()]; for (int i = 0; i < part1.length (); i++) --- 257,261 ---- part3 = "to come to the aid of the party"; reference = part1 + part2 + part3; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (reference.getBytes (DEFAULT_CHARSET))), null); buffer = new char[reference.length ()]; for (int i = 0; i < part1.length (); i++) *************** *** 270,280 **** /** ! * Test ready. */ ! public void testReady () throws IOException { Source source; ! source = new Source (new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42, (byte)0x62 })), null); assertTrue ("ready?", !source.ready ()); assertTrue ("erroneous character", 'B' == source.read ()); --- 272,282 ---- /** ! * Test ready of a InputStreamSource. 
*/ ! public void testInputStreamSourceReady () throws IOException { Source source; ! source = new InputStreamSource (new Stream (new ByteArrayInputStream (new byte[] { (byte)0x42, (byte)0x62 })), null); assertTrue ("ready?", !source.ready ()); assertTrue ("erroneous character", 'B' == source.read ()); *************** *** 300,305 **** int index; ! // pick a big file ! link = "http://htmlparser.sourceforge.net/HTMLParser_Coverage.html"; try { --- 302,306 ---- int index; ! link = "http://htmlparser.sourceforge.net"; try { *************** *** 310,314 **** connection2 = url.openConnection (); connection2.connect (); ! source = new Source (new Stream (connection2.getInputStream ()), "UTF-8"); index = 0; while (-1 != (c1 = in.read ())) --- 311,315 ---- connection2 = url.openConnection (); connection2.connect (); ! source = new InputStreamSource (new Stream (connection2.getInputStream ()), "UTF-8"); index = 0; while (-1 != (c1 = in.read ())) *************** *** 329,331 **** --- 330,555 ---- } } + + /** + * Test initialization of a StringSource with a null value. + */ + public void testStringSourceNull () throws IOException + { + Source source; + + source = new StringSource (null); + assertTrue ("erroneous character", -1 == source.read ()); + } + + /** + * Test initialization of a StringSource with a zero length string. + */ + public void testStringSourceEmpty () throws IOException + { + Source source; + + source = new StringSource (""); + assertTrue ("erroneous character", -1 == source.read ()); + } + + /** + * Test initialization of a StringSource with a one character string. + */ + public void testStringSourceOneCharacter () throws IOException + { + Source source; + + source = new StringSource (new String ("B")); + assertTrue ("erroneous character", 'B' == source.read ()); + assertTrue ("extra character", -1 == source.read ()); + } + + /** + * Test closing a StringSource. 
+ */ + public void testStringSourceClose () throws IOException + { + Source source; + + source = new StringSource ("hello word"); + assertTrue ("no character", -1 != source.read ()); + source.destroy (); + try + { + source.read (); + fail ("not closed"); + } + catch (IOException ioe) + { + // expected outcome + } + } + + /** + * Test resetting a StringSource. + */ + public void testStringSourceReset () throws IOException + { + String reference; + Source source; + StringBuffer buffer; + int c; + + reference = "Now is the time for all good men to come to the aid of the party"; + source = new StringSource (reference); + buffer = new StringBuffer (reference.length ()); + while (-1 != (c = source.read ())) + buffer.append ((char)c); + assertTrue ("string incorrect", reference.equals (buffer.toString ())); + source.reset (); + buffer.setLength (0); + while (-1 != (c = source.read ())) + buffer.append ((char)c); + assertTrue ("string incorrect", reference.equals (buffer.toString ())); + source.close (); + } + + /** + * Test resetting a StringSource in the middle of reading. + */ + public void testStringSourceMidReset () throws IOException + { + String reference; + Source source; + StringBuffer buffer; + int c; + + reference = "Now is the time for all good men to come to the aid of the party"; + source = new StringSource (reference); + buffer = new StringBuffer (reference.length ()); + for (int i = 0; i < 25; i++) + buffer.append ((char)source.read ()); + source.reset (); + for (int i = 0; i < 25; i++) + source.read (); + while (-1 != (c = source.read ())) + buffer.append ((char)c); + assertTrue ("string incorrect", reference.equals (buffer.toString ())); + source.close (); + } + + /** + * Test mark/reset of a StringSource in the middle of reading. 
+ */ + public void testStringSourceMarkReset () throws IOException + { + String reference; + Source source; + StringBuffer buffer; + int c; + + reference = "Now is the time for all good men to come to the aid of the party"; + source = new StringSource (reference); + assertTrue ("not markable", source.markSupported ()); + buffer = new StringBuffer (reference.length ()); + for (int i = 0; i < 25; i++) + buffer.append ((char)source.read ()); + source.mark (88); + for (int i = 0; i < 25; i++) + source.read (); + source.reset (); + while (-1 != (c = source.read ())) + buffer.append ((char)c); + assertTrue ("string incorrect", reference.equals (buffer.toString ())); + source.close (); + } + + /** + * Test skipping a StringSource. + */ + public void testStringSourceSkip () throws IOException + { + String part1; + String part2; + String part3; + String reference; + Source source; + StringBuffer buffer; + int c; + + part1 = "Now is the time "; + part2 = "for all good men "; + part3 = "to come to the aid of the party"; + reference = part1 + part2 + part3; + source = new StringSource (reference); + buffer = new StringBuffer (reference.length ()); + for (int i = 0; i < part1.length (); i++) + buffer.append ((char)source.read ()); + source.skip (part2.length ()); + while (-1 != (c = source.read ())) + buffer.append ((char)c); + assertTrue ("string incorrect", (part1 + part3).equals (buffer.toString ())); + source.close (); + } + + /** + * Test multi-byte read with a StringSource. 
+ */ + public void testStringSourceMultByte () throws IOException + { + String reference; + Source source; + char[] buffer; + + reference = "Now is the time for all good men to come to the aid of the party"; + source = new StringSource (reference); + buffer = new char[reference.length ()]; + source.read (buffer, 0, buffer.length); + assertTrue ("string incorrect", reference.equals (new String (buffer))); + assertTrue ("extra character", -1 == source.read ()); + source.close (); + } + + /** + * Test positioned multi-byte read with a StringSource. + */ + public void testStringSourcePositionedMultByte () throws IOException + { + String part1; + String part2; + String part3; + String reference; + Source source; + char[] buffer; + int c; + int length; + + part1 = "Now is the time "; + part2 = "for all good men "; + part3 = "to come to the aid of the party"; + reference = part1 + part2 + part3; + source = new StringSource (reference); + buffer = new char[reference.length ()]; + for (int i = 0; i < part1.length (); i++) + buffer[i] = (char)source.read (); + length = source.read (buffer, part1.length (), part2.length ()); + assertTrue ("incorrect length", part2.length () == length); + length += part1.length (); + for (int i = 0; i < part3.length (); i++) + buffer[i + length] = (char)source.read (); + assertTrue ("string incorrect", reference.equals (new String (buffer))); + assertTrue ("extra character", -1 == source.read ()); + source.close (); + } + + /** + * Test ready of a StringSource. + */ + public void testStringSourceReady () throws IOException + { + Source source; + + source = new StringSource ("Bb"); + assertTrue ("ready?", source.ready ()); + assertTrue ("erroneous character", 'B' == source.read ()); + assertTrue ("not ready", source.ready ()); + assertTrue ("erroneous character", 'b' == source.read ()); + assertTrue ("ready?", !source.ready ()); + assertTrue ("extra character", -1 == source.read ()); + } } |