Revision: 12086
http://sourceforge.net/p/foray/code/12086
Author: victormote
Date: 2021-11-18 15:56:31 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Refactor Lexer tests to cover both the Java and ICU4J Lexers.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
Added: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java (rev 0)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2021-11-18 15:56:31 UTC (rev 12086)
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+
+import org.junit.Before;
+
+/**
+ * Runs the inherited {@link LexerEnglishTests} cases against {@link LexerIcu4jBreakIterator}.
+ */
+public class LexerEnglishIcu4jTests extends LexerEnglishTests {
+
+ /** The object under test; assigned in {@link #setupTest()}. */
+ private LexerIcu4jBreakIterator out;
+
+ /**
+ * Sets up the test by creating the lexer under test with {@link WritingSystem4a#USA}.
+ */
+ @Before
+ public void setupTest() {
+ this.out = new LexerIcu4jBreakIterator(WritingSystem4a.USA);
+ }
+
+ @Override
+ public LexerIcu4jBreakIterator getObjectUnderTest() {
+ return this.out;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Added: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java (rev 0)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2021-11-18 15:56:31 UTC (rev 12086)
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+
+import org.junit.Before;
+
+/**
+ * Runs the inherited {@link LexerEnglishTests} cases against {@link LexerJavaBreakIterator}.
+ */
+public class LexerEnglishJavaTests extends LexerEnglishTests {
+
+ /** The object under test; assigned in {@link #setupTest()}. */
+ private LexerJavaBreakIterator out;
+
+ /**
+ * Sets up the test by creating the lexer under test with {@link WritingSystem4a#USA}.
+ */
+ @Before
+ public void setupTest() {
+ this.out = new LexerJavaBreakIterator(WritingSystem4a.USA);
+ }
+
+ @Override
+ public LexerJavaBreakIterator getObjectUnderTest() {
+ return this.out;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 15:17:09 UTC (rev 12085)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 15:56:31 UTC (rev 12086)
@@ -28,10 +28,8 @@
package org.foray.orthography;
-import org.foray.common.i18n.WritingSystem4a;
-
import org.junit.Assert;
-import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
@@ -39,19 +37,13 @@
/**
* Tests of {@link LexerLatin1}.
*/
-public class LexerEnglishTests {
+public abstract class LexerEnglishTests {
- /** The object under test. */
- private LexerJavaBreakIterator out;
- /* TODO: This is temporarily referencing the wrong type. */
-
/**
- * Setup the test.
+ * Returns the Lexer object that is being tested.
+ * @return The Lexer object.
*/
- @Before
- public void setupTest() {
- this.out = new LexerJavaBreakIterator(WritingSystem4a.USA);
- }
+ public abstract Lexer4a getObjectUnderTest();
/**
* A simple test of {@link LexerLatin1#breakIntoWords(CharSequence)}.
@@ -59,7 +51,7 @@
@Test
public void testBreakSimple() {
final String testString = "Beware the ides of March.";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(10, actual.size());
Assert.assertEquals("Beware", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -79,7 +71,7 @@
@Test
public void testMedium() {
final String testString = "39. It was the best of times. It was the worst of times. <----";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(27, actual.size());
Assert.assertEquals("39", actual.get(0));
Assert.assertEquals(". ", actual.get(1));
@@ -117,7 +109,7 @@
public void testWithCompoundWord() {
/* Spoken by Juliet, Romeo & Juliet, Act 3 Scene 2. */
final String testString = "Gallop apace, you fiery-footed steeds,";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
/* Compound word "fiery-footed" treated as one word. */
Assert.assertEquals(10, actual.size());
@@ -140,7 +132,7 @@
public void testWithMidWordContractionApostrophe() {
/* Spoken by Hamlet, Hamlet, Act 2, Scene 2. */
final String testString = "The play's the thing";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(7, actual.size());
Assert.assertEquals("The", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -158,7 +150,7 @@
@Test
public void testWithSymbolsAsWords() {
final String testString = "! @ # $ %";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(9, actual.size());
Assert.assertEquals("!", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -179,7 +171,7 @@
@Test
public void testWithAttachedPunctuation() {
final String testString = "Parentheses (as I stated earlier) are a matching pair of ( and ) characters.";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(28, actual.size());
Assert.assertEquals("Parentheses", actual.get(0));
Assert.assertEquals(" (", actual.get(1));
@@ -219,7 +211,7 @@
@Test
public void testUnicodeWordBoundariesExample() {
final String testString = "The quick (“brown”) fox can’t jump 32.3 feet, right?";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(18, actual.size());
Assert.assertEquals("The", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -247,7 +239,7 @@
@Test
public void testCorner001() {
final String testString = "Appendix D.4)";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(4, actual.size());
Assert.assertEquals("Appendix", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -255,4 +247,19 @@
Assert.assertEquals(")", actual.get(3));
}
+ /**
+ * Tests that a period plus closing quotation mark ending the text is returned as one token.
+ */
+ @Test
+ @Ignore
+ public void testDoubleTrailingPunctuationAtEnd() {
+ final String testString = "every creature.”";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ Assert.assertEquals(4, actual.size());
+ Assert.assertEquals("every", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("creature", actual.get(2));
+ Assert.assertEquals(".”", actual.get(3));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|