Revision: 12055
http://sourceforge.net/p/foray/code/12055
Author: victormote
Date: 2021-11-13 17:58:04 +0000 (Sat, 13 Nov 2021)
Log Message:
-----------
Simplify line-break testing by removing dependency on orthography tokenizing.
Modified Paths:
--------------
trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java
Added Paths:
-----------
trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/ParaBranch4aTestFixture.java
Added: trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/ParaBranch4aTestFixture.java
===================================================================
--- trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/ParaBranch4aTestFixture.java (rev 0)
+++ trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/ParaBranch4aTestFixture.java 2021-11-13 17:58:04 UTC (rev 12055)
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.linebreak;
+
+import org.foray.common.para.ParaBranch4a;
+import org.foray.orthography.StringWord;
+import org.foray.orthography.Whitespace4a;
+
+import org.axsl.common.para.ParaConfig;
+import org.axsl.common.para.ParaNode;
+
+import java.util.regex.Pattern;
+
+/**
+ * Adds some methods that simplify and clarify some tests.
+ */
+public class ParaBranch4aTestFixture extends ParaBranch4a {
+
+    /** Regex pattern used to break compound words into their components. Matches the soft-hyphen char literally. */
+    private final Pattern wordBreaker;
+
+    /**
+     * Constructor.
+     * @param config The configuration information for this branch.
+     * @param softHyphenChar The character used to mark syllable breaks. It is matched literally, even if it is a regex metacharacter.
+     */
+    public ParaBranch4aTestFixture(final ParaConfig config, final char softHyphenChar) {
+        super(config);
+        this.wordBreaker = Pattern.compile(Pattern.quote(Character.toString(softHyphenChar)));
+    }
+
+    /**
+     * Adds a word, followed by a space, to this paragraph branch.
+     * @param wordChars The characters in the word, including raw hyphenation points, e.g. "run-ning", "ap-ple", etc.
+     */
+    public void addWordAndSpace(final CharSequence wordChars) {
+        addWord(wordChars);
+        this.add(Whitespace4a.SPACE);
+    }
+
+    /**
+     * Adds a word, with no trailing space, to this paragraph branch.
+     * @param wordChars The characters in the word, including raw hyphenation points, e.g. "run-ning", "ap-ple", etc.
+     */
+    public void addWord(final CharSequence wordChars) {
+        final char partsOfSpeech = 0;
+        final String[] components = this.wordBreaker.split(wordChars);
+        this.add(new StringWord(partsOfSpeech, components));
+    }
+
+    /** Adds {@code node} to this paragraph branch, followed by {@link Whitespace4a#SPACE}. */
+    public void addNodeAndSpace(final ParaNode node) {
+        this.add(node);
+        this.add(Whitespace4a.SPACE);
+    }
+
+}
Property changes on: trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/ParaBranch4aTestFixture.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java
===================================================================
--- trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java 2021-11-13 17:50:31 UTC (rev 12054)
+++ trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java 2021-11-13 17:58:04 UTC (rev 12055)
@@ -32,10 +32,7 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.para.ParaBranch4a;
import org.foray.common.para.ParaConfig4a;
-import org.foray.orthography.Orthography4a;
-import org.foray.orthography.OrthographyServer4a;
-import org.foray.orthography.OrthographyServerConfig;
-import org.foray.orthography.StringWord;
+import org.foray.orthography.Punctuation4a;
import org.axsl.common.para.DiscretionaryHyphen;
import org.axsl.common.para.ParaConfig;
@@ -44,14 +41,11 @@
import org.axsl.font.FontUse;
import org.axsl.linebreak.LineBreakControl;
import org.axsl.linebreak.OutputLine;
-import org.axsl.orthography.OrthographyException;
import org.junit.Assert;
-import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -66,32 +60,167 @@
public static final String TEST_STRING_01 =
"In olden times when wishing still helped one, there lived a king " +
"whose daughters were all beautiful; and the youngest was so beautiful that the sun itself, which has " +
- "seen so much, was astonished whenever it shone in her face. Close by the king\u2019s castle lay a great " +
+ "seen so much, was astonished whenever it shone in her face. Close by the king’s castle lay a great " +
"dark forest, and under an old lime-tree in the forest was a well, and when the day was very warm, the " +
- "king\u2019s child went out into the forest and sat down by the side of the cool fountain; and when she " +
+ "king’s child went out into the forest and sat down by the side of the cool fountain; and when she " +
"was bored she took a golden ball, and threw it up on high and caught it; and this ball was her favorite " +
"plaything.";
- /** The orthography server doing the basic parsing. */
- private OrthographyServer4a orthographyServer;
-
- /** The paragraph configuration to be used for these tests. */
- private ParaConfig paraConfig;
-
/**
- * Setup the test.
- * @throws IOException For errors creating the URLs to the various resources.
- * @throws OrthographyException For errors creating a hyphenation server.
+ * Creates the tokenized content of {@link #TEST_STRING_01}.
+ * We could use the orthography system to build this, but the line-breaking logic is independent of that, so we
+ * manually build it in this method.
+ * @return The tokenized content of {@link #TEST_STRING_01}.
*/
- @Before
- public void setup() throws IOException, OrthographyException {
- final OrthographyServerConfig config = LbTestUtilities.createHyphenationServerConfig();
- this.orthographyServer = new OrthographyServer4a(config);
+    private ParaBranch4a createPara() {
+        final ParaBranch4aTestFixture para = new ParaBranch4aTestFixture(null, '-'); // Config is set below.
+        para.addWordAndSpace("In");
+        para.addWordAndSpace("olden");
+        para.addWordAndSpace("times");
+        para.addWordAndSpace("when");
+        para.addWordAndSpace("wish-ing"); // 2 segments
+        para.addWordAndSpace("still");
+        para.addWordAndSpace("helped");
+        para.addWord("one");
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("there");
+        para.addWordAndSpace("lived");
+        para.addWordAndSpace("a");
+        para.addWordAndSpace("king");
+        para.addWordAndSpace("whose");
+        para.addWordAndSpace("daugh-ters"); // 2 segments
+        para.addWordAndSpace("were");
+        para.addWordAndSpace("all");
+        para.addWord("beau-ti-ful"); // 3 segments
+        para.addNodeAndSpace(Punctuation4a.SEMICOLON);
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("young-est"); // 2 segments
+        para.addWordAndSpace("was");
+        para.addWordAndSpace("so");
+        para.addWordAndSpace("beau-ti-ful"); // 3 segments
+        para.addWordAndSpace("that");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("sun");
+        para.addWord("it-self"); // 2 segments
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("which");
+        para.addWordAndSpace("has");
+        para.addWordAndSpace("seen");
+        para.addWordAndSpace("so");
+        para.addWord("much");
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("was");
+        para.addWordAndSpace("aston-ished"); // 2 segments
+        para.addWordAndSpace("when-ever"); // 2 segments
+        para.addWordAndSpace("it");
+        para.addWordAndSpace("shone");
+        para.addWordAndSpace("in");
+        para.addWordAndSpace("her");
+        para.addWord("face");
+        para.addNodeAndSpace(Punctuation4a.PERIOD);
+        para.addWordAndSpace("Close");
+        para.addWordAndSpace("by");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("king’s");
+        para.addWordAndSpace("castle");
+        para.addWordAndSpace("lay");
+        para.addWordAndSpace("a");
+        para.addWordAndSpace("great");
+        para.addWordAndSpace("dark");
+        para.addWord("for-est"); // 2 segments
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("un-der"); // 2 segments
+        para.addWordAndSpace("an");
+        para.addWordAndSpace("old");
+        para.addWord("lime");
+        para.add(Punctuation4a.MANDATORY_HYPHEN); // No space on either side: "lime-tree".
+        para.addWordAndSpace("tree");
+        para.addWordAndSpace("in");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("for-est"); // 2 segments
+        para.addWordAndSpace("was");
+        para.addWordAndSpace("a");
+        para.addWord("well");
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("when");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("day");
+        para.addWordAndSpace("was");
+        para.addWordAndSpace("very");
+        para.addWord("warm");
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("king’s");
+        para.addWordAndSpace("child");
+        para.addWordAndSpace("went");
+        para.addWordAndSpace("out");
+        para.addWordAndSpace("into");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("for-est"); // 2 segments
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("sat");
+        para.addWordAndSpace("down");
+        para.addWordAndSpace("by");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("side");
+        para.addWordAndSpace("of");
+        para.addWordAndSpace("the");
+        para.addWordAndSpace("cool");
+        para.addWord("foun-tain"); // 2 segments
+        para.addNodeAndSpace(Punctuation4a.SEMICOLON);
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("when");
+        para.addWordAndSpace("she");
+        para.addWordAndSpace("was");
+        para.addWordAndSpace("bored");
+        para.addWordAndSpace("she");
+        para.addWordAndSpace("took");
+        para.addWordAndSpace("a");
+        para.addWordAndSpace("golden");
+        para.addWord("ball");
+        para.addNodeAndSpace(Punctuation4a.COMMA);
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("threw");
+        para.addWordAndSpace("it");
+        para.addWordAndSpace("up");
+        para.addWordAndSpace("on");
+        para.addWordAndSpace("high");
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("caught");
+        para.addWord("it"); // The semicolon is added as its own node on the next line.
+        para.addNodeAndSpace(Punctuation4a.SEMICOLON);
+        para.addWordAndSpace("and");
+        para.addWordAndSpace("this");
+        para.addWordAndSpace("ball");
+        para.addWordAndSpace("was");
+        para.addWordAndSpace("her");
+        para.addWordAndSpace("favor-ite"); // 2 segments
+        para.addWordAndSpace("play-thing"); // 2 segments
+        para.add(Punctuation4a.PERIOD);
         final Font font = LbTestUtilities.createMonotypeFont();
         final FontUse fontUse = Mockito.mock(FontUse.class);
         Mockito.when(fontUse.getFont()).thenReturn(font);
-        this.paraConfig = new ParaConfig4a(fontUse, 10, WritingSystem4a.USA, 0, 0);
+        final ParaConfig paraConfig = new ParaConfig4a(fontUse, 10, WritingSystem4a.USA, 0, 0);
+        para.setParaConfig(paraConfig);
+
+        /* The tokenized text treated "’s" as part of the attached word.
+         * The compound word "lime-tree" was treated as two words divided by punctuation.
+         * There are 126 lines of code creating the para.
+         *                              Count   Nodes
+         * addWord                         12      12
+         * addNodeAndSpace                 11      22
+         * add                              2       2
+         * addWordAndSpace (forced)       101     202
+         *                              -----   -----
+         * totals                         126     238
+         *                              =====   ===== */
+
+        Assert.assertEquals(238, para.getQtyParaNodeChildren());
+        return para;
     }
/**
@@ -100,24 +229,8 @@
@Test
public void bestFitTest1() {
/* TODO: THE CLASS BEING TESTED IS VERY INCOMPLETE, AND SO IS THIS TEST !!!!!! */
- final Orthography4a orthography = this.orthographyServer.getOrthography(WritingSystem4a.USA);
- final ParaBranch4a paragraph =
- orthography.tokenizeWordSequence(TEST_STRING_01, 0, TEST_STRING_01.length());
- paragraph.setParaConfig(paraConfig);
-
- /* Make manual changes to get the paragraph features identical to our baseline paragraph, as documented
- * in the Knuth-Plass article. These are all cases where the native hyphenation opportunies violate the
- * "leave at least 2, and push at least 3" general rule. */
- paragraph.setParaNodeChild(2, new StringWord(0, "olden")); // Native = old-en
- paragraph.setParaNodeChild(66, new StringWord(0, "aston", "ished")); // Native = as-ton-ish-ed
- paragraph.setParaNodeChild(68, new StringWord(0, "when", "ever")); // Native = when-ev-er
- paragraph.setParaNodeChild(88, new StringWord(0, "castle")); // Native = cas-tle
- paragraph.setParaNodeChild(134, new StringWord(0, "very")); // Native = ver-y
- paragraph.setParaNodeChild(148, new StringWord(0, "into")); // Native = in-to
- paragraph.setParaNodeChild(190, new StringWord(0, "golden")); // Native = gold-en
- paragraph.setParaNodeChild(222, new StringWord(0, "favor", "ite")); // Native = fa-vor-ite
-
+ final ParaBranch4a content = this.createPara();
final LineBreakControl lbControl = Mockito.mock(LineBreakControl.class);
final OutputLine lineOutput = Mockito.mock(OutputLine.class);
/* Page is 8.5 inches wide, 1 inch margin on both ends, leaving 6.5 inches, or 468,000 millipoints. */
@@ -126,19 +239,12 @@
final ParaControl paraControl = Mockito.mock(ParaControl.class);
Mockito.when(paraControl.getCost(Mockito.any(DiscretionaryHyphen.Quality.class))).thenReturn(50);
final TotalFitLb out = new TotalFitLb();
- final List<OrderedTreePath4a> actual = out.breakIntoLines(paragraph, paraControl, lbControl);
+ final List<OrderedTreePath4a> actual = out.breakIntoLines(content, paraControl, lbControl);
- /* For now, treat "’s" as interword content of an apostrophe, followed by a 1-syllable word. */
- /* Words: 98 1-syllable, 13 2-syllable, 2 3-syllable, total of 114 words
- * 98 x 2 = 196, 13 x 4 = 52, 2 x 6 = 12, subtotal of 260 nodes.
- * Special Interword: 11 normal, 2 "’s", 1 ="-", 1 terminating period (no space after).
- * 11 x 2 = 22, 2 x 3 = 6, 1 x 1 = 1, 1 x 1 = 1, subtotal of 30 nodes.
- * Grand total of 290 nodes. */
-// Assert.assertEquals(290, out.getContentNodes().size());
final List<OrderedTreePath4a> expected = new ArrayList<OrderedTreePath4a>();
-// final int[] expectedArray = {};
-// expected.add(new OrderedTreePath4a(new IntArray(expectedArray)));
+//// final int[] expectedArray = {};
+//// expected.add(new OrderedTreePath4a(new IntArray(expectedArray)));
Assert.assertArrayEquals(expected.toArray(), actual.toArray());
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|