[FOray-commit] SF.net SVN: foray:[12388] trunk/foray
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
|
From: <vic...@us...> - 2022-01-13 13:16:08
|
Revision: 12388
http://sourceforge.net/p/foray/code/12388
Author: victormote
Date: 2022-01-13 13:16:05 +0000 (Thu, 13 Jan 2022)
Log Message:
-----------
1. Add tests for TokenFlow4a.extract(). 2. Improvements to Location marking.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/WellKnownConstants.java
trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaBranch4aIterator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/MutableTokenFlowLocation.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/TokenFlow4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/TokenFlow4aTests.java
trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/FoTextWordsPnr.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/WellKnownConstants.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/WellKnownConstants.java 2022-01-12 21:01:38 UTC (rev 12387)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/WellKnownConstants.java 2022-01-13 13:16:05 UTC (rev 12388)
@@ -54,16 +54,16 @@
* 16. */
public static final byte MAX_4_BIT_UNSIGNED_VALUES = 16;
- /** The maximum number of values that can be stored in 8 bits, that is,
- * 256. */
+ /** The maximum number of values that can be stored in 8 bits, that is, 256. */
public static final short MAX_8_BIT_UNSIGNED_VALUES = 256;
- /** The maximum number of values that can be stored in 7 bits, that is,
- * 128. */
+ /** The maximum number of values that can be stored in 7 unsigned bits, which is {@value}. */
public static final short MAX_7_BIT_UNSIGNED_VALUES = 128;
- /** The maximum number of values that can be stored in 16 bits, that is,
- * 65,536. */
+ /** The maximum value that can be stored in 7 unsigned bits, which is {@value}. */
+ public static final short MAX_7_BIT_UNSIGNED_VALUE = 127;
+
+ /** The maximum number of values that can be stored in 16 bits, that is, 65,536. */
public static final int MAX_16_BIT_UNSIGNED_VALUES = 65536;
/** The number of bytes in a long, that is, 8. */
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaBranch4aIterator.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaBranch4aIterator.java 2022-01-12 21:01:38 UTC (rev 12387)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaBranch4aIterator.java 2022-01-13 13:16:05 UTC (rev 12388)
@@ -28,6 +28,7 @@
package org.foray.common.para;
+import org.foray.common.MarkedIndexOutOfBoundsException;
import org.foray.common.sequence.IntArrayBuilder;
import org.axsl.common.para.ParaBranch;
@@ -206,7 +207,30 @@
* @return The content of the branches stack at index {@code index}.
*/
public int branchIndexAt(final int index) {
- return this.branchIndexes.intAt(0);
+ return this.branchIndexes.intAt(index);
}
+ /**
+ * Advances to a given leaf index.
+ * @param newLeafIndex The leaf index to which the iterator should be advanced.
+ * @throws IllegalArgumentException If {@code newLeafIndex} is less than the current internal location of this
+ * iterator. Out-of-range indexes raise {@link MarkedIndexOutOfBoundsException} instead.
+ */
+ public void advanceToLeaf(final int newLeafIndex) {
+ if (newLeafIndex < 0
+ || newLeafIndex > this.paraLeafSize) {
+ throw new MarkedIndexOutOfBoundsException(newLeafIndex, this.paraLeafSize);
+ }
+ if (newLeafIndex < this.leafIndex) {
+ throw new IllegalArgumentException("Iterator is already past requested leaf index: " + newLeafIndex);
+ }
+ while (nextIndex() < newLeafIndex) {
+ if (hasNext()) {
+ next();
+ } else {
+ throw new IllegalStateException("Iterator cannot go past last element.");
+ }
+ }
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/MutableTokenFlowLocation.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/MutableTokenFlowLocation.java 2022-01-12 21:01:38 UTC (rev 12387)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/MutableTokenFlowLocation.java 2022-01-13 13:16:05 UTC (rev 12388)
@@ -35,6 +35,9 @@
*/
public class MutableTokenFlowLocation implements TokenFlow.Location {
+ /** String format for {@link #toString()}. */
+ private static final String TO_STRING_FORMAT = "[%d, %d, %d]";
+
/** The token index. See {@link TokenFlow.Location#getTokenIndex()}. */
private int tokenIndex;
@@ -82,6 +85,11 @@
}
}
+ @Override
+ public String toString() {
+ return String.format(TO_STRING_FORMAT, this.tokenIndex, this.segmentIndex, this.charIndex);
+ }
+
/**
* Marks the current token complete by incrementing the token index and setting the segmentIndex and charIndex to
* zero.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/TokenFlow4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/TokenFlow4a.java 2022-01-12 21:01:38 UTC (rev 12387)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/TokenFlow4a.java 2022-01-13 13:16:05 UTC (rev 12388)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.common.WellKnownConstants;
import org.foray.common.para.ParaBranch4a;
import org.foray.common.para.ParaBranch4aIterator;
@@ -59,6 +60,9 @@
* using the 32 bits as 20 bits for the token, 6 for the segment, and 6 for the char, adding 4 bits to the size
* of the token index. */
+ /** String format for {@link #toString()}. */
+ private static final String TO_STRING_FORMAT = "[%d, %d, %d]";
+
/** The token index. See {@link TokenFlow.Location#getTokenIndex()}. */
private char tokenIndex;
@@ -68,12 +72,31 @@
/** The char index. See {@link TokenFlow.Location#getCharIndex()}. */
private byte charIndex;
- Location(final int tokenIndex, final byte segmentIndex, final byte charIndex) {
- /* A char can index 65,536 tokens, which is probably 30,000 words, allowing half of the tokens to be
- * whitespace, and some others to be punctuation. */
+ /**
+ * Constructor.
+ * @param tokenIndex The token index, in the range 0 through 65,535.
+ * Estimating that half of the tokens are whitespace, and a good chunk more are punctuation, this should allow
+ * 25,000 to 30,000 words, which, for now, we consider sufficient.
+ * @param segmentIndex The segment index, in the range 0 through 127.
+ * @param charIndex The char index, in the range 0 through 127.
+ */
+ Location(final int tokenIndex, final int segmentIndex, final int charIndex) {
+ if (tokenIndex < 0
+ || tokenIndex > Character.MAX_VALUE) {
+ throw new IllegalArgumentException("The tokenIndex is out of range: " + tokenIndex);
+ }
+ if (segmentIndex < 0
+ || segmentIndex > WellKnownConstants.MAX_7_BIT_UNSIGNED_VALUE) {
+ throw new IllegalArgumentException("The segmentIndex is out of range: " + segmentIndex);
+ }
+ if (charIndex < 0
+ || charIndex > WellKnownConstants.MAX_7_BIT_UNSIGNED_VALUE) {
+ throw new IllegalArgumentException("The charIndex is out of range: " + charIndex);
+ }
+
this.tokenIndex = (char) tokenIndex;
- this.segmentIndex = segmentIndex;
- this.charIndex = charIndex;
+ this.segmentIndex = (byte) segmentIndex;
+ this.charIndex = (byte) charIndex;
}
@Override
@@ -108,6 +131,11 @@
}
}
+ @Override
+ public String toString() {
+ return String.format(TO_STRING_FORMAT, (int) this.tokenIndex, this.segmentIndex, this.charIndex);
+ }
+
}
/** The tokens (children) of this text flow. */
@@ -187,7 +215,7 @@
}
@Override
- public TokenFlow.Location markLocation(final int tokenIndex, final byte segmentIndex, final byte charIndex) {
+ public TokenFlow.Location markLocation(final int tokenIndex, final int segmentIndex, final int charIndex) {
return new Location(tokenIndex, segmentIndex, charIndex);
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/TokenFlow4aTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/TokenFlow4aTests.java 2022-01-12 21:01:38 UTC (rev 12387)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/TokenFlow4aTests.java 2022-01-13 13:16:05 UTC (rev 12388)
@@ -28,10 +28,16 @@
package org.foray.orthography;
+import org.axsl.common.TextModifiers;
+import org.axsl.common.value.LinefeedTreatment;
+import org.axsl.common.value.TextTransform;
import org.axsl.common.value.WhiteSpaceTreatment;
+import org.axsl.fotree.text.FoTokenFlow;
+import org.axsl.orthography.TokenFlow;
import org.junit.Assert;
import org.junit.Test;
+import org.mockito.Mockito;
/**
* Tests of {@link TokenFlow4a}.
@@ -149,4 +155,59 @@
}
+ /**
+ * Tests of {@link TokenFlow4a#extract(FoTokenFlow.Location, FoTokenFlow.Location, TextModifiers, boolean,
+ * boolean)}.
+ */
+ @Test
+ public void extract_Test_001() {
+ /* William Shakespeare, "King Lear," Act iii, Scene 2. */
+ final TokenFlow4a out = new TokenFlow4a();
+ out.addToken(new StringWord(0, "I")); // Token 0
+ out.addToken(Whitespace4a.SPACE); // Token 1
+ out.addToken(new StringWord(0, "am")); // Token 2
+ out.addToken(Whitespace4a.SPACE); // Token 3
+ out.addToken(new StringWord(0, "a")); // Token 4
+ out.addToken(Whitespace4a.SPACE); // Token 5
+ out.addToken(new StringWord(0, "man")); // Token 6
+ out.addToken(Whitespace4a.LINE_FEED); // Token 7
+ out.addToken(new StringWord(0, "More")); // Token 8
+ out.addToken(Whitespace4a.SPACE); // Token 9
+ out.addToken(new StringWord(0, "sinn’d")); // Token 10
+ out.addToken(Whitespace4a.SPACE); // Token 11
+ out.addToken(new StringWord(0, "a", "gainst")); // Token 12
+ out.addToken(Whitespace4a.SPACE); // Token 13
+ out.addToken(new StringWord(0, "than")); // Token 14
+ out.addToken(Whitespace4a.SPACE); // Token 15
+ out.addToken(new StringWord(0, "sin", "ning")); // Token 16
+ out.addToken(Punctuation4a.PERIOD); // Token 17
+ Assert.assertEquals(18, out.qtyTokens());
+
+ final TextModifiers textModifiers = Mockito.mock(TextModifiers.class);
+ Mockito.when(textModifiers.traitLinefeedTreatment()).thenReturn(LinefeedTreatment.TREAT_AS_SPACE);
+ Mockito.when(textModifiers.traitTextTransform()).thenReturn(TextTransform.NONE);
+ Mockito.when(textModifiers.traitWhiteSpaceTreatment()).thenReturn(
+ WhiteSpaceTreatment.IGNORE_IF_SURROUNDING_LINEFEED);
+ Mockito.when(textModifiers.traitWhiteSpaceCollapse()).thenReturn(true);
+
+ /* Test extract starting at the start. */
+ TokenFlow.Location start = out.markLocation(0, (byte) 0, (byte) 0);
+ TokenFlow.Location end = out.markLocation(7, (byte) 0, (byte) 0);
+ Assert.assertEquals("I am a man", out.extract(start, end, textModifiers, false, false));
+
+ /* Test extract ending with a space. */
+ end = out.markLocation(8, (byte) 0, (byte) 0);
+ Assert.assertEquals("I am a man ", out.extract(start, end, textModifiers, false, false));
+
+ /* Test extract ending at the end. */
+ start = out.markLocation(8, (byte) 0, (byte) 0);
+ end = out.markLocation(18, (byte) 0, (byte) 0);
+ Assert.assertEquals("More sinn’d against than sinning.", out.extract(start, end, textModifiers, false, false));
+
+// /* Test extract starting in the middle of a word. */
+// start = out.markLocation(12, (byte) 2, (byte) 0);
+// end = out.markLocation(18, (byte) 0, (byte) 0);
+// Assert.assertEquals("gainst than sinning.", out.extract(start, end, textModifiers, false, false));
+ }
+
}
Modified: trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/FoTextWordsPnr.java
===================================================================
--- trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/FoTextWordsPnr.java 2022-01-12 21:01:38 UTC (rev 12387)
+++ trunk/foray/foray-pioneer/src/main/java/org/foray/pioneer/FoTextWordsPnr.java 2022-01-13 13:16:05 UTC (rev 12388)
@@ -77,7 +77,7 @@
final LineBreakResult result =
lb.breakIntoLines(this.node.getFoTokenFlow(), (ParaConfig) this.node, paraContext);
- FoTokenFlow.Location startLocation = this.node.getFoTokenFlow().markLocation(0, (byte) -1, (byte) -1);
+ FoTokenFlow.Location startLocation = this.node.getFoTokenFlow().markLocation(0, 0, 0);
/* Run a leaf iterator alongside the iteration of the line-break results, so that we can conveniently retrieve
* the branch indexes. */
final ParaBranch4aIterator iterator = new ParaBranch4aIterator(this.node.getFoTokenFlow());
@@ -84,18 +84,12 @@
for (int index = 0; index < result.getQtyLines(); index ++) {
final int currentLeafIndex = result.getBreakPosition(index);
final LineArea lineArea = normalBlockArea.makeLineArea(true);
- while (iterator.nextIndex() < currentLeafIndex) {
- if (iterator.hasNext()) {
- iterator.next();
- } else {
- throw new IllegalStateException("Iterator cannot go past last element.");
- }
- }
- /* Element 0 should be the token index. If there is an element 1, it should be the segment index. */
+ iterator.advanceToLeaf(currentLeafIndex);
+ /* Element 0 should be the token index. If there is an element 1, it is the segment index. */
final int tokenIndex = iterator.branchIndexAt(0);
- final byte segmentIndex = iterator.depth() > 1 ? (byte) iterator.branchIndexAt(1) : -1;
+ final int segmentIndex = iterator.depth() > 1 ? iterator.branchIndexAt(1) : 0;
final FoTokenFlow.Location endLocation =
- this.node.getFoTokenFlow().markLocation(tokenIndex, segmentIndex, (byte) -1);
+ this.node.getFoTokenFlow().markLocation(tokenIndex, segmentIndex, 0);
lineArea.makeGlyphAreaSequence(this.node, startLocation, endLocation, false);
startLocation = endLocation;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|