Revision: 12282
http://sourceforge.net/p/foray/code/12282
Author: victormote
Date: 2021-12-28 00:40:43 +0000 (Tue, 28 Dec 2021)
Log Message:
-----------
Change acquisition of marker for invalid Unicode character.
Modified Paths:
--------------
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoRefinedText4a.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoText4a.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequenceContent.java
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoRefinedText4a.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoRefinedText4a.java 2021-12-27 17:37:20 UTC (rev 12281)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoRefinedText4a.java 2021-12-28 00:40:43 UTC (rev 12282)
@@ -28,6 +28,7 @@
package org.foray.fotree.fo.obj;
+import org.foray.common.WellKnownConstants;
import org.foray.common.primitive.XmlCharacterUtils;
import org.foray.common.sequence.IntArrayBuilder;
@@ -74,7 +75,7 @@
/* Check linefeed-treatment. */
final char raw = wrapped.charAt(index);
final char filtered = FoText4a.applyLinefeedTreatment(raw, linefeedTreatment);
- if (filtered == FoTextCharacters4a.DISCARD_CHAR) {
+ if (filtered == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
deleted.append(index);
} else if (filtered != raw) {
changed.append(index);
@@ -97,7 +98,7 @@
final int charBefore = index == 0 ? codepointBefore : wrapped.charAt(index - 1);
final int charAfter = index == wrapped.length() - 1 ? codepointAfter : wrapped.charAt(index + 1);
final char filtered = FoText4a.applyWhiteSpaceCollapse(charBefore, raw, charAfter);
- if (filtered == FoTextCharacters4a.DISCARD_CHAR) {
+ if (filtered == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
deleted.insertSortedUnique(index);
} else if (filtered != raw) {
final int changeIndex = changed.insertSortedUnique(index);
@@ -128,7 +129,7 @@
}
if (this.deleted.binarySearch(realIndex) > -1) {
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
final int deltaIndex = this.changed.binarySearch(realIndex);
final char filteredChar = deltaIndex > -1 ? this.deltas.charAt(deltaIndex) : this.wrapped.charAt(realIndex);
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoText4a.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoText4a.java 2021-12-27 17:37:20 UTC (rev 12281)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoText4a.java 2021-12-28 00:40:43 UTC (rev 12282)
@@ -27,6 +27,7 @@
*/
package org.foray.fotree.fo.obj;
+import org.foray.common.WellKnownConstants;
import org.foray.common.primitive.XmlCharacterUtils;
import org.foray.fotree.FoObj;
import org.foray.fotree.Namespace;
@@ -48,9 +49,6 @@
*/
public abstract class FoText4a extends FoObj implements FoText {
- /** Constant indicating that a given character should be discarded. */
- public static final char DISCARD_CHAR = 0xFFFF;
-
/**
* The resolved primary Font.
* It is very, VERY tempting to think that we don't need to cache this here, since this is inherited from the
@@ -180,8 +178,9 @@
* @param index Index into charArray pointing to a char element that is to be converted.
* @param whiteSpaceTreatment The value of the whitespace-treatment trait.
* @return The character to which the charArray[index] should be converted for white-space-treatment purposes.
- * Returns {@link #DISCARD_CHAR} if charArray[index] should be discarded.
- * Note that {@link #DISCARD_CHAR} is not a valid Unicode codepoint, and should therefore never be in the input.
+ * Returns {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} if charArray[index] should be discarded.
+ * Note that {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} is not a valid Unicode codepoint, and should
+ * therefore never be in the input.
*/
public static char applyWhiteSpaceTreatment(final CharSequence charArray, final int index,
final WhiteSpaceTreatment whiteSpaceTreatment) {
@@ -188,7 +187,7 @@
/* The char immediately before. */
final char before;
if (index == 0) {
- before = DISCARD_CHAR;
+ before = WellKnownConstants.INVALID_UNICODE_CHARACTER;
} else {
before = charArray.charAt(index - 1);
}
@@ -199,7 +198,7 @@
/* The char immediately after. */
final char after;
if (index == charArray.length() - 1) {
- after = DISCARD_CHAR;
+ after = WellKnownConstants.INVALID_UNICODE_CHARACTER;
} else {
after = charArray.charAt(index + 1);
}
@@ -213,8 +212,10 @@
* @param c The char being tested.
* @param charAfter The char immediately after the char being tested, or -1 if there is none.
* @param whiteSpaceTreatment The value of the whitespace-treatment trait.
- * @return The char at this index, or {@link #DISCARD_CHAR} if this char should be discarded.
- * Note that {@link #DISCARD_CHAR} is not a valid Unicode codepoint, and should therefore never be in the input.
+ * @return The char at this index, or {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} if this char should be
+ * discarded.
+ * Note that {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} is not a valid Unicode codepoint, and should
+ * therefore never be in the input.
*/
public static char applyWhiteSpaceTreatment(final char charBefore, final char c, final char charAfter,
final WhiteSpaceTreatment whiteSpaceTreatment) {
@@ -227,42 +228,42 @@
return c;
}
switch (whiteSpaceTreatment) {
- case IGNORE: return FoTextCharacters4a.DISCARD_CHAR;
+ case IGNORE: return WellKnownConstants.INVALID_UNICODE_CHARACTER;
case PRESERVE: return ' ';
case IGNORE_IF_BEFORE_LINEFEED: {
/* If last element, no change needed. */
- if (charAfter == FoTextCharacters4a.DISCARD_CHAR) {
+ if (charAfter == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
return c;
}
if (charAfter != Basic_Latin_Block.CONTROL_LINE_FEED) {
return c;
}
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
case IGNORE_IF_AFTER_LINEFEED: {
/* If first element, no change needed. */
- if (charBefore == FoTextCharacters4a.DISCARD_CHAR) {
+ if (charBefore == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
return c;
}
if (charBefore != Basic_Latin_Block.CONTROL_LINE_FEED) {
return c;
}
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
/* default handles "initial" value IGNORE_IF_SURROUNDING_LINEFEED */
default: {
/* If first element, only check next. */
- if (charBefore == FoTextCharacters4a.DISCARD_CHAR) {
+ if (charBefore == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
if (charAfter == Basic_Latin_Block.CONTROL_LINE_FEED) {
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
return c;
}
/* If last element, only check previous. */
- if (charAfter == FoTextCharacters4a.DISCARD_CHAR) {
+ if (charAfter == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
if (charBefore == Basic_Latin_Block.CONTROL_LINE_FEED) {
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
return c;
}
@@ -269,7 +270,7 @@
/* Otherwise, check both. */
if (charBefore == Basic_Latin_Block.CONTROL_LINE_FEED
|| charAfter == Basic_Latin_Block.CONTROL_LINE_FEED) {
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
return c;
}
@@ -313,7 +314,7 @@
final char c = charArray.charAt(i);
final char conversionChar = applyWhiteSpaceTreatment(charArray, i,
whiteSpaceTreatment);
- if (conversionChar == FoTextCharacters4a.DISCARD_CHAR) {
+ if (conversionChar == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
discardElements ++;
} else if (conversionChar != c) {
changeElements ++;
@@ -332,7 +333,7 @@
for (int i = 0; i < charArray.length(); i++) {
final char conversionChar = applyWhiteSpaceTreatment(charArray,
i, whiteSpaceTreatment);
- if (conversionChar == FoTextCharacters4a.DISCARD_CHAR) {
+ if (conversionChar == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
discardElements ++;
} else {
returnArray[i - discardElements] = conversionChar;
@@ -347,8 +348,8 @@
* Applies the linefeed treatment to a specific character.
* @param c The character being processed, which should always be a linefeed character.
* @param linefeedTreatment The value of the linefeed-treatment trait.
- * @return The value by which the linefeed character should be replaced, or the marker {@link #DISCARD_CHAR}
- * indicating that the character should be removed.
+ * @return The value by which the linefeed character should be replaced, or the marker
+ * {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} indicating that the character should be removed.
*/
public static char applyLinefeedTreatment(final char c, final LinefeedTreatment linefeedTreatment) {
if (c != Basic_Latin_Block.CONTROL_LINE_FEED) {
@@ -356,7 +357,7 @@
return c;
}
switch (linefeedTreatment) {
- case IGNORE: return FoTextCharacters4a.DISCARD_CHAR;
+ case IGNORE: return WellKnownConstants.INVALID_UNICODE_CHARACTER;
case TREAT_AS_SPACE: return ' ';
case TREAT_AS_ZERO_WIDTH_SPACE: return '\u200b';
// default handles the "initial" value of "preserve"
@@ -418,7 +419,7 @@
if (c == Basic_Latin_Block.CONTROL_LINE_FEED) {
final char conversionChar = applyLinefeedTreatment(c,
linefeedTreatment);
- if (conversionChar == FoTextCharacters4a.DISCARD_CHAR) {
+ if (conversionChar == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
discardElements ++;
} else {
returnArray[i - discardElements] = conversionChar;
@@ -486,14 +487,16 @@
* should <em>not</em> be considered.</p>
* @param charArray The char[] that is being converted.
* @param index The 0-based index into charArray that points to the char that is being tested.
- * @return {@link #DISCARD_CHAR} if the previous character is whitespace or if the next character is a line-feed.
+ * @return {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} if the previous character is whitespace or if the
+ * next character is a line-feed.
* If neither of these is true, returns the original character.
*/
public static char applyWhiteSpaceCollapse(final CharSequence charArray, final int index) {
- final char charBefore = (index == 0) ? AbstractCharacterSequence.DISCARD_CHAR : charArray.charAt(index - 1);
+ final char charBefore = (index == 0) ? WellKnownConstants.INVALID_UNICODE_CHARACTER :
+ charArray.charAt(index - 1);
final char c = charArray.charAt(index);
final char charAfter = (index == charArray.length() - 1) ?
- AbstractCharacterSequence.DISCARD_CHAR : charArray.charAt(index + 1);
+ WellKnownConstants.INVALID_UNICODE_CHARACTER : charArray.charAt(index + 1);
return applyWhiteSpaceCollapse(charBefore, c, charAfter);
}
@@ -502,7 +505,8 @@
* @param codepointBefore The codepoint before c, or -1 if there is none.
* @param c The character being considered for conversion.
* @param codepointAfter The codepoint after c, or -1 if there is none.
- * @return {@link #DISCARD_CHAR} if the previous character is whitespace or if the next character is a line-feed.
+ * @return {@link WellKnownConstants#INVALID_UNICODE_CHARACTER} if the previous character is whitespace or if the
+ * next character is a line-feed.
* If neither of these is true, returns the original character.
*/
public static char applyWhiteSpaceCollapse(final int codepointBefore, final char c, final int codepointAfter) {
@@ -514,12 +518,12 @@
/* If it is a linefeed, nothing should change. */
return c;
}
- if (codepointBefore != AbstractCharacterSequence.DISCARD_CHAR
+ if (codepointBefore != WellKnownConstants.INVALID_UNICODE_CHARACTER
&& XmlCharacterUtils.isXMLWhitespace(codepointBefore)) {
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
if (codepointAfter == Basic_Latin_Block.CONTROL_LINE_FEED) {
- return FoTextCharacters4a.DISCARD_CHAR;
+ return WellKnownConstants.INVALID_UNICODE_CHARACTER;
}
return c;
}
@@ -544,7 +548,7 @@
int discardElements = 0;
for (int i = 0; i < charArray.length(); i++) {
final char convertChar = applyWhiteSpaceCollapse(charArray, i);
- if (convertChar == FoTextCharacters4a.DISCARD_CHAR) {
+ if (convertChar == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
discardElements ++;
}
}
@@ -559,7 +563,7 @@
discardElements = 0;
for (int i = 0; i < charArray.length(); i++) {
final char conversionChar = applyWhiteSpaceCollapse(charArray, i);
- if (conversionChar == FoTextCharacters4a.DISCARD_CHAR) {
+ if (conversionChar == WellKnownConstants.INVALID_UNICODE_CHARACTER) {
discardElements ++;
} else {
returnArray[i - discardElements] = conversionChar;
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequenceContent.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequenceContent.java 2021-12-27 17:37:20 UTC (rev 12281)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequenceContent.java 2021-12-28 00:40:43 UTC (rev 12282)
@@ -28,6 +28,8 @@
package org.foray.fotree.fo.obj;
+import org.foray.common.WellKnownConstants;
+
import org.axsl.common.para.ParaNode;
import org.axsl.common.value.LinefeedTreatment;
import org.axsl.common.value.TextTransform;
@@ -76,14 +78,15 @@
int length = 0;
for (int index = 0; index < length(); index ++) {
/* The char immediately before. */
- final char before = (index == 0) ? FoTextCharacters4a.DISCARD_CHAR : charAt(index - 1);
+ final char before = (index == 0) ? WellKnownConstants.INVALID_UNICODE_CHARACTER : charAt(index - 1);
/* The char we want to test. */
final char c = charAt(index);
/* The char immediately after. */
- final char after = (index == length() - 1) ? FoTextCharacters4a.DISCARD_CHAR : charAt(index + 1);
+ final char after = (index == length() - 1) ? WellKnownConstants.INVALID_UNICODE_CHARACTER :
+ charAt(index + 1);
final char converted = FoTextCharacters4a.applyWhiteSpaceTreatment(before, c, after, whiteSpaceTreatment);
- if (converted != FoTextCharacters4a.DISCARD_CHAR) {
+ if (converted != WellKnownConstants.INVALID_UNICODE_CHARACTER) {
length ++;
}
}
@@ -102,14 +105,15 @@
int currentIndex = -1;
for (int index = 0; index < length(); index ++) {
/* The char immediately before. */
- final char before = (index == 0) ? FoTextCharacters4a.DISCARD_CHAR : charAt(index - 1);
+ final char before = (index == 0) ? WellKnownConstants.INVALID_UNICODE_CHARACTER : charAt(index - 1);
/* The char we want to test. */
final char c = charAt(index);
/* The char immediately after. */
- final char after = (index == length() - 1) ? FoTextCharacters4a.DISCARD_CHAR : charAt(index + 1);
+ final char after = (index == length() - 1) ? WellKnownConstants.INVALID_UNICODE_CHARACTER :
+ charAt(index + 1);
final char converted = FoTextCharacters4a.applyWhiteSpaceTreatment(before, c, after, whiteSpaceTreatment);
- if (converted != FoTextCharacters4a.DISCARD_CHAR) {
+ if (converted != WellKnownConstants.INVALID_UNICODE_CHARACTER) {
currentIndex ++;
}
if (currentIndex == whiteSpaceTreatmentIndex) {
@@ -133,7 +137,7 @@
for (int index = 0; index < whiteSpaceTreatmentLength(whiteSpaceTreatment); index ++) {
final char c = charAt(index);
final char converted = FoTextCharacters4a.applyLinefeedTreatment(c, linefeedTreatment);
- if (converted != FoTextCharacters4a.DISCARD_CHAR) {
+ if (converted != WellKnownConstants.INVALID_UNICODE_CHARACTER) {
length ++;
}
}
@@ -156,7 +160,7 @@
for (int index = 0; index < whiteSpaceTreatmentLength; index ++) {
final char c = charAt(index);
final char converted = FoTextCharacters4a.applyLinefeedTreatment(c, linefeedTreatment);
- if (converted != FoTextCharacters4a.DISCARD_CHAR) {
+ if (converted != WellKnownConstants.INVALID_UNICODE_CHARACTER) {
currentIndex ++;
}
if (currentIndex == linefeedTreatmentIndex) {
@@ -211,14 +215,14 @@
int length = 0;
final int textTransformLength = textTransformLength(whiteSpaceTreatment, linefeedTreatment, textTransform);
for (int index = 0; index < textTransformLength; index ++) {
- final char charBefore = (index == 0) ? AbstractCharacterSequence.DISCARD_CHAR :
+ final char charBefore = (index == 0) ? WellKnownConstants.INVALID_UNICODE_CHARACTER :
textTransformCharAt(whiteSpaceTreatment, linefeedTreatment, textTransform, index - 1);
final char c = textTransformCharAt(whiteSpaceTreatment, linefeedTreatment, textTransform, index);
final char charAfter = (index == textTransformLength - 1) ?
- AbstractCharacterSequence.DISCARD_CHAR :
+ WellKnownConstants.INVALID_UNICODE_CHARACTER :
textTransformCharAt(whiteSpaceTreatment, linefeedTreatment, textTransform, index + 1);
final char converted = AbstractCharacterSequence.applyWhiteSpaceCollapse(charBefore, c, charAfter);
- if (converted != FoTextCharacters4a.DISCARD_CHAR) {
+ if (converted != WellKnownConstants.INVALID_UNICODE_CHARACTER) {
length ++;
}
}
@@ -242,14 +246,14 @@
int currentIndex = -1;
final int textTransformLength = textTransformLength(whiteSpaceTreatment, linefeedTreatment, textTransform);
for (int index = 0; index < textTransformLength; index ++) {
- final char charBefore = (index == 0) ? AbstractCharacterSequence.DISCARD_CHAR :
+ final char charBefore = (index == 0) ? WellKnownConstants.INVALID_UNICODE_CHARACTER :
textTransformCharAt(whiteSpaceTreatment, linefeedTreatment, textTransform, index - 1);
final char c = textTransformCharAt(whiteSpaceTreatment, linefeedTreatment, textTransform, index);
final char charAfter = (index == textTransformLength - 1) ?
- AbstractCharacterSequence.DISCARD_CHAR :
+ WellKnownConstants.INVALID_UNICODE_CHARACTER :
textTransformCharAt(whiteSpaceTreatment, linefeedTreatment, textTransform, index + 1);
final char converted = AbstractCharacterSequence.applyWhiteSpaceCollapse(charBefore, c, charAfter);
- if (converted != FoTextCharacters4a.DISCARD_CHAR) {
+ if (converted != WellKnownConstants.INVALID_UNICODE_CHARACTER) {
currentIndex ++;
}
if (currentIndex == whiteSpaceCollapseIndex) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|