[FOray-commit] SF.net SVN: foray:[12722] trunk/foray
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
|
From: <vic...@us...> - 2022-08-29 22:04:05
|
Revision: 12722
http://sourceforge.net/p/foray/code/12722
Author: victormote
Date: 2022-08-29 22:03:57 +0000 (Mon, 29 Aug 2022)
Log Message:
-----------
Handle punctuation inside of words better.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-08-29 21:08:57 UTC (rev 12721)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-08-29 22:03:57 UTC (rev 12722)
@@ -76,6 +76,8 @@
private static final String POSSIBLE_INTRAWORD_PUNCTUATION = new String(new char[] {
Basic_Latin_Block.APOSTROPHE,
General_Punctuation_Block.RIGHT_SINGLE_QUOTATION_MARK,
+ Basic_Latin_Block.LEFT_PARENTHESIS, //English example of alternate spelling:
+ Basic_Latin_Block.RIGHT_PARENTHESIS, // pal(a)eography
Basic_Latin_Block.FULL_STOP, //English example: "Section 8.16"
});
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-08-29 21:08:57 UTC (rev 12721)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-08-29 22:03:57 UTC (rev 12722)
@@ -105,6 +105,9 @@
* parenthesis. */
ATTACHED_TRAILING_PUNCTUATION,
+ /** Character is either an attached leading punctuation mark or intraword punctuation. */
+ ATTACHED_LEADING_OR_INTRAWORD_PUNCTUATION,
+
/** Character is either an attached trailing punctuation mark or intraword punctuation. */
ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION,
@@ -254,6 +257,32 @@
}
break;
}
+ case ATTACHED_LEADING_OR_INTRAWORD_PUNCTUATION: {
+ switch (nextBreakType) {
+ case WORD_CHAR: {
+ switch (previousBreakType) {
+ case WORD_CHAR: {
+ /* This also is part of the word. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ break;
+ }
+ default: {
+ breakTypes[breakIndex] = CharType.ATTACHED_LEADING_PUNCTUATION;
+ break;
+ }
+ }
+ break;
+ }
+ case ATTACHED_LEADING_PUNCTUATION: {
+ /* This is additional leading punctuation. */
+ breakTypes[breakIndex] = CharType.ATTACHED_LEADING_PUNCTUATION;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+ }
default: {
break;
}
@@ -466,7 +495,11 @@
return CharType.WORD_CHAR;
}
if (CharacterUtils.isAttachedLeadingPunctuation(c)) {
- return CharType.ATTACHED_LEADING_PUNCTUATION;
+ if (CharacterUtils.isPossibleIntrawordPunctuation(c)) {
+ return CharType.ATTACHED_LEADING_OR_INTRAWORD_PUNCTUATION;
+ } else {
+ return CharType.ATTACHED_LEADING_PUNCTUATION;
+ }
}
if (CharacterUtils.isAttachedTrailingPunctuation(c)) {
if (CharacterUtils.isPossibleIntrawordPunctuation(c)) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|