foray-commit Mailing List for FOray (Page 72)
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
You can subscribe to this list here.
| 2006 |
Jan
|
Feb
|
Mar
(139) |
Apr
(98) |
May
(250) |
Jun
(394) |
Jul
(84) |
Aug
(13) |
Sep
(420) |
Oct
(186) |
Nov
(1) |
Dec
(3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2007 |
Jan
(108) |
Feb
(202) |
Mar
(291) |
Apr
(247) |
May
(374) |
Jun
(227) |
Jul
(231) |
Aug
(60) |
Sep
(31) |
Oct
(45) |
Nov
(18) |
Dec
|
| 2008 |
Jan
(38) |
Feb
(71) |
Mar
(142) |
Apr
|
May
(59) |
Jun
(6) |
Jul
(10) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2009 |
Jan
(12) |
Feb
(4) |
Mar
(88) |
Apr
(121) |
May
(17) |
Jun
(30) |
Jul
|
Aug
(5) |
Sep
|
Oct
(1) |
Nov
|
Dec
|
| 2010 |
Jan
(11) |
Feb
(76) |
Mar
(11) |
Apr
|
May
(11) |
Jun
|
Jul
|
Aug
(44) |
Sep
(14) |
Oct
(7) |
Nov
|
Dec
|
| 2011 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(9) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(10) |
Nov
|
Dec
|
| 2012 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(3) |
Jul
(4) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2016 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(168) |
| 2017 |
Jan
(77) |
Feb
(11) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2018 |
Jan
|
Feb
|
Mar
(1) |
Apr
(6) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2019 |
Jan
|
Feb
(88) |
Mar
(118) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2020 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(6) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(141) |
| 2021 |
Jan
(170) |
Feb
(20) |
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
|
Sep
|
Oct
(62) |
Nov
(189) |
Dec
(162) |
| 2022 |
Jan
(201) |
Feb
(118) |
Mar
(8) |
Apr
|
May
(2) |
Jun
(47) |
Jul
(19) |
Aug
(14) |
Sep
(3) |
Oct
|
Nov
(28) |
Dec
(235) |
| 2023 |
Jan
(112) |
Feb
(23) |
Mar
(2) |
Apr
(2) |
May
|
Jun
(1) |
Jul
|
Aug
(70) |
Sep
(92) |
Oct
(20) |
Nov
(1) |
Dec
(1) |
| 2024 |
Jan
|
Feb
|
Mar
(1) |
Apr
(1) |
May
(14) |
Jun
(11) |
Jul
(1) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2025 |
Jan
(10) |
Feb
(29) |
Mar
|
Apr
(162) |
May
(245) |
Jun
(83) |
Jul
|
Aug
(1) |
Sep
|
Oct
|
Nov
(4) |
Dec
|
|
From: <vic...@us...> - 2021-11-19 17:44:48
|
Revision: 12102
http://sourceforge.net/p/foray/code/12102
Author: victormote
Date: 2021-11-19 17:44:46 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Convert dependency on WritingSystem to Orthography.
Modified Paths:
--------------
trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java 2021-11-19 17:38:03 UTC (rev 12101)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java 2021-11-19 17:44:46 UTC (rev 12102)
@@ -28,9 +28,9 @@
package org.foray.area;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.fo.Fo;
import org.axsl.fo.fo.GraftingPoint;
+import org.axsl.orthography.Orthography;
/**
* Abstract superclass for classes that handle the linkage between the FO Tree
@@ -68,8 +68,8 @@
* Retrieves the writing system, if any, for the wrapped FO.
* @return The writing system for the wrapped FO, or null if the FO does not have one.
*/
- public WritingSystem getWritingSystem() {
- return this.foGenerator.getWritingSystem();
+ public Orthography getOrthography() {
+ return this.foGenerator.getOrthography();
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 17:38:06
|
Revision: 12101
http://sourceforge.net/p/foray/code/12101
Author: victormote
Date: 2021-11-19 17:38:03 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Conform to aXSL changes: Remove more dependencies on WritingSystem.
Modified Paths:
--------------
trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java 2021-11-19 17:26:53 UTC (rev 12100)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/AreaNode4a.java 2021-11-19 17:38:03 UTC (rev 12101)
@@ -34,7 +34,6 @@
import org.axsl.area.AreaNode;
import org.axsl.area.AreaTreeException;
import org.axsl.area.BlockContentFactory;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.fo.Fo;
import org.axsl.fo.fo.GraftingPoint;
import org.axsl.fo.fo.Table;
@@ -771,15 +770,6 @@
}
@Override
- public WritingSystem writingSystem() {
- final Fo generatedBy = this.traitGeneratedBy();
- if (generatedBy != null) {
- return generatedBy.getWritingSystem();
- }
- return null;
- }
-
- @Override
public Orthography orthography() {
final Fo generatedBy = this.traitGeneratedBy();
if (generatedBy != null) {
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 17:26:53 UTC (rev 12100)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 17:38:03 UTC (rev 12101)
@@ -33,7 +33,6 @@
import org.foray.common.primitive.XmlCharacterUtils;
import org.axsl.area.AreaTreeException;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.value.WhiteSpaceTreatment;
import org.axsl.fo.Fo;
import org.axsl.fo.fo.Block;
@@ -678,11 +677,6 @@
}
@Override
- public WritingSystem writingSystem() {
- return this.generatedBy.getWritingSystem();
- }
-
- @Override
public Area4a getParent() {
return this.parent;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 17:26:56
|
Revision: 12100
http://sourceforge.net/p/foray/code/12100
Author: victormote
Date: 2021-11-19 17:26:53 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Conform to aXSL changes: Convert more dependencies on WritingSystem to Orthography.
Modified Paths:
--------------
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/solitary/SolitaryLineBreaker.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java 2021-11-19 16:56:31 UTC (rev 12099)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java 2021-11-19 17:26:53 UTC (rev 12100)
@@ -28,7 +28,6 @@
package org.foray.fotree;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.fo.FoContext;
import org.axsl.fo.fo.GraftingPoint;
import org.axsl.font.FontContext;
@@ -140,11 +139,6 @@
}
@Override
- public WritingSystem inlineWritingSystem() {
- return this.realLineText.inlineWritingSystem();
- }
-
- @Override
public Orthography inlineOrthography() {
return this.realLineText.inlineOrthography();
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-19 16:56:31 UTC (rev 12099)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-19 17:26:53 UTC (rev 12100)
@@ -30,7 +30,6 @@
import org.foray.common.i18n.Country4a;
import org.foray.common.i18n.Language4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.XmlCharacterUtils;
import org.foray.fotree.FoObj;
import org.foray.fotree.Namespace;
@@ -37,7 +36,6 @@
import org.foray.fotree.PropertyList;
import org.foray.fotree.fo.FoValue;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.value.LinefeedTreatment;
import org.axsl.common.value.TextTransform;
import org.axsl.common.value.WhiteSpaceTreatment;
@@ -216,13 +214,6 @@
}
@Override
- public WritingSystem inlineWritingSystem() {
- final Language4a language = inlineLanguage();
- final Country4a country = inlineCountry();
- return WritingSystem4a.find(language, null, country);
- }
-
- @Override
public Orthography inlineOrthography() {
return getOrthography();
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-19 16:56:31 UTC (rev 12099)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-19 17:26:53 UTC (rev 12100)
@@ -31,7 +31,6 @@
import org.foray.common.i18n.Country4a;
import org.foray.common.i18n.Language4a;
import org.foray.common.i18n.Script4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.fotree.FoObj;
import org.foray.fotree.Namespace;
import org.foray.fotree.PropertyList;
@@ -282,14 +281,6 @@
}
@Override
- public WritingSystem4a inlineWritingSystem() {
- final Language4a language = inlineLanguage();
- final Script4a script = inlineScript();
- final Country4a country = inlineCountry();
- return WritingSystem4a.find(language, script, country);
- }
-
- @Override
public Orthography inlineOrthography() {
return this.getOrthography();
}
Modified: trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/solitary/SolitaryLineBreaker.java
===================================================================
--- trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/solitary/SolitaryLineBreaker.java 2021-11-19 16:56:31 UTC (rev 12099)
+++ trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/solitary/SolitaryLineBreaker.java 2021-11-19 17:26:53 UTC (rev 12100)
@@ -31,11 +31,9 @@
import org.foray.text.TextServer4a;
import org.foray.text.line.EagerLineBreaker;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.linebreak.LineBreakControl;
import org.axsl.orthography.Orthography;
-import org.axsl.orthography.OrthographyServer;
import org.axsl.orthography.Word;
import org.axsl.text.TextException;
import org.axsl.text.line.LineBreakHandler;
@@ -114,8 +112,8 @@
final int end) throws TextException {
this.currentLineText = lineText;
this.currentChars = this.currentLineText.inlineText();
- final WritingSystem orthography = lineText.inlineWritingSystem();
- final String language = orthography.getLanguage().getAlpha3Code();
+// final WritingSystem orthography = lineText.inlineWritingSystem();
+// final String language = orthography.getLanguage().getAlpha3Code();
this.finalWidth = 0;
this.wordWidth = 0;
@@ -164,7 +162,7 @@
this.canEatLeadingSpaces = false;
final int charWidth = charWidth(lineText, codePoint,
whitespaceWidth);
- processTextChar(language, thisCharStarts, charWidth);
+ processTextChar(lineText.inlineOrthography(), thisCharStarts, charWidth);
if ((this.finalWidth + this.spaceWidth + this.wordWidth)
<= lineReceivingContent().capacityRemaining()) {
@@ -285,18 +283,17 @@
/**
* Processes one character.
- * @param language The language for the character.
+ * @param orthography The orthography.
* @param i Index to the character.
* @param charWidth The width, in millipoints, of the character.
*/
- private void processTextChar(final String language, final int i,
- final int charWidth) {
+ private void processTextChar(final Orthography orthography, final int i, final int charWidth) {
if (this.previousCharacter == SolitaryLineBreaker.CONNECTOR) {
// Current is TEXT, previous is WHITESPACE.
this.wordWidth = charWidth;
this.wordStart = i;
} else if (this.previousCharacter == SolitaryLineBreaker.TEXT) {
- if (canBreakMidWord(language)) {
+ if (orthography.canBreakLineMidWord()) {
this.finalWidth += this.spaceWidth;
this.spaceWidth = 0;
// add the current word
@@ -393,14 +390,8 @@
* backward at any text in the same LineText item, and then to prior
* LineText items to find the beginning of the word.
*/
- final OrthographyServer server = this.getTextServer().getHyphenationServer();
- final WritingSystem writingSystem = this.currentLineText.inlineWritingSystem();
- final Orthography orthographyConfig = server.getOrthography(writingSystem);
- if (orthographyConfig == null) {
- throw new TextException(String.format("Configuration not found for orthography {}",
- writingSystem.toString()));
- }
+ final Orthography orthographyConfig = this.currentLineText.inlineOrthography();
// Count the number of chars at the beginning that should be ignored.
final int actualWordStart = wordStarts(this.currentChars, this.wordStart);
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2021-11-19 16:56:31 UTC (rev 12099)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2021-11-19 17:26:53 UTC (rev 12100)
@@ -453,4 +453,10 @@
}
+ @Override
+ public boolean canBreakLineMidWord() {
+ // TODO Auto-generated method stub
+ return false;
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 16:56:35
|
Revision: 12099
http://sourceforge.net/p/foray/code/12099
Author: victormote
Date: 2021-11-19 16:56:31 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Conform to aXSL changes: Convert some dependencies on WritingSystem (what is configured) to Orthography (what is actually useful for work).
Modified Paths:
--------------
trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java
trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java
trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java
trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequence.java
trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java
trunk/foray/foray-graphic/src/main/java/org/foray/graphic/output/SvgPdf.java
trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java
trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfDocument4a.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformMath.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformSvg.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXobject4a.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java
trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java
trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java
trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java
Modified: trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java
===================================================================
--- trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -37,7 +37,6 @@
import org.foray.area.PageCollection;
import org.foray.area.TextArea;
import org.foray.common.FontContext4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.core.ForayException;
import org.axsl.font.FontException;
@@ -292,7 +291,7 @@
/* The following computation is tested in {@link TestFont4a#testWidth()}. */
final String testString = "Test of Centering";
final int textWidth = fontUse.width(
- testString, 0, testString.length(), 12000, 0, 0, this.fontOptionsWithKerning, WritingSystem4a.USA);
+ testString, 0, testString.length(), 12000, 0, 0, this.fontOptionsWithKerning, null);
Assert.assertEquals(textWidth, textArea.crIpd());
/* The x value of the text area content rectangle should be at the x
@@ -357,7 +356,7 @@
* The extra word spacing is .3em = .3 * 12000 = 3600. */
final String testString = "Centered with Word Spacing";
final int textWidth = fontUse.width(testString, 0, testString.length(), 12000, 0, 3600,
- this.fontOptionsWithKerning, WritingSystem4a.USA);
+ this.fontOptionsWithKerning, null);
Assert.assertEquals(textWidth, textArea.crIpd());
/* The x value of the text area content rectangle should be at the x
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/FoLinkage.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -30,7 +30,6 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.fo.Fo;
-import org.axsl.fo.FoLineText;
import org.axsl.fo.fo.GraftingPoint;
/**
@@ -70,11 +69,7 @@
* @return The writing system for the wrapped FO, or null if the FO does not have one.
*/
public WritingSystem getWritingSystem() {
- if (this.foGenerator instanceof FoLineText) {
- final FoLineText lt = (FoLineText) this.foGenerator;
- return lt.inlineWritingSystem();
- }
- return null;
+ return this.foGenerator.getWritingSystem();
}
}
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -239,7 +239,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), orthography());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -266,7 +266,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), orthography());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -267,7 +267,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), orthography());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -252,7 +252,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), orthography());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -612,7 +612,7 @@
private int recomputeProgressionDimension() {
final CharSequence text = getText();
final int pd = getPrimaryFont().width(text, 0, text.length(), traitFontSize(), traitLetterSpacingOpt(),
- traitWordSpacingOpt(), fontContext(), writingSystem());
+ traitWordSpacingOpt(), fontContext(), orthography());
return pd;
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -28,12 +28,12 @@
package org.foray.common.para;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.para.ParaConfig;
import org.axsl.common.para.ParaGlue;
import org.axsl.common.para.ParaLeaf;
import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
+import org.axsl.orthography.Orthography;
/**
* Hard-wired implementation of {@link ParaConfig}.
@@ -59,7 +59,7 @@
private int hyphenationCharacter = '-';
/** The orthography that should be used when applying locale-sensitive font features. */
- private WritingSystem orthography;
+ private Orthography orthography;
/**
* Constructor.
@@ -69,7 +69,7 @@
* @param stretchability The size, in millipoints, of any stretchability.
* @param shrinkability The size, in millipoints, of any shrinkability.
*/
- public ParaConfig4a(final FontUse fontUse, final int fontSize, final WritingSystem orthography,
+ public ParaConfig4a(final FontUse fontUse, final int fontSize, final Orthography orthography,
final int stretchability, final int shrinkability) {
this.fontUse = fontUse;
this.fontSize = fontSize;
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -32,10 +32,10 @@
import org.foray.font.config.RegisteredFont;
import org.foray.font.format.Kerning;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.sequence.IntSequence;
import org.axsl.font.Font;
import org.axsl.font.FontContext;
+import org.axsl.orthography.Orthography;
import org.axsl.ps.CharSet;
import org.slf4j.Logger;
@@ -178,7 +178,7 @@
@Override
public int width(final IntSequence word, final int offset, final int length, final int fontSize,
final int letterSpacing, final int wordSpacing, final FontContext requestedFontContext,
- final WritingSystem orthography) {
+ final Orthography orthography) {
final FontContext fontContext = requestedFontContext == null ? FontContext4a.DEFAULT : requestedFontContext;
if (word == null) {
return 0;
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -38,7 +38,6 @@
import org.foray.font.format.ttf.TtfTableGsub;
import org.foray.font.format.type1.Type1Font;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.sequence.ByteSequence;
import org.axsl.common.sequence.IntSequence;
import org.axsl.font.Font;
@@ -47,6 +46,7 @@
import org.axsl.font.FontException;
import org.axsl.font.FontServer;
import org.axsl.font.FontUse;
+import org.axsl.orthography.Orthography;
import org.axsl.ps.CharSet;
import org.axsl.ps.Encoding;
import org.axsl.unicode.block.Basic_Latin_Block;
@@ -140,7 +140,7 @@
@Override
public IntArrayBuilder encode(final CharSequence chars, final int offset, final int length,
- final FontContext fontContext, final WritingSystem orthography) {
+ final FontContext fontContext, final Orthography orthography) {
/* Convert the chars to code points. */
/* Some substitutions add glyphs, so make the output a bit bigger than the input to avoid array copying. */
final IntArrayBuilder codePoints = CharSequenceUtils.toCodepoints(chars, offset, length, 2);
@@ -195,7 +195,7 @@
@Override
public int width(final CharSequence chars, final int offset, final int length, final int fontSize,
final int letterSpacing, final int wordSpacing, final FontContext fontContext,
- final WritingSystem orthography) {
+ final Orthography orthography) {
final IntArrayBuilder metricIndexes = new IntArrayBuilder(chars.length());
for (int i = 0; i < chars.length(); i ++) {
final int metricIndex = getFont().metricIndex(chars.charAt(i));
@@ -580,7 +580,7 @@
@Override
public String textToPdf(final CharSequence theString, final FontContext fontContext,
- final WritingSystem orthography) {
+ final Orthography orthography) {
final Font font = getFOrayFont();
final IntSequence glyphIndexes = encode(theString, 0, theString.length(), fontContext, orthography);
final StringBuilder buffer = new StringBuilder();
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -31,8 +31,8 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontContext;
+import org.axsl.orthography.Orthography;
import java.io.IOException;
import java.util.ArrayList;
@@ -140,7 +140,7 @@
* @param orthography The orthography that should be used when applying substitutions.
*/
public void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext context,
- final WritingSystem orthography) {
+ final Orthography orthography) {
/* "To implement features, a client applies the lookups in the order the lookup definitions occur in the
* LookupList. As a result, within the GSUB or GPOS table, lookups from several different features may be
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -32,11 +32,11 @@
import org.foray.common.sequence.IntArray;
import org.axsl.common.data.BoundingBox;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.sequence.IntSequence;
import org.axsl.font.Font;
import org.axsl.font.FontContext;
import org.axsl.font.Panose;
+import org.axsl.orthography.Orthography;
import org.axsl.ps.Encoding;
/**
@@ -79,7 +79,7 @@
@Override
public int width(final IntSequence metricIndexes, final int offset, final int length, final int fontSize,
final int letterSpacing, final int wordSpacing, final FontContext options,
- final WritingSystem orthography) {
+ final Orthography orthography) {
// TODO Auto-generated method stub
return 0;
}
Modified: trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java
===================================================================
--- trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -29,7 +29,6 @@
package org.foray.font;
import org.foray.common.FontContext4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.font.config.RegisteredFont;
import org.axsl.font.FontException;
@@ -111,7 +110,7 @@
* If these are scaled to 12 points, the millipoints used are 7565 * 12
* = 90,780. */
Assert.assertEquals(90780, fontUse.width(
- testString, 0, testString.length(), 12000, 0, 0, fontOptionsWithKerning, WritingSystem4a.USA));
+ testString, 0, testString.length(), 12000, 0, 0, fontOptionsWithKerning, null));
/* Test without kerning.
* Total kerning = -105.
@@ -118,7 +117,7 @@
* Adjusted text space units = 7,565 + 105 = 7,670.
* When scaled to 12 points, the millipoints used are 7670 * 12 = 92,040. */
Assert.assertEquals(92040, fontUse.width(
- testString, 0, testString.length(), 12000, 0, 0, fontOptionsWithoutKerning, WritingSystem4a.USA));
+ testString, 0, testString.length(), 12000, 0, 0, fontOptionsWithoutKerning, null));
/* Test with word spacing.
* Word spacing = 300.
@@ -126,7 +125,7 @@
* Total word spacing = 600.
* Total space (kerning on) = 90,780 + 600 = 91,380. */
Assert.assertEquals(91380, fontUse.width(
- testString, 0, testString.length(), 12000, 0, 300, fontOptionsWithKerning, WritingSystem4a.USA));
+ testString, 0, testString.length(), 12000, 0, 300, fontOptionsWithKerning, null));
/* Test with letter spacing.
* Letter spacing = 5.
@@ -134,12 +133,12 @@
* Total word spacing = 80.
* Total space (kerning on) = 90,780 + 80 = 90,860. */
Assert.assertEquals(90860, fontUse.width(
- testString, 0, testString.length(), 12000, 5, 0, fontOptionsWithKerning, WritingSystem4a.USA));
+ testString, 0, testString.length(), 12000, 5, 0, fontOptionsWithKerning, null));
/* Test with both word spacing and letter spacing, using same parameters as above.
* Total space (kerning on) = 90,780 + 600 + 80 = 91,460. */
Assert.assertEquals(91460, fontUse.width(
- testString, 0, testString.length(), 12000, 5, 300, fontOptionsWithKerning, WritingSystem4a.USA));
+ testString, 0, testString.length(), 12000, 5, 300, fontOptionsWithKerning, null));
}
}
Modified: trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java
===================================================================
--- trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -29,7 +29,6 @@
package org.foray.font.format.ttf;
import org.foray.common.FontContext4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.io.SimpleDataSource;
import org.foray.common.primitive.CharSequenceUtils;
import org.foray.common.sequence.IntArrayBuilder;
@@ -39,8 +38,8 @@
import org.foray.font.format.ttf.OtfLookupGsubx04x01.Ligature;
import org.foray.font.format.ttf.OtfLookupGsubx04x01.LigatureSet;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontException;
+import org.axsl.orthography.Orthography;
import org.axsl.ps.Encoding;
import org.junit.Assert;
@@ -63,7 +62,7 @@
private static TrueTypeFont ttfFont;
/** The orthography that should be used when applying font features like subsitutions. */
- private static WritingSystem orthography;
+ private static Orthography orthography;
/**
* Setup this class for testing.
@@ -85,7 +84,7 @@
OtfLookupGsubTests.ttfFont = ttfSingle.getTTFFont(null);
Assert.assertNotNull(ttfFont);
- OtfLookupGsubTests.orthography = WritingSystem4a.USA;
+ OtfLookupGsubTests.orthography = null;
}
/**
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -33,6 +33,7 @@
import org.axsl.fo.fo.GraftingPoint;
import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
+import org.axsl.orthography.Orthography;
import org.axsl.text.line.LineText;
/**
@@ -144,6 +145,11 @@
}
@Override
+ public Orthography inlineOrthography() {
+ return this.realLineText.inlineOrthography();
+ }
+
+ @Override
public int inlineHyphenationRemainCount() {
return this.realLineText.inlineHyphenationRemainCount(this.context);
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -3899,7 +3899,7 @@
public int getWidth(final ParaLeaf leaf) {
final FontUse fontUse = resolvePrimaryFont(null);
return fontUse.width(leaf.getText(), 0, leaf.getText().length(), this.traitFontSize(null), 0, 0,
- FontContext4a.DEFAULT, getWritingSystem());
+ FontContext4a.DEFAULT, getOrthography());
}
@Override
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -51,6 +51,7 @@
import org.axsl.font.FontContext;
import org.axsl.font.FontServer;
import org.axsl.font.FontUse;
+import org.axsl.orthography.Orthography;
import org.axsl.text.line.LineText;
import org.axsl.unicode.block.Basic_Latin_Block;
@@ -222,6 +223,11 @@
}
@Override
+ public Orthography inlineOrthography() {
+ return getOrthography();
+ }
+
+ @Override
public int inlineHyphenationRemainCount() {
return inlineHyphenationRemainCount(null);
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -44,6 +44,7 @@
import org.axsl.font.Font;
import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
+import org.axsl.orthography.Orthography;
import org.axsl.text.line.LineText;
import java.util.List;
@@ -289,6 +290,11 @@
}
@Override
+ public Orthography inlineOrthography() {
+ return this.getOrthography();
+ }
+
+ @Override
public int inlineHyphenationRemainCount() {
return this.inlineHyphenationRemainCount(null);
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequence.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequence.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoWordSequence.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -62,7 +62,7 @@
final Orthography orthography = orthographyServer.getOrthography(getWritingSystem());
final ParaBranch wordSequence =
orthography.tokenizeWordSequence(content, 0, content.length());
- final ParaConfig4a config = new ParaConfig4a(getPrimaryFont(null), inlineFontSize(), inlineWritingSystem(),
+ final ParaConfig4a config = new ParaConfig4a(getPrimaryFont(null), inlineFontSize(), inlineOrthography(),
parent.traitWordSpacingMax(null), parent.traitWordSpacingMin(null));
wordSequence.setParaConfig(config);
this.content = new ArrayList<FoWordSequenceContent>(wordSequence.getQtyParaNodeChildren());
Modified: trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java
===================================================================
--- trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -36,11 +36,11 @@
import org.foray.common.FontContext4a;
import org.foray.common.WellKnownConstants;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
import org.axsl.font.FontException;
import org.axsl.font.FontUtility;
+import org.axsl.orthography.Orthography;
import org.apache.batik.bridge.Mark;
import org.apache.batik.bridge.StrokingTextPainter;
@@ -80,8 +80,8 @@
/** The FontConsumer that should be used for font resolution. */
private FontConsumer fontConsumer;
- /** The writing system. */
- private WritingSystem writingSystem;
+ /** The orthography. */
+ private Orthography orthography;
/** The font family name used by this text. */
private String fontFamily = null;
@@ -92,11 +92,11 @@
/**
* Constructor.
* @param consumer The FontConsumer that should be used for font resolution.
- * @param writingSystem The writing system.
+ * @param orthography The orthography.
*/
- public PdfTextPainter(final FontConsumer consumer, final WritingSystem writingSystem) {
+ public PdfTextPainter(final FontConsumer consumer, final Orthography orthography) {
this.fontConsumer = consumer;
- this.writingSystem = writingSystem;
+ this.orthography = orthography;
}
@Override
@@ -285,7 +285,7 @@
/* TODO: Pass the font options below instead of hard-coding them. */
final int width = fontToUse.width(txt, 0, txt.length(),
awtFontSize * WellKnownConstants.MILLIPOINTS_PER_POINT, letterSpacing, 0, FontContext4a.DEFAULT,
- this.writingSystem);
+ this.orthography);
final float advance = WellKnownConstants.millipointsToPoints(width);
float tx = 0;
if (anchor != null) {
Modified: trunk/foray/foray-graphic/src/main/java/org/foray/graphic/output/SvgPdf.java
===================================================================
--- trunk/foray/foray-graphic/src/main/java/org/foray/graphic/output/SvgPdf.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-graphic/src/main/java/org/foray/graphic/output/SvgPdf.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -34,9 +34,9 @@
import org.foray.graphic.batik.PdfTextPainter;
import org.foray.graphic.batik.PdfaElementBridge;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.graphic.GraphicException;
+import org.axsl.orthography.Orthography;
import org.apache.batik.bridge.BridgeContext;
import org.apache.batik.bridge.GVTBuilder;
@@ -71,7 +71,7 @@
@Override
public void drawVectorContent(final Graphics2D graphic2d, final FontConsumer fontConsumer,
- final WritingSystem writingSystem) throws GraphicException {
+ final Orthography orthography) throws GraphicException {
/* If not running in a graphical environment, log an error message
* and skip the SVG. */
if (! Environment.isGraphicalEnvironment()) {
@@ -89,7 +89,7 @@
if (fontConsumer == null) {
textPainter = new StrokingTextPainter();
} else {
- textPainter = new PdfTextPainter(fontConsumer, writingSystem);
+ textPainter = new PdfTextPainter(fontConsumer, orthography);
}
ctx.setTextPainter(textPainter);
Modified: trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java
===================================================================
--- trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -332,7 +332,7 @@
final FontUse fontUse = lineText.inlinePrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, offset, length, lineText.inlineFontSize(), lineText.inlineLetterSpacingOptimum(), 0,
- lineText.inlineFontContext(), lineText.inlineWritingSystem());
+ lineText.inlineFontContext(), lineText.inlineOrthography());
}
/**
Modified: trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java
===================================================================
--- trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -29,7 +29,6 @@
package org.foray.linebreak;
import org.foray.common.data.OrderedTreePath4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.para.ParaBranch4a;
import org.foray.common.para.ParaConfig4a;
import org.foray.orthography.Punctuation4a;
@@ -204,7 +203,7 @@
final Font font = LbTestUtilities.createMonotypeFont();
final FontUse fontUse = Mockito.mock(FontUse.class);
Mockito.when(fontUse.getFont()).thenReturn(font);
- final ParaConfig paraConfig = new ParaConfig4a(fontUse, 10, WritingSystem4a.USA, 0, 0);
+ final ParaConfig paraConfig = new ParaConfig4a(fontUse, 10, null, 0, 0);
para.setParaConfig(paraConfig);
/* The tokenized text treated "’s" as part of the attached word.
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -31,11 +31,11 @@
import org.foray.common.WellKnownConstants;
import org.foray.pdf.PdfGraphicsState4a;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.font.FontContext;
import org.axsl.graphic.Graphic;
import org.axsl.graphic.GraphicException;
+import org.axsl.orthography.Orthography;
import org.axsl.pdf.PdfColor;
import org.axsl.pdf.PdfContentStream;
import org.axsl.pdf.PdfException;
@@ -111,7 +111,7 @@
@Override
public synchronized void drawText(final CharSequence text, final FontContext fontContext,
- final WritingSystem orthography) throws PdfException {
+ final Orthography orthography) throws PdfException {
if (text.length() < 1) {
return;
}
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfDocument4a.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfDocument4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfDocument4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -45,7 +45,6 @@
import org.foray.pdf.PdfParser;
import org.axsl.common.Gradient;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.sequence.IntPrimitiveIterator;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
@@ -53,6 +52,7 @@
import org.axsl.graphic.Graphic;
import org.axsl.graphic.GraphicException;
import org.axsl.graphic.output.GraphicOutputContext;
+import org.axsl.orthography.Orthography;
import org.axsl.pdf.PdfDocument;
import org.axsl.pdf.PdfException;
import org.axsl.pdf.PdfPageLabelStyle;
@@ -767,11 +767,11 @@
}
@Override
- public PdfXobject createXobject(final Graphic graphic, final WritingSystem writingSystem,
+ public PdfXobject createXobject(final Graphic graphic, final Orthography orthography,
final FontConsumer fontConsumer) throws PdfException {
final PdfXobject4a xObject;
try {
- xObject = PdfXobject4a.makeXObject(this, graphic, writingSystem, fontConsumer);
+ xObject = PdfXobject4a.makeXObject(this, graphic, orthography, fontConsumer);
} catch (final GraphicException e) {
throw new PdfException(e);
}
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -30,10 +30,10 @@
import org.foray.common.WellKnownConstants;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.Font;
import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
+import org.axsl.orthography.Orthography;
import org.axsl.pdf.PdfFont;
import org.axsl.ps.Encoding;
import org.axsl.ps.PsEncoding;
@@ -432,7 +432,7 @@
@Override
public CharSequence textToPdf(final CharSequence theString, final FontContext fontContext,
- final WritingSystem orthography) {
+ final Orthography orthography) {
/* If available, delegate this to the FontUse instance. */
return this.fsFont == null ? theString : this.fsFont.textToPdf(theString, fontContext, orthography);
}
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -28,8 +28,8 @@
package org.foray.pdf.object;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontContext;
+import org.axsl.orthography.Orthography;
import org.axsl.pdf.PdfFont;
/**
@@ -52,7 +52,7 @@
private FontContext fontContext;
/** The orthography that should be used when applying font features like subsitutions. */
- private WritingSystem orthography;
+ private Orthography orthography;
/**
* Constructor.
@@ -63,7 +63,7 @@
* @param orthography The orthography that should be used when applying font features like subsitutions.
*/
public PdfString(final String string, final PdfFont font, final FontContext fontContext,
- final WritingSystem orthography) {
+ final Orthography orthography) {
this.theString = string;
this.font = font;
this.fontContext = fontContext;
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformMath.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformMath.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformMath.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -33,12 +33,12 @@
import org.foray.pdf.util.PdfGraphics2D;
import org.axsl.common.data.BoundingBox;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.graphic.GraphicException;
import org.axsl.graphic.MathGraphic;
import org.axsl.graphic.output.GraphicOutput;
import org.axsl.graphic.output.GraphicPdf;
+import org.axsl.orthography.Orthography;
import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
@@ -56,21 +56,21 @@
/** The FontConsumer to use for resolving fonts in the MathML document. */
private FontConsumer fontConsumer;
- /** The writing system. */
- private WritingSystem writingSystem;
+ /** The orthography. */
+ private Orthography orthography;
/**
* Constructor.
* @param doc The parent PDF document.
* @param graphic The form to be encapsulated.
- * @param writingSystem The writing system.
+ * @param orthography The writing system.
* @param fontConsumer The font consumer to use for resolving fonts in the SVG.
*/
- public PdfXformMath(final PdfDocument4a doc, final MathGraphic graphic, final WritingSystem writingSystem,
+ public PdfXformMath(final PdfDocument4a doc, final MathGraphic graphic, final Orthography orthography,
final FontConsumer fontConsumer) {
super(doc, graphic);
this.graphic = graphic;
- this.writingSystem = writingSystem;
+ this.orthography = orthography;
this.fontConsumer = fontConsumer;
}
@@ -85,8 +85,8 @@
}
final GraphicPdf graphicPdf = (GraphicPdf) graphicOutput;
// graphicPdf.drawVectorContent(outputStream, doc, this.fontConsumer, this.strokeText, false);
- final Graphics2D graphic2D = new PdfGraphics2D(this.writingSystem, this.fontConsumer, doc, outputStream);
- graphicPdf.drawVectorContent(graphic2D, this.fontConsumer, this.writingSystem);
+ final Graphics2D graphic2D = new PdfGraphics2D(this.orthography, this.fontConsumer, doc, outputStream);
+ graphicPdf.drawVectorContent(graphic2D, this.fontConsumer, this.orthography);
return outputStream.toByteArray();
}
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformSvg.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformSvg.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXformSvg.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -33,12 +33,12 @@
import org.foray.pdf.util.PdfGraphics2D;
import org.axsl.common.data.BoundingBox;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.graphic.GraphicException;
import org.axsl.graphic.SvgGraphic;
import org.axsl.graphic.output.GraphicOutput;
import org.axsl.graphic.output.GraphicPdf;
+import org.axsl.orthography.Orthography;
import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
@@ -56,22 +56,22 @@
/** The FontConsumer to use for resolving fonts in the SVG. */
private FontConsumer fontConsumer;
- /** The writing system. */
- private WritingSystem writingSystem;
+ /** The orthography. */
+ private Orthography orthography;
/**
* Constructor.
* @param doc The parent PDF document.
* @param graphic The form to be encapsulated.
- * @param writingSystem The writing system.
+ * @param orthography The orthography.
* @param fontConsumer The font consumer to use for resolving fonts in the SVG.
*/
- public PdfXformSvg(final PdfDocument4a doc, final SvgGraphic graphic, final WritingSystem writingSystem,
+ public PdfXformSvg(final PdfDocument4a doc, final SvgGraphic graphic, final Orthography orthography,
final FontConsumer fontConsumer) {
super(doc, graphic);
this.graphic = graphic;
this.fontConsumer = fontConsumer;
- this.writingSystem = writingSystem;
+ this.orthography = orthography;
}
@Override
@@ -85,8 +85,8 @@
}
final GraphicPdf graphicPdf = (GraphicPdf) graphicOutput;
// graphicPdf.drawVectorContent(outputStream, doc, this.fontConsumer, this.strokeText, false);
- final Graphics2D graphic2D = new PdfGraphics2D(this.writingSystem, this.fontConsumer, doc, outputStream);
- graphicPdf.drawVectorContent(graphic2D, this.fontConsumer, this.writingSystem);
+ final Graphics2D graphic2D = new PdfGraphics2D(this.orthography, this.fontConsumer, doc, outputStream);
+ graphicPdf.drawVectorContent(graphic2D, this.fontConsumer, this.orthography);
return outputStream.toByteArray();
}
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXobject4a.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXobject4a.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfXobject4a.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -34,7 +34,6 @@
package org.foray.pdf.object;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.graphic.EpsGraphic;
import org.axsl.graphic.Graphic;
@@ -41,6 +40,7 @@
import org.axsl.graphic.GraphicException;
import org.axsl.graphic.MathGraphic;
import org.axsl.graphic.SvgGraphic;
+import org.axsl.orthography.Orthography;
import org.axsl.pdf.PdfException;
import org.axsl.pdf.PdfXobject;
import org.axsl.ps.PsEncodeFilter;
@@ -86,13 +86,13 @@
* PdfXobject instance.
* @param pdfDoc The parent PDF document.
* @param img The graphic to be encapsulated.
- * @param writingSystem The writing system.
+ * @param orthography The orthography.
* @param fontConsumer The font consumer to use for resolving fonts in the SVG.
* @return An appropriate PdfXobject instance that encapsulates the input.
* @throws GraphicException For errors getting the appropriate filter.
*/
public static PdfXobject4a makeXObject(final PdfDocument4a pdfDoc, final Graphic img,
- final WritingSystem writingSystem, final FontConsumer fontConsumer) throws GraphicException {
+ final Orthography orthography, final FontConsumer fontConsumer) throws GraphicException {
/* If it has already been created, reuse it ... */
PdfXobject4a xObject = pdfDoc.findXObject(img);
if (xObject != null) {
@@ -105,10 +105,10 @@
xObject = new PdfXformEps(pdfDoc, epsGraphic);
} else if (img instanceof SvgGraphic) {
final SvgGraphic svgGraphic = (SvgGraphic) img;
- xObject = new PdfXformSvg(pdfDoc, svgGraphic, writingSystem, fontConsumer);
+ xObject = new PdfXformSvg(pdfDoc, svgGraphic, orthography, fontConsumer);
} else if (img instanceof MathGraphic) {
final MathGraphic mathGraphic = (MathGraphic) img;
- xObject = new PdfXformMath(pdfDoc, mathGraphic, writingSystem, fontConsumer);
+ xObject = new PdfXformMath(pdfDoc, mathGraphic, orthography, fontConsumer);
} else if (img.getGraphicType() == Graphic.Type.PDF) {
xObject = new PdfXreference(pdfDoc, img);
} else {
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -42,11 +42,11 @@
import org.foray.pdf.PdfConstants;
import org.foray.pdf.object.PdfColor4a;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
import org.axsl.font.FontUse;
import org.axsl.font.FontUtility;
import org.axsl.graphic.output.GraphicOutputContext;
+import org.axsl.orthography.Orthography;
import org.axsl.pdf.PdfColor;
import org.axsl.pdf.PdfPage;
import org.axsl.unicode.block.Basic_Latin_Block;
@@ -158,8 +158,8 @@
* the next "restore". */
private Stack<PdfGraphics2D.PdfGraphicsState> graphicsStateStack = new Stack<PdfGraphics2D.PdfGraphicsState>();
- /** The current writing system. */
- private WritingSystem writingSystem;
+ /** The current orthography system. */
+ private Orthography orthography;
/** Lazily-loaded logger. Use {@link getLogger()} to obtain the instance. */
private Logger logger;
@@ -167,12 +167,12 @@
/**
* Create a new PdfGraphics2D with the given pdf document info.
* This is used to create a Graphics object for use inside an already existing document.
- * @param writingSystem The writing system in use.
+ * @param orthography The orthography in use.
* @param fontConsumer The font consumer for this document.
* @param pdfContext The PDF context in which this content is being written.
* @param outputStream The output stream to which this processor writes its PDF output.
*/
- public PdfGraphics2D(final WritingSystem writingSystem, final FontConsumer fontConsumer,
+ public PdfGraphics2D(final Orthography orthography, final FontConsumer fontConsumer,
final GraphicOutputContext pdfContext, final OutputStream outputStream) {
super(fontConsumer == null);
this.gc = new GraphicContext();
@@ -183,7 +183,7 @@
this.fontConsumer = fontConsumer;
this.pdfContext = pdfContext;
this.graphicsStateStack.push(new PdfGraphicsState(null));
- this.writingSystem = writingSystem;
+ this.orthography = orthography;
}
/**
@@ -680,7 +680,7 @@
this.write(matrixString + "cm");
this.write("1 0 0 -1 0 0 Tm ");
- final CharSequence outputString = font.textToPdf(s, FontContext4a.DEFAULT, this.writingSystem);
+ final CharSequence outputString = font.textToPdf(s, FontContext4a.DEFAULT, this.orthography);
this.write(outputString);
this.write("ET");
Modified: trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java
===================================================================
--- trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -29,7 +29,6 @@
package org.foray.pdf.object;
import org.foray.common.FontContext4a;
-import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.sequence.ByteArray;
import org.foray.common.sequence.ByteArrayBuilder;
import org.foray.font.ConsumerFont4a;
@@ -102,7 +101,7 @@
contentStream.setCursor(36, heightPoints - 36);
contentStream.openTextObject();
- contentStream.drawText("Hello World!", FontContext4a.DEFAULT, WritingSystem4a.USA);
+ contentStream.drawText("Hello World!", FontContext4a.DEFAULT, null);
contentStream.closeTextObject();
contentStream.close();
Modified: trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java
===================================================================
--- trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -38,7 +38,6 @@
import org.axsl.common.FormattedIntegerType;
import org.axsl.common.PositiveIntegerFormatter;
-import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.value.LinkType;
import org.axsl.common.value.RuleStyle;
import org.axsl.font.Font;
@@ -61,6 +60,7 @@
import org.axsl.graphic.GraphicLink;
import org.axsl.graphic.MathGraphic;
import org.axsl.graphic.SvgGraphic;
+import org.axsl.orthography.Orthography;
import org.axsl.output.DocumentPdfConfiguration;
import org.axsl.output.OutputException;
import org.axsl.pdf.PdfColor;
@@ -304,7 +304,7 @@
final Rectangle2D.Float pdfClipRectangle = convertMillipointRectangle(
clipRectangle);
try {
- final PdfXobject xObject = this.pdfDoc.createXobject(image, area.writingSystem(), getFontConsumer());
+ final PdfXobject xObject = this.pdfDoc.createXobject(image, area.orthography(), getFontConsumer());
getContentStream().drawXobject(xObject, pdfContentRectangle, pdfClipRectangle);
} catch (final PdfException e) {
throw new GalleyVisitorException(e);
@@ -320,7 +320,7 @@
toPoints(foreign.referenceBpd()));
final SvgGraphic svgGraphic = area.getGraphic();
try {
- final PdfXobject xObject = this.pdfDoc.createXobject(svgGraphic, area.getParent().writingSystem(),
+ final PdfXobject xObject = this.pdfDoc.createXobject(svgGraphic, area.getParent().orthography(),
getFontConsumer());
getContentStream().drawXobject(xObject, contentRectangle, null);
} catch (final PdfException e) {
@@ -338,7 +338,7 @@
toPoints(foreign.referenceBpd()));
final MathGraphic mathGraphic = area.getGraphic();
try {
- final PdfXobject xObject = this.pdfDoc.createXobject(mathGraphic, area.getParent().writingSystem(),
+ final PdfXobject xObject = this.pdfDoc.createXobject(mathGraphic, area.getParent().orthography(),
getFontConsumer());
getContentStream().drawXobject(xObject, contentRectangle, null);
} catch (final PdfException e) {
@@ -401,7 +401,7 @@
if (newFont != currentFont) {
/* Font has changed. Write the text so far. */
final int size = i - startIndex;
- paintText(area, currentFont, text, startIndex, size, area.fontContext(), area.writingSystem());
+ paintText(area, currentFont, text, startIndex, size, area.fontContext(), area.orthography());
if (size > 0) {
startIndex = i;
}
@@ -410,7 +410,7 @@
}
if (startIndex < text.length()) {
paintText(area, currentFont, text, startIndex, text.length() - startIndex,
- area.fontContext(), area.writingSystem());
+ area.fontContext(), area.orthography());
}
} catch (final PdfException e) {
throw new GalleyVisitorException(e);
@@ -473,7 +473,7 @@
*/
private void paintText(final TextArea area, final FontUse fontUse,
final CharSequence text, final int startIndex, final int size,
- final FontContext fontContext, final WritingSystem orthography) throws PdfException {
+ final FontContext fontContext, final Orthography orthography) throws PdfException {
if (size < 1) {
return;
}
Modified: trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java
===================================================================
--- trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java 2021-11-19 15:44:38 UTC (rev 12098)
+++ trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java 2021-11-19 16:56:31 UTC (rev 12099)
@@ -898,7 +898,7 @@
write(moveTo(area));
StringBuilder sb = new StringBuilder();
final IntSequence glyphIndexes =
- area.getPrimaryFont().encode(text, 0, text.length(), area.fontContext(), area.writingSystem());
+ area.getPrimaryFont().encode(text, 0, text.length(), area.fontContext(), area.orthography());
for (int i = 0; i < glyphIndexes.length(); i++) {
final int glyphIndex = glyphIndexes.intAt(i);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 15:44:41
|
Revision: 12098
http://sourceforge.net/p/foray/code/12098
Author: victormote
Date: 2021-11-19 15:44:38 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Conform to aXSL change: Re-separate Orthography (document-driven) from the font options/context items (user-driven).
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 13:40:39 UTC (rev 12097)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 15:44:38 UTC (rev 12098)
@@ -28,15 +28,8 @@
package org.foray.common;
-import org.axsl.common.para.ParaBranch;
import org.axsl.font.FontContext;
-import org.axsl.orthography.Orthography;
-import org.axsl.orthography.Word;
-import org.axsl.orthography.Word.PartOfSpeech;
-import org.axsl.orthography.optional.Dictionary;
-import java.util.List;
-
/**
* FOray implementation of {@link FontContext}.
*/
@@ -43,36 +36,6 @@
public class FontContext4a implements FontContext {
/**
- * A default orthography.
- */
- public static final Orthography DEFAULT_ORTHOGRAPHY = new Orthography() {
-
- @Override
- public Word recognizeWord(final CharSequence wordChars, final int offset, final int length,
- final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
- return null;
- }
-
- @Override
- public boolean isRecognizedWord(final CharSequence wordChars, final int offset, final int length,
- final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
- return false;
- }
-
- @Override
- public Word hyphenateUnrecognizedWord(final CharSequence wordChars, final int offset, final int length) {
- return null;
- }
-
- @Override
- public ParaBranch tokenizeWordSequence(final CharSequence wordSequenceChars, final int offset,
- final int length) {
- return null;
- }
-
- };
-
- /**
* Immutable implementation that can be used as a default.
*/
public static final FontContext DEFAULT = new FontContext() {
@@ -91,10 +54,6 @@
public boolean anySubsitutionActive() {
return true;
}
-
- public Orthography getOrthography() {
- return DEFAULT_ORTHOGRAPHY;
- }
};
/** Indicates whether kerning should be used. */
@@ -130,9 +89,4 @@
return true;
}
- @Override
- public Orthography getOrthography() {
- return DEFAULT_ORTHOGRAPHY;
- }
-
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 13:40:42
|
Revision: 12097
http://sourceforge.net/p/foray/code/12097
Author: victormote
Date: 2021-11-19 13:40:39 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Conform to aXSL change: Add Orthography to the FontContext.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 13:17:16 UTC (rev 12096)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 13:40:39 UTC (rev 12097)
@@ -28,8 +28,15 @@
package org.foray.common;
+import org.axsl.common.para.ParaBranch;
import org.axsl.font.FontContext;
+import org.axsl.orthography.Orthography;
+import org.axsl.orthography.Word;
+import org.axsl.orthography.Word.PartOfSpeech;
+import org.axsl.orthography.optional.Dictionary;
+import java.util.List;
+
/**
* FOray implementation of {@link FontContext}.
*/
@@ -36,6 +43,36 @@
public class FontContext4a implements FontContext {
/**
+ * A default orthography.
+ */
+ public static final Orthography DEFAULT_ORTHOGRAPHY = new Orthography() {
+
+ @Override
+ public Word recognizeWord(final CharSequence wordChars, final int offset, final int length,
+ final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ return null;
+ }
+
+ @Override
+ public boolean isRecognizedWord(final CharSequence wordChars, final int offset, final int length,
+ final PartOfSpeech pos, final List<Dictionary> adhocDictionaries) {
+ return false;
+ }
+
+ @Override
+ public Word hyphenateUnrecognizedWord(final CharSequence wordChars, final int offset, final int length) {
+ return null;
+ }
+
+ @Override
+ public ParaBranch tokenizeWordSequence(final CharSequence wordSequenceChars, final int offset,
+ final int length) {
+ return null;
+ }
+
+ };
+
+ /**
* Immutable implementation that can be used as a default.
*/
public static final FontContext DEFAULT = new FontContext() {
@@ -55,6 +92,9 @@
return true;
}
+ public Orthography getOrthography() {
+ return DEFAULT_ORTHOGRAPHY;
+ }
};
/** Indicates whether kerning should be used. */
@@ -90,4 +130,9 @@
return true;
}
+ @Override
+ public Orthography getOrthography() {
+ return DEFAULT_ORTHOGRAPHY;
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 13:17:19
|
Revision: 12096
http://sourceforge.net/p/foray/code/12096
Author: victormote
Date: 2021-11-19 13:17:16 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Move default implementation of FontContext from aXSL to FOray.
Modified Paths:
--------------
trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java
trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java
trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -28,6 +28,8 @@
package org.foray.area;
+import org.foray.common.FontContext4a;
+
import org.axsl.area.AreaTreeException;
import org.axsl.fo.fo.BasicLink;
import org.axsl.fo.fo.BidiOverride;
@@ -213,7 +215,7 @@
*/
public FontContext fontContext() {
/* TODO: Get this from the FOTree. */
- return FontContext.DEFAULT;
+ return FontContext4a.DEFAULT;
}
}
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -28,6 +28,8 @@
package org.foray.area;
+import org.foray.common.FontContext4a;
+
import org.axsl.area.AreaTreeException;
import org.axsl.fo.Fo;
import org.axsl.fo.fo.GraftingPoint;
@@ -315,7 +317,7 @@
@Override
public FontContext fontContext() {
/* TODO: Get this from the FOTree. */
- return FontContext.DEFAULT;
+ return FontContext4a.DEFAULT;
}
}
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -29,6 +29,7 @@
package org.foray.area;
import org.foray.common.CharSequenceSubset;
+import org.foray.common.FontContext4a;
import org.foray.common.primitive.XmlCharacterUtils;
import org.axsl.area.AreaTreeException;
@@ -673,7 +674,7 @@
@Override
public FontContext fontContext() {
/* TODO: Get this from the FOTree. */
- return FontContext.DEFAULT;
+ return FontContext4a.DEFAULT;
}
@Override
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -35,6 +35,28 @@
*/
public class FontContext4a implements FontContext {
+ /**
+ * Immutable implementation that can be used as a default.
+ */
+ public static final FontContext DEFAULT = new FontContext() {
+
+ @Override
+ public boolean isKerning() {
+ return true;
+ }
+
+ @Override
+ public boolean isFeatureActive(final String feature) {
+ return true;
+ }
+
+ @Override
+ public boolean anySubsitutionActive() {
+ return true;
+ }
+
+ };
+
/** Indicates whether kerning should be used. */
private boolean isKerning;
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -27,6 +27,7 @@
*/
package org.foray.font;
+import org.foray.common.FontContext4a;
import org.foray.common.WellKnownConstants;
import org.foray.font.config.RegisteredFont;
import org.foray.font.format.Kerning;
@@ -178,7 +179,7 @@
public int width(final IntSequence word, final int offset, final int length, final int fontSize,
final int letterSpacing, final int wordSpacing, final FontContext requestedFontContext,
final WritingSystem orthography) {
- final FontContext fontContext = requestedFontContext == null ? FontContext.DEFAULT : requestedFontContext;
+ final FontContext fontContext = requestedFontContext == null ? FontContext4a.DEFAULT : requestedFontContext;
if (word == null) {
return 0;
}
Modified: trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java
===================================================================
--- trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -28,6 +28,7 @@
package org.foray.font.format.ttf;
+import org.foray.common.FontContext4a;
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.io.SimpleDataSource;
import org.foray.common.primitive.CharSequenceUtils;
@@ -39,7 +40,6 @@
import org.foray.font.format.ttf.OtfLookupGsubx04x01.LigatureSet;
import org.axsl.common.i18n.WritingSystem;
-import org.axsl.font.FontContext;
import org.axsl.font.FontException;
import org.axsl.ps.Encoding;
@@ -267,7 +267,7 @@
Assert.assertArrayEquals(expectedInitialGlyphIndexes, glyphIndexes.toArray());
final TtfTableGsub gsub = ttfFont.getGsubTable();
- gsub.makeSubstitutions(glyphIndexes, FontContext.DEFAULT, orthography);
+ gsub.makeSubstitutions(glyphIndexes, FontContext4a.DEFAULT, orthography);
final int[] expectedFinalGlyphIndexes = new int[] {68, 2249, 81, 76, 87, 92};
Assert.assertArrayEquals(expectedFinalGlyphIndexes, glyphIndexes.toArray());
}
@@ -288,7 +288,7 @@
Assert.assertArrayEquals(expectedInitialGlyphIndexes, glyphIndexes.toArray());
final TtfTableGsub gsub = ttfFont.getGsubTable();
- gsub.makeSubstitutions(glyphIndexes, FontContext.DEFAULT, orthography);
+ gsub.makeSubstitutions(glyphIndexes, FontContext4a.DEFAULT, orthography);
final int[] expectedFinalGlyphIndexes = new int[] {68, 2249};
Assert.assertArrayEquals(expectedFinalGlyphIndexes, glyphIndexes.toArray());
}
@@ -320,7 +320,7 @@
Assert.assertArrayEquals(expectedInitialGlyphIndexes, glyphIndexes.toArray());
final TtfTableGsub gsub = ttfFont.getGsubTable();
- gsub.makeSubstitutions(glyphIndexes, FontContext.DEFAULT, orthography);
+ gsub.makeSubstitutions(glyphIndexes, FontContext4a.DEFAULT, orthography);
final int[] expectedFinalGlyphIndexes = new int[] {
36, 2250, 72, 70, 78, 3, // "Affleck "
68, 2249, 81, 76, 87, 92, 3, // "affinity "
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -28,6 +28,7 @@
package org.foray.fotree;
+import org.foray.common.FontContext4a;
import org.foray.common.FontUtil;
import org.foray.common.WellKnownConstants;
import org.foray.common.data.AbstractOrderedTreeNode;
@@ -129,7 +130,6 @@
import org.axsl.fo.fo.prop.WritingModePa;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
-import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.graphic.Graphic;
import org.axsl.graphic.GraphicServer;
@@ -3899,7 +3899,7 @@
public int getWidth(final ParaLeaf leaf) {
final FontUse fontUse = resolvePrimaryFont(null);
return fontUse.width(leaf.getText(), 0, leaf.getText().length(), this.traitFontSize(null), 0, 0,
- FontContext.DEFAULT, getWritingSystem());
+ FontContext4a.DEFAULT, getWritingSystem());
}
@Override
Modified: trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java
===================================================================
--- trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -33,12 +33,12 @@
package org.foray.graphic.batik;
+import org.foray.common.FontContext4a;
import org.foray.common.WellKnownConstants;
import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
-import org.axsl.font.FontContext;
import org.axsl.font.FontException;
import org.axsl.font.FontUtility;
@@ -284,7 +284,7 @@
final int letterSpacing = 0;
/* TODO: Pass the font options below instead of hard-coding them. */
final int width = fontToUse.width(txt, 0, txt.length(),
- awtFontSize * WellKnownConstants.MILLIPOINTS_PER_POINT, letterSpacing, 0, FontContext.DEFAULT,
+ awtFontSize * WellKnownConstants.MILLIPOINTS_PER_POINT, letterSpacing, 0, FontContext4a.DEFAULT,
this.writingSystem);
final float advance = WellKnownConstants.millipointsToPoints(width);
float tx = 0;
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -34,6 +34,7 @@
package org.foray.pdf.util;
import org.foray.common.CharacterOutputStream;
+import org.foray.common.FontContext4a;
import org.foray.common.Gradient4a;
import org.foray.common.WellKnownConstants;
import org.foray.common.ps.PsColor;
@@ -43,7 +44,6 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
-import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.font.FontUtility;
import org.axsl.graphic.output.GraphicOutputContext;
@@ -680,7 +680,7 @@
this.write(matrixString + "cm");
this.write("1 0 0 -1 0 0 Tm ");
- final CharSequence outputString = font.textToPdf(s, FontContext.DEFAULT, this.writingSystem);
+ final CharSequence outputString = font.textToPdf(s, FontContext4a.DEFAULT, this.writingSystem);
this.write(outputString);
this.write("ET");
Modified: trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java
===================================================================
--- trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java 2021-11-19 13:02:18 UTC (rev 12095)
+++ trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java 2021-11-19 13:17:16 UTC (rev 12096)
@@ -28,6 +28,7 @@
package org.foray.pdf.object;
+import org.foray.common.FontContext4a;
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.sequence.ByteArray;
import org.foray.common.sequence.ByteArrayBuilder;
@@ -43,7 +44,6 @@
import org.foray.ps.encode.EncodingStandard;
import org.axsl.font.Font;
-import org.axsl.font.FontContext;
import org.axsl.font.FontException;
import org.axsl.font.FontUse;
import org.axsl.pdf.PdfException;
@@ -102,7 +102,7 @@
contentStream.setCursor(36, heightPoints - 36);
contentStream.openTextObject();
- contentStream.drawText("Hello World!", FontContext.DEFAULT, WritingSystem4a.USA);
+ contentStream.drawText("Hello World!", FontContext4a.DEFAULT, WritingSystem4a.USA);
contentStream.closeTextObject();
contentStream.close();
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 13:02:20
|
Revision: 12095
http://sourceforge.net/p/foray/code/12095
Author: victormote
Date: 2021-11-19 13:02:18 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Rename FOray implementation, for consistency.
Modified Paths:
--------------
trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java
trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java
Added Paths:
-----------
trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java
Removed Paths:
-------------
trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java
Modified: trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java
===================================================================
--- trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java 2021-11-19 12:53:31 UTC (rev 12094)
+++ trunk/foray/foray-app/src/test/java/org/foray/app/area/TestBlock.java 2021-11-19 13:02:18 UTC (rev 12095)
@@ -36,7 +36,7 @@
import org.foray.area.NormalFlowRa;
import org.foray.area.PageCollection;
import org.foray.area.TextArea;
-import org.foray.common.FontOptions4a;
+import org.foray.common.FontContext4a;
import org.foray.common.i18n.WritingSystem4a;
import org.foray.core.ForayException;
@@ -54,7 +54,7 @@
public class TestBlock extends AbstractAreaTreeTest {
/** The font options. */
- private FontOptions4a fontOptionsWithKerning;
+ private FontContext4a fontOptionsWithKerning;
/**
* Setup the fixtures needed by the test.
@@ -61,7 +61,7 @@
*/
@Before
public void setup() {
- this.fontOptionsWithKerning = new FontOptions4a();
+ this.fontOptionsWithKerning = new FontContext4a();
this.fontOptionsWithKerning.setKerning(true);
}
Copied: trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java (from rev 12094, trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java)
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java (rev 0)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/FontContext4a.java 2021-11-19 13:02:18 UTC (rev 12095)
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2017 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.common;
+
+import org.axsl.font.FontContext;
+
+/**
+ * FOray implementation of {@link FontContext}.
+ */
+public class FontContext4a implements FontContext {
+
+ /** Indicates whether kerning should be used. */
+ private boolean isKerning;
+
+ /**
+ * Default constructor.
+ */
+ public FontContext4a() {
+ this.isKerning = true;
+ }
+
+ @Override
+ public boolean isKerning() {
+ return this.isKerning;
+ }
+
+ /**
+ * Sets the isKerning value.
+ * @param isKerning The new isKerning value.
+ */
+ public void setKerning(final boolean isKerning) {
+ this.isKerning = isKerning;
+ }
+
+ @Override
+ public boolean isFeatureActive(final String feature) {
+ return true;
+ }
+
+ @Override
+ public boolean anySubsitutionActive() {
+ return true;
+ }
+
+}
Deleted: trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java 2021-11-19 12:53:31 UTC (rev 12094)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java 2021-11-19 13:02:18 UTC (rev 12095)
@@ -1,71 +0,0 @@
-/*
- * Copyright 2017 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.common;
-
-import org.axsl.font.FontContext;
-
-/**
- * FOray implementation of {@link FontContext}.
- */
-public class FontOptions4a implements FontContext {
-
- /** Indicates whether kerning should be used. */
- private boolean isKerning;
-
- /**
- * Default constructor.
- */
- public FontOptions4a() {
- this.isKerning = true;
- }
-
- @Override
- public boolean isKerning() {
- return this.isKerning;
- }
-
- /**
- * Sets the isKerning value.
- * @param isKerning The new isKerning value.
- */
- public void setKerning(final boolean isKerning) {
- this.isKerning = isKerning;
- }
-
- @Override
- public boolean isFeatureActive(final String feature) {
- return true;
- }
-
- @Override
- public boolean anySubsitutionActive() {
- return true;
- }
-
-}
Modified: trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java
===================================================================
--- trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java 2021-11-19 12:53:31 UTC (rev 12094)
+++ trunk/foray/foray-font/src/test/java/org/foray/font/Font4aTests.java 2021-11-19 13:02:18 UTC (rev 12095)
@@ -28,7 +28,7 @@
package org.foray.font;
-import org.foray.common.FontOptions4a;
+import org.foray.common.FontContext4a;
import org.foray.common.i18n.WritingSystem4a;
import org.foray.font.config.RegisteredFont;
@@ -79,9 +79,9 @@
*/
@Test
public void testWidth() throws FontException {
- final FontOptions4a fontOptionsWithKerning = new FontOptions4a();
+ final FontContext4a fontOptionsWithKerning = new FontContext4a();
fontOptionsWithKerning.setKerning(true);
- final FontOptions4a fontOptionsWithoutKerning = new FontOptions4a();
+ final FontContext4a fontOptionsWithoutKerning = new FontContext4a();
fontOptionsWithoutKerning.setKerning(false);
final FontServer4a server = FontServer4aTests.getServer();
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 12:53:35
|
Revision: 12094
http://sourceforge.net/p/foray/code/12094
Author: victormote
Date: 2021-11-19 12:53:31 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
Conform to aXSL change: Rename FontOptions to FontContext, for clarity.
Modified Paths:
--------------
trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java
trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubSubtable.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x02.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx02x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx03x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx04x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x02.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x03.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x02.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x03.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx07x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx08x01.java
trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java
trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java
trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java
trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java
trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java
trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java
trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java
trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/AbstractAncestralInlineArea.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -43,7 +43,7 @@
import org.axsl.fo.fo.PageNumberCitation;
import org.axsl.fo.fo.PageNumberCitationLast;
import org.axsl.fo.fo.ScalingValueCitation;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.util.ArrayList;
import java.util.List;
@@ -211,9 +211,9 @@
* Returns the font options.
* @return The font options.
*/
- public FontOptions fontOptions() {
+ public FontContext fontContext() {
/* TODO: Get this from the FOTree. */
- return FontOptions.DEFAULT;
+ return FontContext.DEFAULT;
}
}
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberArea.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -239,7 +239,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontOptions(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationArea.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -266,7 +266,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontOptions(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/PageNumberCitationLastArea.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -267,7 +267,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontOptions(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
}
@Override
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/ScalingValueCitationArea.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -32,7 +32,7 @@
import org.axsl.fo.Fo;
import org.axsl.fo.fo.GraftingPoint;
import org.axsl.fo.fo.ScalingValueCitation;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.galley.GalleyVisitorException;
import org.axsl.galley.RenderVisitor;
@@ -250,7 +250,7 @@
final FontUse fontUse = getPrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, 0, word.length(), traitFontSize(), traitGeneratedBy().traitLetterSpacingOpt(this),
- traitGeneratedBy().traitWordSpacingOpt(this), fontOptions(), writingSystem());
+ traitGeneratedBy().traitWordSpacingOpt(this), fontContext(), writingSystem());
}
@Override
@@ -313,9 +313,9 @@
}
@Override
- public FontOptions fontOptions() {
+ public FontContext fontContext() {
/* TODO: Get this from the FOTree. */
- return FontOptions.DEFAULT;
+ return FontContext.DEFAULT;
}
}
Modified: trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java
===================================================================
--- trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-areatree/src/main/java/org/foray/area/TextArea.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -38,7 +38,7 @@
import org.axsl.fo.fo.Block;
import org.axsl.fo.fo.CharacterSequence;
import org.axsl.fo.fo.GraftingPoint;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.galley.GalleyVisitorException;
import org.axsl.galley.RenderVisitor;
@@ -611,7 +611,7 @@
private int recomputeProgressionDimension() {
final CharSequence text = getText();
final int pd = getPrimaryFont().width(text, 0, text.length(), traitFontSize(), traitLetterSpacingOpt(),
- traitWordSpacingOpt(), fontOptions(), writingSystem());
+ traitWordSpacingOpt(), fontContext(), writingSystem());
return pd;
}
@@ -671,9 +671,9 @@
}
@Override
- public FontOptions fontOptions() {
+ public FontContext fontContext() {
/* TODO: Get this from the FOTree. */
- return FontOptions.DEFAULT;
+ return FontContext.DEFAULT;
}
@Override
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/FontOptions4a.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -28,12 +28,12 @@
package org.foray.common;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
/**
- * FOray implementation of {@link FontOptions}.
+ * FOray implementation of {@link FontContext}.
*/
-public class FontOptions4a implements FontOptions {
+public class FontOptions4a implements FontContext {
/** Indicates whether kerning should be used. */
private boolean isKerning;
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/para/ParaConfig4a.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -32,7 +32,7 @@
import org.axsl.common.para.ParaConfig;
import org.axsl.common.para.ParaGlue;
import org.axsl.common.para.ParaLeaf;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
/**
@@ -46,8 +46,8 @@
/** The size, in millipoints, of the font. */
private int fontSize;
- /** Options indicating how the font should use its features. */
- private FontOptions fontOptions;
+ /** Context indicating how the font should use its features. */
+ private FontContext fontContext;
/** The size, in millipoints, of any stretchability. */
private int stretchability;
@@ -80,7 +80,7 @@
@Override
public int getWidth(final ParaLeaf leaf) {
- return this.fontUse.width(leaf.getText(), 0, leaf.getText().length(), fontSize, 0, 0, fontOptions,
+ return this.fontUse.width(leaf.getText(), 0, leaf.getText().length(), fontSize, 0, 0, fontContext,
orthography);
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/Font4a.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -34,7 +34,7 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.sequence.IntSequence;
import org.axsl.font.Font;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.ps.CharSet;
import org.slf4j.Logger;
@@ -176,9 +176,9 @@
@Override
public int width(final IntSequence word, final int offset, final int length, final int fontSize,
- final int letterSpacing, final int wordSpacing, final FontOptions requestedFontOptions,
+ final int letterSpacing, final int wordSpacing, final FontContext requestedFontContext,
final WritingSystem orthography) {
- final FontOptions fontOptions = requestedFontOptions == null ? FontOptions.DEFAULT : requestedFontOptions;
+ final FontContext fontContext = requestedFontContext == null ? FontContext.DEFAULT : requestedFontContext;
if (word == null) {
return 0;
}
@@ -193,7 +193,7 @@
width += charWidth;
}
// Add any kerning.
- if (fontOptions.isKerning()
+ if (fontContext.isKerning()
&& i < word.length() - 1) {
final int kerning = kern(word.intAt(i), word.intAt(i + 1));
/* Kerning is computed in 1/1000 of a text-space unit, regardless of how the value
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/FontUse4a.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -43,8 +43,8 @@
import org.axsl.common.sequence.IntSequence;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
+import org.axsl.font.FontContext;
import org.axsl.font.FontException;
-import org.axsl.font.FontOptions;
import org.axsl.font.FontServer;
import org.axsl.font.FontUse;
import org.axsl.ps.CharSet;
@@ -140,7 +140,7 @@
@Override
public IntArrayBuilder encode(final CharSequence chars, final int offset, final int length,
- final FontOptions options, final WritingSystem orthography) {
+ final FontContext fontContext, final WritingSystem orthography) {
/* Convert the chars to code points. */
/* Some substitutions add glyphs, so make the output a bit bigger than the input to avoid array copying. */
final IntArrayBuilder codePoints = CharSequenceUtils.toCodepoints(chars, offset, length, 2);
@@ -170,11 +170,11 @@
}
if (gsub == null
- || ! options.anySubsitutionActive()) {
+ || ! fontContext.anySubsitutionActive()) {
return glyphIndexes;
}
- gsub.makeSubstitutions(glyphIndexes, options, orthography);
+ gsub.makeSubstitutions(glyphIndexes, fontContext, orthography);
return glyphIndexes;
}
@@ -194,7 +194,7 @@
@Override
public int width(final CharSequence chars, final int offset, final int length, final int fontSize,
- final int letterSpacing, final int wordSpacing, final FontOptions options,
+ final int letterSpacing, final int wordSpacing, final FontContext fontContext,
final WritingSystem orthography) {
final IntArrayBuilder metricIndexes = new IntArrayBuilder(chars.length());
for (int i = 0; i < chars.length(); i ++) {
@@ -205,7 +205,7 @@
metricIndexes.append(metricIndex);
}
}
- return getFont().width(metricIndexes, offset, length, fontSize, letterSpacing, wordSpacing, options,
+ return getFont().width(metricIndexes, offset, length, fontSize, letterSpacing, wordSpacing, fontContext,
orthography);
}
@@ -579,10 +579,10 @@
}
@Override
- public String textToPdf(final CharSequence theString, final FontOptions fontOptions,
+ public String textToPdf(final CharSequence theString, final FontContext fontContext,
final WritingSystem orthography) {
final Font font = getFOrayFont();
- final IntSequence glyphIndexes = encode(theString, 0, theString.length(), fontOptions, orthography);
+ final IntSequence glyphIndexes = encode(theString, 0, theString.length(), fontContext, orthography);
final StringBuilder buffer = new StringBuilder();
buffer.append("[");
buffer.append(startTextDelimiter(font));
@@ -590,7 +590,7 @@
final int glyphIndex = glyphIndexes.intAt(i);
addCharToBuffer(buffer, glyphIndex, font);
if (i + 1 < theString.length()
- && fontOptions.isKerning()) {
+ && fontContext.isKerning()) {
addKerning(font, encode(theString.charAt(i)), encode(theString.charAt(i + 1)), buffer);
}
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubSubtable.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubSubtable.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubSubtable.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -30,7 +30,7 @@
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
/**
* Abstract superclass for all GSUB subtables.
@@ -39,10 +39,12 @@
/**
* Process the substitution for this subtable.
+ * Looks through the possible substitutions in this font, and for those that the client wants, converts the glyph
+ * indexes for those substitutions.
* @param glyphIndexes The glyph indexes for which substitutions are being considered.
- * @param options The font options, such as ligatures and small-caps that should be used when computing the glyphs
- * for this word.
+ * @param fontContext The font context, such as ligatures and small-caps that should be used when computing the
+ * glyphs for this word.
*/
- abstract void makeSubstitutions(IntArrayBuilder glyphIndexes, FontOptions options);
+ abstract void makeSubstitutions(IntArrayBuilder glyphIndexes, FontContext fontContext);
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x02.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x02.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx01x02.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx02x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx02x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx02x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx03x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx03x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx03x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx04x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx04x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx04x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -229,14 +229,8 @@
return this.ligatureSets;
}
- /**
- * Looks through the possible substitution in this font, and for those that the client wants, converts the glyph
- * indexes for those subsitutions.
- * @param glyphIndexes The glyph indexes for which substitutions are being considered.
- * @param options The font options, such as ligatures and small-caps that should be used when computing the glyphs
- * for this word.
- */
- public void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ @Override
+ public void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
final OtfCoverage coverage = getCoverage();
final LigatureSet[] ligatureSets = getLigatureSets();
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x02.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x02.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x02.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x03.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x03.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx05x03.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x02.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x02.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x02.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x03.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x03.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx06x03.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx07x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx07x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx07x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx08x01.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx08x01.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/OtfLookupGsubx08x01.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.foray.common.io.RandomAccessInput;
import org.foray.common.sequence.IntArrayBuilder;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
@@ -60,7 +60,7 @@
}
@Override
- void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options) {
+ void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext fontContext) {
/* TODO: Implement this. */
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/format/ttf/TtfTableGsub.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -32,7 +32,7 @@
import org.foray.common.sequence.IntArrayBuilder;
import org.axsl.common.i18n.WritingSystem;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import java.io.IOException;
import java.util.ArrayList;
@@ -135,11 +135,11 @@
* Looks through the possible substitutions in this font, and for those that the client wants, converts the glyph
* indexes for those substitutions.
* @param glyphIndexes The glyph indexes for which substitutions are being considered.
- * @param options The font options, such as ligatures and small-caps that should be used when computing the glyphs
+ * @param context The font context, such as ligatures and small-caps that should be used when computing the glyphs
* for this word.
* @param orthography The orthography that should be used when applying substitutions.
*/
- public void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontOptions options,
+ public void makeSubstitutions(final IntArrayBuilder glyphIndexes, final FontContext context,
final WritingSystem orthography) {
/* "To implement features, a client applies the lookups in the order the lookup definitions occur in the
@@ -157,7 +157,7 @@
final OtfLookupGsubSubtable[] lookupSubtables = lookup.getSubtables();
for (int subtableIndex = 0; subtableIndex < lookupSubtables.length; subtableIndex ++) {
final OtfLookupGsubSubtable lookupSubtable = lookupSubtables[subtableIndex];
- lookupSubtable.makeSubstitutions(glyphIndexes, options);
+ lookupSubtable.makeSubstitutions(glyphIndexes, context);
}
}
}
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/util/MockFont.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -35,7 +35,7 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.common.sequence.IntSequence;
import org.axsl.font.Font;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.Panose;
import org.axsl.ps.Encoding;
@@ -78,7 +78,7 @@
@Override
public int width(final IntSequence metricIndexes, final int offset, final int length, final int fontSize,
- final int letterSpacing, final int wordSpacing, final FontOptions options,
+ final int letterSpacing, final int wordSpacing, final FontContext options,
final WritingSystem orthography) {
// TODO Auto-generated method stub
return 0;
Modified: trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java
===================================================================
--- trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-font/src/test/java/org/foray/font/format/ttf/OtfLookupGsubTests.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -39,8 +39,8 @@
import org.foray.font.format.ttf.OtfLookupGsubx04x01.LigatureSet;
import org.axsl.common.i18n.WritingSystem;
+import org.axsl.font.FontContext;
import org.axsl.font.FontException;
-import org.axsl.font.FontOptions;
import org.axsl.ps.Encoding;
import org.junit.Assert;
@@ -267,7 +267,7 @@
Assert.assertArrayEquals(expectedInitialGlyphIndexes, glyphIndexes.toArray());
final TtfTableGsub gsub = ttfFont.getGsubTable();
- gsub.makeSubstitutions(glyphIndexes, FontOptions.DEFAULT, orthography);
+ gsub.makeSubstitutions(glyphIndexes, FontContext.DEFAULT, orthography);
final int[] expectedFinalGlyphIndexes = new int[] {68, 2249, 81, 76, 87, 92};
Assert.assertArrayEquals(expectedFinalGlyphIndexes, glyphIndexes.toArray());
}
@@ -288,7 +288,7 @@
Assert.assertArrayEquals(expectedInitialGlyphIndexes, glyphIndexes.toArray());
final TtfTableGsub gsub = ttfFont.getGsubTable();
- gsub.makeSubstitutions(glyphIndexes, FontOptions.DEFAULT, orthography);
+ gsub.makeSubstitutions(glyphIndexes, FontContext.DEFAULT, orthography);
final int[] expectedFinalGlyphIndexes = new int[] {68, 2249};
Assert.assertArrayEquals(expectedFinalGlyphIndexes, glyphIndexes.toArray());
}
@@ -320,7 +320,7 @@
Assert.assertArrayEquals(expectedInitialGlyphIndexes, glyphIndexes.toArray());
final TtfTableGsub gsub = ttfFont.getGsubTable();
- gsub.makeSubstitutions(glyphIndexes, FontOptions.DEFAULT, orthography);
+ gsub.makeSubstitutions(glyphIndexes, FontContext.DEFAULT, orthography);
final int[] expectedFinalGlyphIndexes = new int[] {
36, 2250, 72, 70, 78, 3, // "Affleck "
68, 2249, 81, 76, 87, 92, 3, // "affinity "
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoLineText.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -31,7 +31,7 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.fo.FoContext;
import org.axsl.fo.fo.GraftingPoint;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.text.line.LineText;
@@ -224,7 +224,7 @@
}
@Override
- public FontOptions inlineFontOptions() {
+ public FontContext inlineFontContext() {
// TODO Auto-generated method stub
return null;
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -129,7 +129,7 @@
import org.axsl.fo.fo.prop.WritingModePa;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.graphic.Graphic;
import org.axsl.graphic.GraphicServer;
@@ -3899,7 +3899,7 @@
public int getWidth(final ParaLeaf leaf) {
final FontUse fontUse = resolvePrimaryFont(null);
return fontUse.width(leaf.getText(), 0, leaf.getText().length(), this.traitFontSize(null), 0, 0,
- FontOptions.DEFAULT, getWritingSystem());
+ FontContext.DEFAULT, getWritingSystem());
}
@Override
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -48,7 +48,7 @@
import org.axsl.fo.fo.CharacterSequence;
import org.axsl.fo.fo.GraftingPoint;
import org.axsl.font.Font;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontServer;
import org.axsl.font.FontUse;
import org.axsl.text.line.LineText;
@@ -879,7 +879,7 @@
}
@Override
- public FontOptions inlineFontOptions() {
+ public FontContext inlineFontContext() {
// TODO Auto-generated method stub
return null;
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -42,7 +42,7 @@
import org.axsl.fo.ProxyFactory;
import org.axsl.fo.fo.GraftingPoint;
import org.axsl.font.Font;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.text.line.LineText;
@@ -408,7 +408,7 @@
}
@Override
- public FontOptions inlineFontOptions() {
+ public FontContext inlineFontContext() {
// TODO Auto-generated method stub
return null;
}
Modified: trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java
===================================================================
--- trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-graphic/src/main/java/org/foray/graphic/batik/PdfTextPainter.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -38,8 +38,8 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.Font;
import org.axsl.font.FontConsumer;
+import org.axsl.font.FontContext;
import org.axsl.font.FontException;
-import org.axsl.font.FontOptions;
import org.axsl.font.FontUtility;
import org.apache.batik.bridge.Mark;
@@ -284,7 +284,7 @@
final int letterSpacing = 0;
/* TODO: Pass the font context below instead of hard-coding it. */
final int width = fontToUse.width(txt, 0, txt.length(),
- awtFontSize * WellKnownConstants.MILLIPOINTS_PER_POINT, letterSpacing, 0, FontOptions.DEFAULT,
+ awtFontSize * WellKnownConstants.MILLIPOINTS_PER_POINT, letterSpacing, 0, FontContext.DEFAULT,
this.writingSystem);
final float advance = WellKnownConstants.millipointsToPoints(width);
float tx = 0;
Modified: trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java
===================================================================
--- trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-linebreak/src/main/java/org/foray/text/line/LineBreaker.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -332,7 +332,7 @@
final FontUse fontUse = lineText.inlinePrimaryFont();
fontUse.registerCharsUsed(word);
return fontUse.width(word, offset, length, lineText.inlineFontSize(), lineText.inlineLetterSpacingOptimum(), 0,
- lineText.inlineFontOptions(), lineText.inlineWritingSystem());
+ lineText.inlineFontContext(), lineText.inlineWritingSystem());
}
/**
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfContentStream4a.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -33,7 +33,7 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.graphic.Graphic;
import org.axsl.graphic.GraphicException;
import org.axsl.pdf.PdfColor;
@@ -110,7 +110,7 @@
}
@Override
- public synchronized void drawText(final CharSequence text, final FontOptions fontOptions,
+ public synchronized void drawText(final CharSequence text, final FontContext fontContext,
final WritingSystem orthography) throws PdfException {
if (text.length() < 1) {
return;
@@ -117,7 +117,7 @@
}
openTextObject();
final CharSequence stringOut =
- this.getCurrentGraphicsState().getTextState().getFont().textToPdf(text, fontOptions, orthography);
+ this.getCurrentGraphicsState().getTextState().getFont().textToPdf(text, fontContext, orthography);
write(stringOut);
}
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfFont4a.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -32,7 +32,7 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.Font;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.pdf.PdfFont;
import org.axsl.ps.Encoding;
@@ -431,10 +431,10 @@
}
@Override
- public CharSequence textToPdf(final CharSequence theString, final FontOptions fontOptions,
+ public CharSequence textToPdf(final CharSequence theString, final FontContext fontContext,
final WritingSystem orthography) {
/* If available, delegate this to the FontUse instance. */
- return this.fsFont == null ? theString : this.fsFont.textToPdf(theString, fontOptions, orthography);
+ return this.fsFont == null ? theString : this.fsFont.textToPdf(theString, fontContext, orthography);
}
@Override
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/object/PdfString.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -29,7 +29,7 @@
package org.foray.pdf.object;
import org.axsl.common.i18n.WritingSystem;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.pdf.PdfFont;
/**
@@ -49,7 +49,7 @@
/** The font options, such as ligatures and small-caps that should be used when computing the glyphs for this
* word. */
- private FontOptions fontOptions;
+ private FontContext fontContext;
/** The orthography that should be used when applying font features like substitutions. */
private WritingSystem orthography;
@@ -58,21 +58,21 @@
* Constructor.
* @param string The String being encapsulated.
* @param font The font in which this String should be written.
- * @param fontOptions The font options, such as ligatures and small-caps that should be used when computing the
+ * @param fontContext The font options, such as ligatures and small-caps that should be used when computing the
* glyphs for this word.
* @param orthography The orthography that should be used when applying font features like substitutions.
*/
- public PdfString(final String string, final PdfFont font, final FontOptions fontOptions,
+ public PdfString(final String string, final PdfFont font, final FontContext fontContext,
final WritingSystem orthography) {
this.theString = string;
this.font = font;
- this.fontOptions = fontOptions;
+ this.fontContext = fontContext;
this.orthography = orthography;
}
@Override
public String toPDF(final PdfDocument4a doc) {
- return this.font.textToPdf(this.theString, this.fontOptions, this.orthography).toString();
+ return this.font.textToPdf(this.theString, this.fontContext, this.orthography).toString();
}
/**
Modified: trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java
===================================================================
--- trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-pdf/src/main/java/org/foray/pdf/util/PdfGraphics2D.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -43,7 +43,7 @@
import org.axsl.common.i18n.WritingSystem;
import org.axsl.font.FontConsumer;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.font.FontUtility;
import org.axsl.graphic.output.GraphicOutputContext;
@@ -680,7 +680,7 @@
this.write(matrixString + "cm");
this.write("1 0 0 -1 0 0 Tm ");
- final CharSequence outputString = font.textToPdf(s, FontOptions.DEFAULT, this.writingSystem);
+ final CharSequence outputString = font.textToPdf(s, FontContext.DEFAULT, this.writingSystem);
this.write(outputString);
this.write("ET");
Modified: trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java
===================================================================
--- trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-pdf/src/test/java/org/foray/pdf/object/PdfDocumentTests.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -43,8 +43,8 @@
import org.foray.ps.encode.EncodingStandard;
import org.axsl.font.Font;
+import org.axsl.font.FontContext;
import org.axsl.font.FontException;
-import org.axsl.font.FontOptions;
import org.axsl.font.FontUse;
import org.axsl.pdf.PdfException;
import org.axsl.pdf.PdfFont;
@@ -102,7 +102,7 @@
contentStream.setCursor(36, heightPoints - 36);
contentStream.openTextObject();
- contentStream.drawText("Hello World!", FontOptions.DEFAULT, WritingSystem4a.USA);
+ contentStream.drawText("Hello World!", FontContext.DEFAULT, WritingSystem4a.USA);
contentStream.closeTextObject();
contentStream.close();
Modified: trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java
===================================================================
--- trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-render/src/main/java/org/foray/render/pdf/PdfRenderer.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -42,7 +42,7 @@
import org.axsl.common.value.LinkType;
import org.axsl.common.value.RuleStyle;
import org.axsl.font.Font;
-import org.axsl.font.FontOptions;
+import org.axsl.font.FontContext;
import org.axsl.font.FontUse;
import org.axsl.galley.Area;
import org.axsl.galley.AreaNode;
@@ -401,7 +401,7 @@
if (newFont != currentFont) {
/* Font has changed. Write the text so far. */
final int size = i - startIndex;
- paintText(area, currentFont, text, startIndex, size, area.fontOptions(), area.writingSystem());
+ paintText(area, currentFont, text, startIndex, size, area.fontContext(), area.writingSystem());
if (size > 0) {
startIndex = i;
}
@@ -410,7 +410,7 @@
}
if (startIndex < text.length()) {
paintText(area, currentFont, text, startIndex, text.length() - startIndex,
- area.fontOptions(), area.writingSystem());
+ area.fontContext(), area.writingSystem());
}
} catch (final PdfException e) {
throw new GalleyVisitorException(e);
@@ -466,7 +466,7 @@
* @param startIndex The starting index into {@code text} which should
* be rendered.
* @param size The size of {@code text} which should be rendered.
- * @param fontOptions The font options, such as ligatures and small-caps that should be used when computing the
+ * @param fontContext The font options, such as ligatures and small-caps that should be used when computing the
* glyphs for this word.
* @param orthography The orthography that should be used when applying font features like substitutions.
* @throws PdfException For errors painting the text.
@@ -473,7 +473,7 @@
*/
private void paintText(final TextArea area, final FontUse fontUse,
final CharSequence text, final int startIndex, final int size,
- final FontOptions fontOptions, final WritingSystem orthography) throws PdfException {
+ final FontContext fontContext, final WritingSystem orthography) throws PdfException {
if (size < 1) {
return;
}
@@ -482,7 +482,7 @@
/* Paint the text. */
final CharSequence textToWrite = text.subSequence(startIndex, startIndex + size);
- getContentStream().drawText(textToWrite, fontOptions, orthography);
+ getContentStream().drawText(textToWrite, fontContext, orthography);
}
@Override
Modified: trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java
===================================================================
--- trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java 2021-11-19 11:53:18 UTC (rev 12093)
+++ trunk/foray/foray-render/src/main/java/org/foray/render/ps/PsRenderer.java 2021-11-19 12:53:31 UTC (rev 12094)
@@ -898,7 +898,7 @@
write(moveTo(area));
StringBuilder sb = new StringBuilder();
final IntSequence glyphIndexes =
- area.getPrimaryFont().encode(text, 0, text.length(), area.fontOptions(), area.writingSystem());
+ area.getPrimaryFont().encode(text, 0, text.length(), area.fontContext(), area.writingSystem());
for (int i = 0; i < glyphIndexes.length(); i++) {
final int glyphIndex = glyphIndexes.intAt(i);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-19 11:53:22
|
Revision: 12093
http://sourceforge.net/p/foray/code/12093
Author: victormote
Date: 2021-11-19 11:53:18 +0000 (Fri, 19 Nov 2021)
Log Message:
-----------
1. Add LDML DTDs for visibility. 2. Improve some doc.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/natural-languages/mah-language.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/package-info.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/schema/
trunk/foray/foray-orthography/src/main/schema/ldml/
trunk/foray/foray-orthography/src/main/schema/ldml/00-readme.txt
trunk/foray/foray-orthography/src/main/schema/ldml/cldrTest.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/ldml.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/ldmlBCP47.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/ldmlICIR.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/ldmlICU.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/ldmlOpenOffice.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/ldmlSupplemental.dtd
trunk/foray/foray-orthography/src/main/schema/ldml/unicode-license.txt
Modified: trunk/foray/foray-orthography/src/main/data/natural-languages/mah-language.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/natural-languages/mah-language.xml 2021-11-19 00:07:19 UTC (rev 12092)
+++ trunk/foray/foray-orthography/src/main/data/natural-languages/mah-language.xml 2021-11-19 11:53:18 UTC (rev 12093)
@@ -7,6 +7,7 @@
<!--
Natural language definition for the Marshallese language.
+TODO: This should probably be converted to the LDML format (see src/main/schema/ldml).
-->
<axsl-natural-language iso-639="mah">
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-19 00:07:19 UTC (rev 12092)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-19 11:53:18 UTC (rev 12093)
@@ -60,6 +60,11 @@
* Segmentation</a>
*/
public abstract class Lexer4a implements Lexer {
+ /*
+ * TODO: After this class was written, I found the following interesting blog post, which should be considered
+ * further:
+ * https://sujitpal.blogspot.com/2008/05/tokenizing-text-with-icu4js.html
+ */
/**
* Enumeration of possible character types, as they relate to word-breaking.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/package-info.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/package-info.java 2021-11-19 00:07:19 UTC (rev 12092)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/package-info.java 2021-11-19 11:53:18 UTC (rev 12093)
@@ -44,7 +44,7 @@
* word-recognition/spell-checking, and reporting of optional hyphenation points.
* However, these functions are very data- and configuration-dependent.
* Support for any given writing system may be nonexistent or deficient.
- * It also provides some limited part-of-speech data, mostly to disambiguate words with the same characters that have
+ * FOray also provides some limited part-of-speech data, mostly to disambiguate words with the same characters that have
* different hyphenation possibilities depending on part-of-speech.</p>
*
* <p>When defining a writing system, FOray has adopted the naming convention of identifying the components in the
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2021-11-19 00:07:19 UTC (rev 12092)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2021-11-19 11:53:18 UTC (rev 12093)
@@ -26,11 +26,6 @@
* $LastChangedBy$
*/
-/*
- * Known contributors:
- * Carlos Villegas <ca...@un...> (Original author)
- */
-
package org.foray.orthography.util;
import org.foray.common.AxslDtdUtil;
@@ -59,6 +54,9 @@
* from an XML file.
*/
public class NatLangParser extends SaxParser {
+ /*
+ * TODO: This class should probably be converted to work with the CLDR/LDML (see src/main/schema/ldml).
+ */
/** The natural language instance being parsed. */
private NaturalLanguage nl;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java 2021-11-19 00:07:19 UTC (rev 12092)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NaturalLanguage.java 2021-11-19 11:53:18 UTC (rev 12093)
@@ -50,6 +50,9 @@
* other.
*/
public final class NaturalLanguage {
+ /*
+ * TODO: This class should probably be converted to work with the CLDR/LDML (see src/main/schema/ldml).
+ */
/**
* Inner class for storing one grapheme cluster. A grapheme cluster is a
Added: trunk/foray/foray-orthography/src/main/schema/ldml/00-readme.txt
===================================================================
--- trunk/foray/foray-orthography/src/main/schema/ldml/00-readme.txt (rev 0)
+++ trunk/foray/foray-orthography/src/main/schema/ldml/00-readme.txt 2021-11-19 11:53:18 UTC (rev 12093)
@@ -0,0 +1,28 @@
+Except as noted below, the files in this directory were downloaded from the
+following website on November 19, 2021:
+https://www.unicode.org/cldr/dtd/40/
+
+They are the DTDs for release 40 (the current release) of the Unicode Locale
+Data Markup Language (LDML). More information about the uses of the LDML can be
+found at these locations:
+https://www.unicode.org/reports/tr35/
+https://cldr.unicode.org/index/downloads
+
+LDML is a specification of the Unicode CLDR (Common Locale Data Repository):
+https://cldr.unicode.org
+
+ICU provides some services for accessing at least some of this data:
+https://unicode-org.github.io/icu/userguide/strings/unicodeset.html
+https://unicode-org.github.io/icu/
+
+
+The license file was downloaded here, also November 19, 2021:
+https://github.com/unicode-org/cldr/blob/release-40/unicode-license.txt
+from a link on this page:
+https://github.com/unicode-org/cldr/tree/release-40
+
+
+Victor Mote
+November 19, 2021
+
+# End of memo
Property changes on: trunk/foray/foray-orthography/src/main/schema/ldml/00-readme.txt
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Added: trunk/foray/foray-orthography/src/main/schema/ldml/cldrTest.dtd
===================================================================
--- trunk/foray/foray-orthography/src/main/schema/ldml/cldrTest.dtd (rev 0)
+++ trunk/foray/foray-orthography/src/main/schema/ldml/cldrTest.dtd 2021-11-19 11:53:18 UTC (rev 12093)
@@ -0,0 +1,51 @@
+<!--
+Copyright © 2003-2010 Unicode, Inc. and others. All rights reserved. Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and any associated documentation (the "Data Files") or Unicode software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that (a) the above copyright notice(s) and this permission notice appear with all copies of the Data Files or Software, (b) both the above copyright notice(s) and this permission notice appear in associated documentation, and (c) there is clear notice in each modified Data File or in the Software as well as in the documentation associated with the Data File(s) or Software that the data or software has been modified.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder.
+-->
+
+<!ELEMENT cldrTest ((number | date | zoneFields | collation | likelySubtags)*) >
+<!ATTLIST cldrTest version CDATA #REQUIRED>
+<!ATTLIST cldrTest base CDATA #REQUIRED>
+
+<!ELEMENT number (result*) >
+<!ATTLIST number locales CDATA #IMPLIED>
+
+<!ELEMENT date (result*) >
+<!ATTLIST date locales CDATA #IMPLIED>
+
+<!ELEMENT zoneFields (result*) >
+<!ATTLIST zoneFields locales CDATA #IMPLIED>
+
+<!ELEMENT collation (result*) >
+<!ATTLIST collation locales CDATA #IMPLIED>
+
+<!ELEMENT likelySubtags (result*) >
+
+<!ELEMENT result ( #PCDATA ) >
+
+<!-- common result attributes -->
+<!ATTLIST result input CDATA #IMPLIED>
+<!ATTLIST result draft NMTOKENS #IMPLIED> <!-- approved contributed provisional unconfirmed, or any combination -->
+
+<!-- number result attributes -->
+<!ATTLIST result numberType (standard | integer | decimal | percent | scientific) #IMPLIED>
+
+<!-- date result attributes -->
+<!ATTLIST result dateType (none | short | medium | long | full) #IMPLIED>
+<!ATTLIST result timeType (none | short | medium | long | full) #IMPLIED>
+
+<!-- zoneFields result attributes -->
+<!ATTLIST result zone CDATA #IMPLIED>
+<!ATTLIST result date NMTOKEN #IMPLIED>
+<!ATTLIST result field NMTOKEN #IMPLIED>
+<!ATTLIST result parse CDATA #IMPLIED>
+
+<!-- likelySubtags result attributes -->
+<!ATTLIST result add NMTOKEN #IMPLIED>
+<!ATTLIST result remove NMTOKEN #IMPLIED>
+
Property changes on: trunk/foray/foray-orthography/src/main/schema/ldml/cldrTest.dtd
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Added: trunk/foray/foray-orthography/src/main/schema/ldml/ldml.dtd
===================================================================
--- trunk/foray/foray-orthography/src/main/schema/ldml/ldml.dtd (rev 0)
+++ trunk/foray/foray-orthography/src/main/schema/ldml/ldml.dtd 2021-11-19 11:53:18 UTC (rev 12093)
@@ -0,0 +1,3208 @@
+<!--
+Copyright © 1991-2021 Unicode, Inc.
+For terms of use, see http://www.unicode.org/copyright.html
+SPDX-License-Identifier: Unicode-DFS-2016
+CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
+-->
+
+<!ELEMENT ldml ( identity, ( alias | ( fallback*, localeDisplayNames?, layout?, contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, listPatterns?, collations?, posix?, characterLabels?, segmentations?, rbnf?, typographicNames?, annotations?, metadata?, references?, special* ) ) ) >
+<!ATTLIST ldml version CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST ldml draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT identity ( alias | ( version, generation?, language, script?, territory?, variant?, special* ) ) >
+<!ATTLIST identity draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!-- ######################################################### -->
+<!-- # These elements are common to almost all elements defined -->
+
+<!ELEMENT alias ( special* ) >
+<!ATTLIST alias source NMTOKEN #REQUIRED >
+ <!--@MATCH:literal/locale-->
+ <!--@VALUE-->
+<!ATTLIST alias path CDATA #IMPLIED >
+ <!--@MATCH:regex/\.\..*-->
+ <!--@VALUE-->
+<!ATTLIST alias alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST alias draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT special ANY >
+
+<!ELEMENT version EMPTY >
+<!ATTLIST version number CDATA #REQUIRED >
+ <!--@MATCH:regex/\$Revision.*\$-->
+ <!--@METADATA-->
+<!ATTLIST version cldrVersion CDATA #FIXED "40" >
+ <!--@MATCH:any-->
+ <!--@VALUE-->
+<!ATTLIST version draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT generation EMPTY >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST generation date CDATA #REQUIRED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST generation draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT language ( #PCDATA ) >
+<!ATTLIST language type NMTOKEN #REQUIRED >
+ <!--@MATCH:validity/locale-->
+<!ATTLIST language alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/long, secondary, short, variant, menu-->
+<!ATTLIST language draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST language references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT script ( #PCDATA ) >
+<!ATTLIST script type NMTOKEN #REQUIRED >
+ <!--@MATCH:validity/script-->
+<!ATTLIST script alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/secondary, short, stand-alone, variant-->
+<!ATTLIST script draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST script references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT territory ( #PCDATA ) >
+<!ATTLIST territory type NMTOKEN #REQUIRED >
+ <!--@MATCH:validity/region-->
+<!ATTLIST territory alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/short, variant-->
+<!ATTLIST territory draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST territory references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT variant ( #PCDATA ) >
+<!ATTLIST variant type NMTOKEN #REQUIRED >
+ <!--@MATCH:validity/variant-->
+<!ATTLIST variant alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/secondary, variant-->
+<!ATTLIST variant draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST variant references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT fallback ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST fallback alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST fallback draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST fallback references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT localeDisplayNames ( alias | ( localeDisplayPattern?, languages?, scripts?, territories?, subdivisions?, variants?, keys?, types?, transformNames?, measurementSystemNames?, codePatterns?, special* ) ) >
+<!ATTLIST localeDisplayNames draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT localeDisplayPattern ( alias | ( localePattern*, localeSeparator*, localeKeyTypePattern*, special* ) ) >
+<!ATTLIST localeDisplayPattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST localeDisplayPattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST localeDisplayPattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT localePattern ( #PCDATA ) >
+<!ATTLIST localePattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST localePattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST localePattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT localeSeparator ( #PCDATA ) >
+<!ATTLIST localeSeparator alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST localeSeparator draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST localeSeparator references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT localeKeyTypePattern ( #PCDATA ) >
+<!ATTLIST localeKeyTypePattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST localeKeyTypePattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST localeKeyTypePattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!-- # Either 1 alias OR any specials, any order, zero or more language -->
+
+<!ELEMENT languages ( alias | ( language | special )* ) >
+<!ATTLIST languages draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST languages standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST languages references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST languages validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!-- # Either 1 alias OR any specials, any order, zero or more script -->
+
+<!ELEMENT scripts ( alias | ( script | special )* ) >
+<!ATTLIST scripts draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST scripts standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST scripts references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST scripts validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!-- # Either 1 alias OR any specials, any order, zero or more territory -->
+
+<!ELEMENT territories ( alias | ( territory | special )* ) >
+<!ATTLIST territories draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST territories standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST territories references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST territories validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT subdivisions ( alias | ( subdivision | special )* ) >
+<!ATTLIST subdivisions draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST subdivisions references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT subdivision ( #PCDATA ) >
+<!ATTLIST subdivision type NMTOKEN #REQUIRED >
+ <!--@MATCH:or/validity/subdivision||literal/AS, AW, AX, BL, CP, CW, GF, GP, GU, HK, IC, MF, MO, MP, MQ, NC, PF, PM, PR, RE, SX, TA, TF, TW, UM, VI, WF, YT, itsd, no50-->
+<!ATTLIST subdivision alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST subdivision draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+
+<!-- # Either 1 alias OR any specials, any order, zero or more variant -->
+
+<!ELEMENT variants ( alias | ( variant | special )* ) >
+<!ATTLIST variants draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST variants standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST variants references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST variants validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!-- # Either 1 alias OR any specials, any order, zero or more key -->
+
+<!ELEMENT keys ( alias | ( key | special )* ) >
+<!ATTLIST keys draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST keys standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST keys references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST keys validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT key ( #PCDATA ) >
+<!ATTLIST key type NMTOKEN #REQUIRED >
+ <!--@MATCH:or/bcp47/anykey||literal/t-->
+<!ATTLIST key alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST key draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST key references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!-- # Either 1 alias OR any specials, any order, zero or more type -->
+
+<!ELEMENT types ( alias | ( type | special )* ) >
+<!ATTLIST types draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST types standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST types references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST types validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT type ( #PCDATA ) >
+<!ATTLIST type key NMTOKEN #REQUIRED >
+ <!--@MATCH:bcp47/anykey-->
+<!ATTLIST type type NMTOKEN #REQUIRED >
+ <!--@MATCH:bcp47/anyvalue-->
+<!ATTLIST type alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/short, variant-->
+<!ATTLIST type draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST type references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT transformNames ( alias | ( transformName | special )* ) >
+ <!--@DEPRECATED-->
+<!ATTLIST transformNames draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST transformNames references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT transformName ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST transformName type NMTOKEN #REQUIRED >
+ <!--@DEPRECATED-->
+<!ATTLIST transformName alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST transformName draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST transformName references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!-- # Either exactly one alias, OR zero or more measurementSystemName and special elements, in any order -->
+
+<!ELEMENT measurementSystemNames ( alias | ( measurementSystemName | special )* ) >
+<!ATTLIST measurementSystemNames draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystemNames references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST measurementSystemNames validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT measurementSystemName ( #PCDATA ) >
+<!ATTLIST measurementSystemName type (US | metric | UK) #REQUIRED >
+<!ATTLIST measurementSystemName alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST measurementSystemName draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST measurementSystemName references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT codePatterns ( alias | ( codePattern | special )* ) >
+
+<!ELEMENT codePattern ( #PCDATA ) >
+<!ATTLIST codePattern type NMTOKEN #REQUIRED >
+ <!--@MATCH:literal/language, script, territory-->
+<!ATTLIST codePattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST codePattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST codePattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!-- ######################################################### -->
+<!-- # layout and orientation are script-specific, so the validSubLocales attribute is not required -->
+
+<!ELEMENT layout ( alias | ( orientation*, inList*, inText*, special* ) ) >
+<!ATTLIST layout draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST layout references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT orientation ( alias | ( characterOrder*, lineOrder*, special* ) ) >
+<!ATTLIST orientation characters (left-to-right | right-to-left | top-to-bottom | bottom-to-top) #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST orientation lines (left-to-right | right-to-left | top-to-bottom | bottom-to-top) #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST orientation alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST orientation draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST orientation standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST orientation references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT characterOrder ( #PCDATA ) >
+<!ATTLIST characterOrder alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST characterOrder draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT lineOrder ( #PCDATA ) >
+<!ATTLIST lineOrder alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST lineOrder draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT inList ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST inList casing (titlecase-words | titlecase-firstword | lowercase-words | mixed) #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST inList alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST inList draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST inList references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT inText ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST inText type (languages | scripts | territories | variants | keys | types | measurementSystemNames | monthWidth | dayWidth | quarterWidth | long | fields | currency) #IMPLIED >
+ <!--@DEPRECATED-->
+<!ATTLIST inText alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST inText draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST inText references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT contextTransforms ( alias | ( contextTransformUsage*, special* ) ) >
+<!ATTLIST contextTransforms alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST contextTransforms draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST contextTransforms references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST contextTransforms validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT contextTransformUsage ( alias | ( contextTransform*, special* ) ) >
+<!ATTLIST contextTransformUsage type CDATA #REQUIRED >
+ <!--@MATCH:literal/calendar-field, currencyName, day-format-except-narrow, day-standalone-except-narrow, era-abbr, era-name, keyValue, languages, month-format-except-narrow, month-standalone-except-narrow, number-spellout, relative, script, typographicNames-->
+<!ATTLIST contextTransformUsage alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST contextTransformUsage draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST contextTransformUsage references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST contextTransformUsage validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT contextTransform ( #PCDATA ) >
+<!ATTLIST contextTransform type (uiListOrMenu | stand-alone) #REQUIRED >
+<!ATTLIST contextTransform alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST contextTransform draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST contextTransform references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT characters ( alias | ( exemplarCharacters*, ellipsis*, moreInformation*, stopwords*, indexLabels*, mapping*, parseLenients*, special* ) ) >
+<!ATTLIST characters draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT exemplarCharacters ( #PCDATA | cp )* >
+<!ATTLIST exemplarCharacters type (auxiliary | standard | punctuation | currencySymbol | index | numbers) #IMPLIED >
+ <!--@DEPRECATED:currencySymbol-->
+<!ATTLIST exemplarCharacters alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST exemplarCharacters draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST exemplarCharacters standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST exemplarCharacters references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST exemplarCharacters validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!-- # This element can occur anywhere there may be localizable data -->
+
+<!ELEMENT cp ( special* ) >
+ <!--@DEPRECATED-->
+<!ATTLIST cp hex NMTOKEN #REQUIRED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT ellipsis ( #PCDATA ) >
+<!ATTLIST ellipsis type (initial | medial | final | word-initial | word-medial | word-final) #REQUIRED >
+<!ATTLIST ellipsis alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST ellipsis draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST ellipsis references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT moreInformation ( #PCDATA ) >
+<!ATTLIST moreInformation alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST moreInformation draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST moreInformation references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT stopwords ( stopwordList* ) >
+ <!--@DEPRECATED-->
+
+<!ELEMENT stopwordList ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST stopwordList type NMTOKEN #REQUIRED >
+ <!--@DEPRECATED-->
+<!ATTLIST stopwordList alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST stopwordList draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST stopwordList references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT indexLabels ( indexSeparator*, compressedIndexSeparator*, indexRangePattern*, indexLabelBefore*, indexLabelAfter*, indexLabel* ) >
+ <!--@DEPRECATED-->
+
+<!ELEMENT indexSeparator ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST indexSeparator alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexSeparator draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexSeparator references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT compressedIndexSeparator ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST compressedIndexSeparator alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST compressedIndexSeparator draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST compressedIndexSeparator references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT indexRangePattern ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST indexRangePattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexRangePattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexRangePattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT indexLabelBefore ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabelBefore alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabelBefore draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabelBefore references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT indexLabelAfter ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabelAfter alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabelAfter draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabelAfter references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT indexLabel ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabel indexSource CDATA #IMPLIED >
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabel priority (1 | 2 | 3) #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabel alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabel draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST indexLabel references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT mapping ( special* ) >
+ <!--@DEPRECATED-->
+<!ATTLIST mapping registry NMTOKEN #REQUIRED >
+ <!--@DEPRECATED-->
+<!ATTLIST mapping type NMTOKEN #IMPLIED >
+ <!-- use choice instead -->
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST mapping choice NMTOKEN #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST mapping alt NMTOKENS #IMPLIED >
+ <!--@DEPRECATED-->
+<!ATTLIST mapping draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST mapping standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST mapping references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST mapping validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT parseLenients ( alias | ( parseLenient*, special* ) ) >
+<!ATTLIST parseLenients scope (general | number | date) #REQUIRED >
+<!ATTLIST parseLenients level (lenient | stricter) #REQUIRED >
+
+<!ELEMENT parseLenient ( #PCDATA ) >
+<!ATTLIST parseLenient sample CDATA #REQUIRED >
+ <!--@MATCH:any-->
+<!ATTLIST parseLenient alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST parseLenient draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT delimiters ( alias | ( quotationStart*, quotationEnd*, alternateQuotationStart*, alternateQuotationEnd*, special* ) ) >
+<!ATTLIST delimiters alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST delimiters draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST delimiters standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST delimiters references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST delimiters validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT quotationStart ( #PCDATA | cp )* >
+<!ATTLIST quotationStart alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST quotationStart draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST quotationStart references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT quotationEnd ( #PCDATA | cp )* >
+<!ATTLIST quotationEnd alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST quotationEnd draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST quotationEnd references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT alternateQuotationStart ( #PCDATA | cp )* >
+<!ATTLIST alternateQuotationStart alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST alternateQuotationStart draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST alternateQuotationStart references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT alternateQuotationEnd ( #PCDATA | cp )* >
+<!ATTLIST alternateQuotationEnd alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST alternateQuotationEnd draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST alternateQuotationEnd references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT measurement ( alias | ( measurementSystem*, paperSize*, special* ) ) >
+ <!-- use measurementData in supplemental instead -->
+ <!--@DEPRECATED-->
+<!ATTLIST measurement alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurement draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurement standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurement references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurement validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT measurementSystem ( special* ) >
+ <!-- use measurementSystem in supplemental instead -->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem type (metric | US | UK) #REQUIRED >
+ <!-- use choice instead -->
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem choice (metric | US | UK) #IMPLIED >
+ <!-- really required, but needs to be optional to support type also -->
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST measurementSystem validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT paperSize ( alias | ( height*, width*, special* ) ) >
+ <!-- use paperSize in supplemental instead -->
+ <!--@DEPRECATED-->
+<!ATTLIST paperSize alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST paperSize draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST paperSize standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST paperSize references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST paperSize validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT height ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST height alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST height draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST height references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT width ( #PCDATA ) >
+ <!--@DEPRECATED-->
+<!ATTLIST width alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST width draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST width references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!-- ######################################################### -->
+
+<!ELEMENT dates ( alias | ( localizedPatternChars*, dateRangePattern*, calendars?, fields?, timeZoneNames?, special* ) ) >
+<!ATTLIST dates alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST dates draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dates standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dates references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST dates validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT localizedPatternChars ( #PCDATA | cp )* >
+ <!--@DEPRECATED-->
+<!ATTLIST localizedPatternChars alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST localizedPatternChars draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST localizedPatternChars standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST localizedPatternChars references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST localizedPatternChars validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT dateRangePattern ( #PCDATA ) >
+ <!-- use intervalFormats. -->
+ <!--@DEPRECATED-->
+<!ATTLIST dateRangePattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST dateRangePattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dateRangePattern standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dateRangePattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dateRangePattern validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT calendars ( alias | ( default*, calendar*, special* ) ) >
+ <!-- use calendarPreferenceData instead of default element -->
+<!ATTLIST calendars draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST calendars validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT default ( special* ) >
+ <!--@DEPRECATED-->
+<!ATTLIST default type NMTOKEN #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST default choice NMTOKEN #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST default alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST default draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST default references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT calendar ( alias | ( months?, monthNames?, monthAbbr?, monthPatterns?, days?, dayNames?, dayAbbr?, quarters?, week?, am*, pm*, dayPeriods?, eras?, cyclicNameSets?, dateFormats?, timeFormats?, dateTimeFormats?, fields*, special* ) ) >
+ <!-- use of fields is deprecated here -->
+<!ATTLIST calendar type NMTOKEN #REQUIRED >
+ <!--@MATCH:bcp47/ca-->
+<!ATTLIST calendar alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST calendar draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST calendar standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST calendar references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST calendar validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT months ( alias | ( default*, monthContext*, special* ) ) >
+<!ATTLIST months alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST months draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST months standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST months references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST months validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthContext ( alias | ( default*, monthWidth*, special* ) ) >
+<!ATTLIST monthContext type (format | stand-alone) #REQUIRED >
+<!ATTLIST monthContext alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST monthContext draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthContext standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthContext references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST monthContext validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthWidth ( alias | ( month*, special* ) ) >
+<!ATTLIST monthWidth type (abbreviated | narrow | wide) #REQUIRED >
+<!ATTLIST monthWidth alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST monthWidth draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthWidth standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthWidth references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST monthWidth validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT month ( #PCDATA | cp )* >
+<!ATTLIST month type (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13) #REQUIRED >
+<!ATTLIST month yeartype (standard | leap) #IMPLIED >
+<!ATTLIST month alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST month draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST month references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT monthNames ( alias | ( month*, special* ) ) >
+ <!--@DEPRECATED-->
+<!ATTLIST monthNames draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthAbbr ( alias | ( month*, special* ) ) >
+ <!--@DEPRECATED-->
+<!ATTLIST monthAbbr draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthPatterns ( alias | ( monthPatternContext*, special* ) ) >
+<!ATTLIST monthPatterns alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST monthPatterns draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthPatterns references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST monthPatterns validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthPatternContext ( alias | ( monthPatternWidth*, special* ) ) >
+<!ATTLIST monthPatternContext type (format | stand-alone | numeric) #REQUIRED >
+<!ATTLIST monthPatternContext alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST monthPatternContext draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthPatternContext references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST monthPatternContext validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthPatternWidth ( alias | ( monthPattern*, special* ) ) >
+<!ATTLIST monthPatternWidth type (abbreviated | narrow | wide | all) #REQUIRED >
+<!ATTLIST monthPatternWidth alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST monthPatternWidth draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST monthPatternWidth references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST monthPatternWidth validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT monthPattern ( #PCDATA ) >
+<!ATTLIST monthPattern type (leap | standardAfterLeap | combined) #REQUIRED >
+<!ATTLIST monthPattern alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST monthPattern draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST monthPattern references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT days ( alias | ( default*, dayContext*, special* ) ) >
+<!ATTLIST days alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST days draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST days standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST days references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST days validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT dayContext ( alias | ( default*, dayWidth*, special* ) ) >
+<!ATTLIST dayContext type (format | stand-alone) #REQUIRED >
+<!ATTLIST dayContext alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST dayContext draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayContext standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayContext references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST dayContext validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT dayWidth ( alias | ( day*, special* ) ) >
+<!ATTLIST dayWidth type (abbreviated | narrow | short | wide) #REQUIRED >
+<!ATTLIST dayWidth alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST dayWidth draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayWidth standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayWidth references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST dayWidth validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT day ( #PCDATA ) >
+<!ATTLIST day type (sun | mon | tue | wed | thu | fri | sat) #REQUIRED >
+<!ATTLIST day alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST day draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST day references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT dayNames ( alias | ( day*, special* ) ) >
+ <!--@DEPRECATED-->
+<!ATTLIST dayNames draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT dayAbbr ( alias | ( day*, special* ) ) >
+ <!--@DEPRECATED-->
+<!ATTLIST dayAbbr draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT quarters ( alias | ( default*, quarterContext*, special* ) ) >
+<!ATTLIST quarters alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST quarters draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST quarters references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST quarters validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT quarterContext ( alias | ( default*, quarterWidth*, special* ) ) >
+<!ATTLIST quarterContext type (format | stand-alone) #REQUIRED >
+<!ATTLIST quarterContext alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST quarterContext draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST quarterContext references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST quarterContext validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT quarterWidth ( alias | ( quarter*, special* ) ) >
+<!ATTLIST quarterWidth type (abbreviated | narrow | wide) #REQUIRED >
+<!ATTLIST quarterWidth alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST quarterWidth draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST quarterWidth references CDATA #IMPLIED >
+ <!--@METADATA-->
+<!ATTLIST quarterWidth validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT quarter ( #PCDATA ) >
+<!ATTLIST quarter type (1 | 2 | 3 | 4) #REQUIRED >
+<!ATTLIST quarter alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+<!ATTLIST quarter draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED:true, false-->
+<!ATTLIST quarter references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT week ( alias | ( minDays*, firstDay*, weekendStart*, weekendEnd*, special* ) ) >
+ <!-- use supplemental weekData -->
+ <!--@DEPRECATED-->
+<!ATTLIST week alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST week draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST week standard CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST week references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST week validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT minDays ( special* ) >
+ <!--@DEPRECATED-->
+<!ATTLIST minDays count (1 | 2 | 3 | 4 | 5 | 6 | 7) #REQUIRED >
+ <!--@DEPRECATED-->
+<!ATTLIST minDays alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST minDays draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST minDays references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT firstDay ( special* ) >
+ <!-- use supplemental data -->
+ <!--@DEPRECATED-->
+<!ATTLIST firstDay day (sun | mon | tue | wed | thu | fri | sat) #REQUIRED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST firstDay alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST firstDay draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST firstDay references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT weekendStart ( special* ) >
+ <!-- use supplemental data -->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendStart day (sun | mon | tue | wed | thu | fri | sat) #REQUIRED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendStart time CDATA "00:00" >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendStart alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendStart draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendStart references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT weekendEnd ( special* ) >
+ <!-- use supplemental data -->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendEnd day (sun | mon | tue | wed | thu | fri | sat) #REQUIRED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendEnd time CDATA "24:00" >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendEnd alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendEnd draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST weekendEnd references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT am ( #PCDATA ) >
+ <!-- use dayPeriods -->
+ <!--@DEPRECATED-->
+<!ATTLIST am alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST am draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST am references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST am validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT pm ( #PCDATA ) >
+ <!-- use dayPeriods -->
+ <!--@DEPRECATED-->
+<!ATTLIST pm alt NMTOKENS #IMPLIED >
+ <!--@MATCH:literal/variant-->
+ <!--@DEPRECATED-->
+<!ATTLIST pm draft (approved | contributed | provisional | unconfirmed | true | false) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST pm references CDATA #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST pm validSubLocales CDATA #IMPLIED >
+ <!--@VALUE-->
+ <!--@DEPRECATED-->
+
+<!ELEMENT dayPeriods ( alias | ( dayPeriodContext*, special* ) ) >
+<!ATTLIST dayPeriods draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayPeriods references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT dayPeriodContext ( alias | ( dayPeriodWidth*, special* ) ) >
+<!ATTLIST dayPeriodContext type NMTOKEN #REQUIRED >
+ <!--@MATCH:literal/format, stand-alone-->
+<!ATTLIST dayPeriodContext draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayPeriodContext references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT dayPeriodWidth ( alias | ( dayPeriod*, special* ) ) >
+<!ATTLIST dayPeriodWidth type (abbreviated | narrow | wide) #REQUIRED >
+<!ATTLIST dayPeriodWidth draft (approved | contributed | provisional | unconfirmed) #IMPLIED >
+ <!--@METADATA-->
+ <!--@DEPRECATED-->
+<!ATTLIST dayPeriodWidth references CDATA #IMPLIED >
+ <!--@METADATA-->
+
+<!ELEMENT dayPeriod ( #PCDATA ) >
+<!ATTLIST dayPeriod type NMTOKEN #REQUIRED >
+ <!--@MATCH:literal/afternoon1, afternoon2, am, evening1, evening2, midnight, morning1, mo...
[truncated message content] |
|
From: <vic...@us...> - 2021-11-18 20:41:10
|
Revision: 12091
http://sourceforge.net/p/foray/code/12091
Author: victormote
Date: 2021-11-18 20:41:08 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Conform to change in class package.
Modified Paths:
--------------
trunk/foray/foray-orthography/build.gradle
Modified: trunk/foray/foray-orthography/build.gradle
===================================================================
--- trunk/foray/foray-orthography/build.gradle 2021-11-18 20:39:37 UTC (rev 12090)
+++ trunk/foray/foray-orthography/build.gradle 2021-11-18 20:41:08 UTC (rev 12091)
@@ -28,7 +28,7 @@
outputs.upToDateWhen { false }
description = "Parse and serialize the hyphenation patterns"
classpath = sourceSets.main.runtimeClasspath
- main = 'org.foray.orthography.PatternSerializer'
+ main = 'org.foray.orthography.util.PatternSerializer'
args('--input')
args('src/main/data/hyph-patterns')
args('--output')
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 20:39:39
|
Revision: 12090
http://sourceforge.net/p/foray/code/12090
Author: victormote
Date: 2021-11-18 20:39:37 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Move more utility classes to util package.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternConsumer.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternTree.java
trunk/foray/master/ide/eclipse/launch-configurations/DictionarySerializer.launch
trunk/foray/master/ide/eclipse/launch-configurations/PatternSerializer.launch
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternGenerator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternSerializer.java
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/DictionarySerializer.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternGenerator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternSerializer.java
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/DictionarySerializer.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/DictionarySerializer.java 2021-11-18 20:29:32 UTC (rev 12089)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/DictionarySerializer.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -1,233 +0,0 @@
-/*
- * Copyright 2019 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography;
-
-import org.foray.common.ForayConstants;
-import org.foray.common.IoUtil;
-import org.foray.orthography.util.DictionaryParser;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.DefaultParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.filefilter.IOFileFilter;
-import org.apache.commons.io.filefilter.RegexFileFilter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectOutputStream;
-
-/**
- * Parse and serialize dictionary objects so that they can be packaged for runtime use without the need to parse them
- * then.
- */
-public class DictionarySerializer {
-
- /** The directory containing the patterns to be parsed. */
- private File sourceDir;
-
- /** Filter to be applied to files in {@link #sourceDir} to determine which files should be processed. */
- private IOFileFilter fileFilter;
-
- /** The directory in which the serialized output should be placed. */
- private File targetDir;
-
- /** The logger. */
- private Logger logger = LoggerFactory.getLogger(DictionarySerializer.class);;
-
- /**
- * Constructor.
- * @param sourceDir The directory containing the files to be processed.
- * @param fileFilter The filter, if any, that should be applied to the files in @code{sourceDir}.
- * This can be null.
- * @param targetDir The directory into which the output files should be written.
- * @throws IOException For invalid @code{sourceDir} or @code{targetDir}.
- */
- public DictionarySerializer(final File sourceDir, final IOFileFilter fileFilter, final File targetDir)
- throws IOException {
- if (! sourceDir.exists()
- || ! sourceDir.isDirectory()) {
- throw new IOException("Source directory does not exist or is not a directory: "
- + sourceDir.getAbsolutePath());
- }
- if (targetDir.exists()) {
- if (! targetDir.isDirectory()) {
- throw new IOException("Specified target is not a directory: " + targetDir.getAbsolutePath());
- }
- } else {
- targetDir.mkdirs();
- }
-
- this.sourceDir = sourceDir;
- this.fileFilter = fileFilter;
- this.targetDir = targetDir;
- }
-
- /**
- * Serialize the selected files.
- */
- public void process() {
- final String[] filesToProcess = this.sourceDir.list(this.fileFilter);
- for (int i = 0; i < filesToProcess.length; i++) {
- final String filename = filesToProcess[i];
- final File infile = new File(this.sourceDir, filename);
- final String baseFilename = IoUtil.baseFileName(filesToProcess[i]);
- final File outfile = new File(this.targetDir, baseFilename + "."
- + ForayConstants.BINARY_SERIALIZATION_EXTENSION);
- buildDictionary(infile, outfile);
- }
- }
-
- /**
- * Parses an input file, creates a {@link SegmentDictionary} instance for it, and then serializes the instance to an
- * output file.
- * @param infile The input file (word list).
- * @param outfile The output file (serialized POJO).
- */
- private void buildDictionary(final File infile, final File outfile) {
- this.logger.info("Processing " + infile);
- final DictionaryParser parser = new DictionaryParser();
- SegmentDictionary dictionary = null;
-
- /* Parse the dictionary. */
- try {
- final InputStream inputStream = infile.toURI().toURL().openStream();
- dictionary = parser.parse(inputStream, infile.getAbsolutePath());
- } catch (final IOException e) {
- this.logger.error(e.getMessage());
- }
-
- /* Serialize the dictionary. */
- ObjectOutputStream out = null;
- try {
- out = new ObjectOutputStream(new FileOutputStream(outfile));
- out.writeObject(dictionary);
- } catch (final IOException ioe) {
- this.logger.error("Can't write compiled dictionary file: " + outfile);
- this.logger.error(ioe.toString());
- } finally {
- if (out != null) {
- try {
- out.close();
- } catch (final IOException e) {
- /* Ignore. */
- }
- }
- }
- }
-
- /**
- * Returns the command-line options for the {@link #main(String[])} method.
- * @return Command-line options.
- */
- static Options getCommandLineOptions() {
- final Options clOptions = new Options();
-
- /* Input directory. */
- final Option inputDir = new Option("i", "input", true, "path to the input directory");
- inputDir.setRequired(true);
- inputDir.setArgName("input-dir");
- clOptions.addOption(inputDir);
-
- /* Output directory. */
- final Option outputDir = new Option("o", "output", true, "path to the output directory");
- outputDir.setRequired(true);
- outputDir.setArgName("output-dir");
- clOptions.addOption(outputDir);
-
- /* Include pattern. */
- final Option includePattern = new Option("p", "pattern", true,
- "input include pattern (regex)");
- includePattern.setRequired(true);
- includePattern.setArgName("pattern-regex");
- clOptions.addOption(includePattern);
-
- return clOptions;
- }
-
- /**
- * Parses a command-line.
- * @param commandLineOptions The command-line options controlling the parsing.
- * @param args The command-line arguments to be parsed.
- * @return The results of the parsing.
- * @throws ParseException For errors in {@code args}.
- */
- public static CommandLine parseCommandLine(final Options commandLineOptions, final String[] args)
- throws ParseException {
- final CommandLineParser commandLineParser = new DefaultParser();
- final CommandLine parsedCommandLine = commandLineParser.parse(commandLineOptions, args);
-
- return parsedCommandLine;
- }
-
- /**
- * Command-line interface.
- * @param args The command-line arguments.
- */
- public static void main(final String[] args) {
- final Logger logger = LoggerFactory.getLogger(DictionarySerializer.class);
- final Options commandLineOptions = DictionarySerializer.getCommandLineOptions();
- CommandLine parsedCommandLine = null;
- try {
- parsedCommandLine = DictionarySerializer.parseCommandLine(commandLineOptions, args);
- } catch (final ParseException e) {
- logger.error(e.getMessage(), e);
- final HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.printHelp("java -cp $FORAY_CLASSPATH " + DictionarySerializer.class.getName(),
- commandLineOptions, true);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(1);
- }
-
- final String inputDirString = parsedCommandLine.getOptionValue("input");
- final String outputDirString = parsedCommandLine.getOptionValue("output");
- final String includePattern = parsedCommandLine.getOptionValue("pattern");
-
- final File inputDir = new File(inputDirString);
- final File outputDir = new File(outputDirString);
- final RegexFileFilter fileFilter = new RegexFileFilter(includePattern);
-
- try {
- final DictionarySerializer serializer = new DictionarySerializer(inputDir, fileFilter, outputDir);
- serializer.process();
- } catch (final IOException e) {
- logger.error(e.getMessage(), e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(1);
- }
- }
-
-}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternConsumer.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternConsumer.java 2021-11-18 20:29:32 UTC (rev 12089)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternConsumer.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -33,6 +33,8 @@
package org.foray.orthography;
+import org.foray.orthography.util.PatternParser;
+
import org.axsl.orthography.OrthographyException;
/**
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternGenerator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternGenerator.java 2021-11-18 20:29:32 UTC (rev 12089)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternGenerator.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -1,39 +0,0 @@
-/*
- * Copyright 2010 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography;
-
-import org.foray.orthography.zzarchive.PatGen;
-
-/**
- * A refactored/rewritten version of {@link PatGen} to be more object-oriented.
- */
-public class PatternGenerator {
- /* TODO: After the PatGen class is complete and tested, copy its contents here and begin refactoring. */
-
-}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java 2021-11-18 20:29:32 UTC (rev 12089)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -1,321 +0,0 @@
-/*
- * Copyright 2004 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-/*
- * Known contributors:
- * Carlos Villegas <ca...@un...> (Original author)
- */
-
-package org.foray.orthography;
-
-import org.foray.common.ForayConstants;
-import org.foray.common.xml.SaxParser;
-
-import org.axsl.orthography.OrthographyException;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXParseException;
-import org.xml.sax.XMLReader;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-
-import javax.xml.parsers.ParserConfigurationException;
-
-/**
- * A SAX document handler to read and parse hyphenation patterns
- * from a XML file.
- */
-public class PatternParser extends SaxParser {
-
- /** Constant indicating that the current element is "classes". */
- static final int ELEM_CLASSES = 1;
-
- /** Constant indicating that the current element is "exceptions". */
- static final int ELEM_EXCEPTIONS = 2;
-
- /** Constant indicating that the current element is "patterns".*/
- static final int ELEM_PATTERNS = 3;
-
- /** Constant indicating that the current element is "hyphen". */
- static final int ELEM_HYPHEN = 4;
-
- /** The SAX parser. */
- private XMLReader parser;
-
- /** The current element being parsed. */
- private int currElement;
-
- /** The PatternConsumer implementation. */
- private PatternConsumer consumer;
-
- /** The current token being parsed. */
- private StringBuilder token;
-
- /** The last error message generated. Used to enhance the messages that
- * are returned with exceptions. */
- private String errMsg;
-
- /**
- * Constructor.
- * @param consumer The pattern consumer implementation that will accept the data created by this parser.
- * @throws OrthographyException For errors during construction.
- */
- public PatternParser(final PatternConsumer consumer) throws OrthographyException {
- if (consumer == null) {
- throw new NullPointerException("PatternConsumer may not be null.");
- }
- this.token = new StringBuilder();
- try {
- this.parser = createSax2Parser();
- } catch (final SAXException | ParserConfigurationException e) {
- throw new OrthographyException(e);
- }
- this.parser.setContentHandler(this);
- this.parser.setErrorHandler(this);
- this.consumer = consumer;
- }
-
- /**
- * Parses a given URL, using the PatternConsumer to consume the data retrieved during the parse.
- * @param url The URL to parse.
- * @throws OrthographyException For errors during parsing.
- */
- public void parse(final URL url) throws OrthographyException {
- InputStream inputStream = null;
- try {
- inputStream = url.openStream();
- } catch (final IOException e) {
- /* This is a normal condition, and just means that the URL to the
- * pattern file does not exist. */
- getLogger().error("Cannot open hyphenation pattern: "
- + url.toString() + ForayConstants.LOG_NEWLINE
- + e.getMessage());
- return;
- }
- parse(inputStream);
- }
-
- /**
- * Parses a given input stream, using the PatternConsumer to consume the data retrieved during the parse.
- * @param inputStream The input stream to parse.
- * @throws OrthographyException For errors during parsing.
- */
- public void parse(final InputStream inputStream) throws OrthographyException {
- final InputSource uri = new InputSource(inputStream);
-
- try {
- this.parser.parse(uri);
- } catch (final SAXException e) {
- throw new OrthographyException(this.errMsg);
- } catch (final IOException e) {
- throw new OrthographyException(e.getMessage());
- } catch (final NullPointerException e) {
- throw new OrthographyException("SAX parser not available");
- }
- }
-
- /**
- * Returns the next token from the buffer.
- * @param chars The buffer which is being parsed.
- * @return The next token.
- */
- protected String readToken(final StringBuilder chars) {
- String word;
- boolean space = false;
- int i;
- for (i = 0; i < chars.length(); i++) {
- if (Character.isWhitespace(chars.charAt(i))) {
- space = true;
- } else {
- break;
- }
- }
- if (space) {
- for (int countr = i; countr < chars.length(); countr++) {
- chars.setCharAt(countr - i, chars.charAt(countr));
- }
- chars.setLength(chars.length() - i);
- if (this.token.length() > 0) {
- word = this.token.toString();
- this.token.setLength(0);
- return word;
- }
- }
- space = false;
- for (i = 0; i < chars.length(); i++) {
- if (Character.isWhitespace(chars.charAt(i))) {
- space = true;
- break;
- }
- }
- this.token.append(chars.toString().substring(0, i));
- for (int countr = i; countr < chars.length(); countr++) {
- chars.setCharAt(countr - i, chars.charAt(countr));
- }
- chars.setLength(chars.length() - i);
- if (space) {
- word = this.token.toString();
- this.token.setLength(0);
- return word;
- }
- this.token.append(chars);
- return null;
- }
-
- @Override
- public void startElement(final String uri, final String local,
- final String raw, final Attributes attrs) {
- if (local.equals("hyphen-char")) {
- final String h = attrs.getValue("value");
- if (h != null && h.length() == 1) {
- this.consumer.setHyphenChar(h.charAt(0));
- }
- } else if (local.equals("classes")) {
- this.currElement = PatternParser.ELEM_CLASSES;
- } else if (local.equals("patterns")) {
- this.currElement = PatternParser.ELEM_PATTERNS;
- } else if (local.equals("exceptions")) {
- this.currElement = PatternParser.ELEM_EXCEPTIONS;
- } else if (local.equals("hyphen")) {
- if (this.token.length() > 0) {
- this.consumer.addException(this.token.toString(), 0);
- }
- try {
- this.consumer.addMorphException(attrs.getValue("no"),
- attrs.getValue("pre"), attrs.getValue("post"),
- attrs.getValue("no"));
- } catch (final OrthographyException e) {
- this.error(new SAXParseException("", null, e));
- }
- this.currElement = PatternParser.ELEM_HYPHEN;
- }
- this.token.setLength(0);
- }
-
- @Override
- public void endElement(final String uri, final String local,
- final String raw) {
- if (this.token.length() > 0) {
- final String word = this.token.toString();
- switch (this.currElement) {
- case PatternParser.ELEM_CLASSES:
- this.consumer.addClass(word);
- break;
- case PatternParser.ELEM_EXCEPTIONS:
- this.consumer.addException(word, 0);
- break;
- case PatternParser.ELEM_PATTERNS:
- this.consumer.addPattern(word);
- break;
- case PatternParser.ELEM_HYPHEN:
- break;
- }
- if (this.currElement != PatternParser.ELEM_HYPHEN) {
- this.token.setLength(0);
- }
- }
- if (this.currElement == PatternParser.ELEM_HYPHEN) {
- this.currElement = PatternParser.ELEM_EXCEPTIONS;
- } else {
- this.currElement = 0;
- }
-
- }
-
- @Override
- public void characters(final char ch[], final int start, final int length) {
- final StringBuilder chars = new StringBuilder(length);
- chars.append(ch, start, length);
- String word = readToken(chars);
- while (word != null) {
- switch (this.currElement) {
- case PatternParser.ELEM_CLASSES:
- this.consumer.addClass(word);
- break;
- case PatternParser.ELEM_EXCEPTIONS:
- this.consumer.addException(word, 0);
- break;
- case PatternParser.ELEM_PATTERNS:
- this.consumer.addPattern(word);
- break;
- }
- word = readToken(chars);
- }
-
- }
-
- @Override
- public void warning(final SAXParseException ex) {
- this.errMsg = "[Warning] " + getLocationString(ex) + ": "
- + ex.getMessage();
- }
-
- @Override
- public void error(final SAXParseException ex) {
- this.errMsg = "[Error] " + getLocationString(ex) + ": "
- + ex.getMessage();
- }
-
- @Override
- public void fatalError(final SAXParseException ex) throws SAXException {
- this.errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
- + ex.getMessage();
- throw ex;
- }
-
- /**
- * For a given exception, returns a string description of the document
- * location that caused the exception.
- * @param ex The exception whose location is needed.
- * @return The string description of the document location.
- */
- private String getLocationString(final SAXParseException ex) {
- final StringBuilder str = new StringBuilder();
-
- String systemId = ex.getSystemId();
- if (systemId != null) {
- final int index = systemId.lastIndexOf('/');
- if (index != -1) {
- systemId = systemId.substring(index + 1);
- }
- str.append(systemId);
- }
- str.append(':');
- str.append(ex.getLineNumber());
- str.append(':');
- str.append(ex.getColumnNumber());
-
- return str.toString();
-
- }
-
-}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternSerializer.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternSerializer.java 2021-11-18 20:29:32 UTC (rev 12089)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternSerializer.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -1,240 +0,0 @@
-/*
- * Copyright 2004 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography;
-
-import org.foray.common.ForayConstants;
-import org.foray.common.IoUtil;
-
-import org.axsl.orthography.OrthographyException;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.DefaultParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.filefilter.IOFileFilter;
-import org.apache.commons.io.filefilter.RegexFileFilter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.ObjectOutputStream;
-import java.net.MalformedURLException;
-import java.net.URL;
-
-/**
- * Parse and serialize hyphenation patterns so that they can be packaged for runtime use without the need to parse them
- * then.
- */
-public class PatternSerializer {
-
- /** The directory containing the patterns to be parsed. */
- private File sourceDir;
-
- /** Filter to be applied to files in {@link #sourceDir} to determine which files should be processed. */
- private IOFileFilter fileFilter;
-
- /** The directory in which the serialized output should be placed. */
- private File targetDir;
-
- /** The logger. */
- private Logger logger;
-
- /**
- * Constructor.
- * @param sourceDir The directory containing the files to be processed.
- * @param fileFilter The filter, if any, that should be applied to the files in @code{sourceDir}.
- * This can be null.
- * @param targetDir The directory into which the output files should be written.
- * @throws IOException For invalid @code{sourceDir} or @code{targetDir}.
- */
- public PatternSerializer(final File sourceDir, final IOFileFilter fileFilter, final File targetDir)
- throws IOException {
- if (! sourceDir.exists()
- || ! sourceDir.isDirectory()) {
- throw new IOException("Source directory does not exist or is not a directory: "
- + sourceDir.getAbsolutePath());
- }
- if (targetDir.exists()) {
- if (! targetDir.isDirectory()) {
- throw new IOException("Specified target is not a directory: " + targetDir.getAbsolutePath());
- }
- } else {
- targetDir.mkdirs();
- }
-
- this.sourceDir = sourceDir;
- this.fileFilter = fileFilter;
- this.targetDir = targetDir;
- }
-
- /**
- * Serialize the selected files.
- */
- public void process() {
- final String[] filesToProcess = this.sourceDir.list(this.fileFilter);
- for (int i = 0; i < filesToProcess.length; i++) {
- final String filename = filesToProcess[i];
- final File infile = new File(this.sourceDir, filename);
- final String baseFilename = IoUtil.baseFileName(filesToProcess[i]);
- final File outfile = new File(this.targetDir, baseFilename + "."
- + ForayConstants.BINARY_SERIALIZATION_EXTENSION);
- buildPatternFile(infile, outfile);
- }
- }
-
- /**
- * Parses an input file, creates a {@link PatternTree} instance for it, and then serializes the instance to an
- * output file.
- * @param infile The input file (XML pattern file).
- * @param outfile The output file (serialized POJO).
- */
- private void buildPatternFile(final File infile, final File outfile) {
- getLogger().info("Processing " + infile);
- final PatternTree hTree = new PatternTree();
- try {
- final URL url = infile.toURI().toURL();
- hTree.loadPatterns(url.openStream(), getLogger());
- } catch (final OrthographyException ex) {
- getLogger().error("Can't load patterns from xml file " + infile + " - Maybe hyphenation.dtd is missing?");
- getLogger().error(ex.toString());
- } catch (final MalformedURLException e) {
- getLogger().error(e.getMessage());
- } catch (final IOException e) {
- getLogger().error(e.getMessage());
- }
- /* Serialize the object. */
- try {
- final ObjectOutputStream out =
- new ObjectOutputStream(new FileOutputStream(outfile));
- out.writeObject(hTree);
- out.close();
- } catch (final IOException ioe) {
- getLogger().error("Can't write compiled pattern file: " + outfile);
- getLogger().error(ioe.toString());
- }
- }
-
- /**
- * Returns the logger.
- * @return The logger.
- */
- public Logger getLogger() {
- if (this.logger == null) {
- this.logger = LoggerFactory.getLogger(PatternSerializer.class);
- }
- return this.logger;
- }
-
- /**
- * Returns the command-line options for the {@link #main(String[])} method.
- * @return Command-line options.
- */
- static Options getCommandLineOptions() {
- final Options clOptions = new Options();
-
- /* Input directory. */
- final Option inputDir = new Option("i", "input", true, "path to the input directory");
- inputDir.setRequired(true);
- inputDir.setArgName("input-dir");
- clOptions.addOption(inputDir);
-
- /* Output directory. */
- final Option outputDir = new Option("o", "output", true, "path to the output directory");
- outputDir.setRequired(true);
- outputDir.setArgName("output-dir");
- clOptions.addOption(outputDir);
-
- /* Include pattern. */
- final Option includePattern = new Option("p", "pattern", true,
- "input include pattern (regex)");
- includePattern.setRequired(true);
- includePattern.setArgName("pattern-regex");
- clOptions.addOption(includePattern);
-
- return clOptions;
- }
-
- /**
- * Parses a command-line.
- * @param commandLineOptions The command-line options controlling the parsing.
- * @param args The command-line arguments to be parsed.
- * @return The results of the parsing.
- * @throws ParseException For errors in {@code args}.
- */
- public static CommandLine parseCommandLine(final Options commandLineOptions, final String[] args)
- throws ParseException {
- final CommandLineParser commandLineParser = new DefaultParser();
- final CommandLine parsedCommandLine = commandLineParser.parse(commandLineOptions, args);
-
- return parsedCommandLine;
- }
-
- /**
- * Command-line interface.
- * @param args The command-line arguments.
- */
- public static void main(final String[] args) {
- final Logger logger = LoggerFactory.getLogger(PatternSerializer.class);
- final Options commandLineOptions = PatternSerializer.getCommandLineOptions();
- CommandLine parsedCommandLine = null;
- try {
- parsedCommandLine = PatternSerializer.parseCommandLine(commandLineOptions, args);
- } catch (final ParseException e) {
- logger.error(e.getMessage(), e);
- final HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.printHelp("java -cp $FORAY_CLASSPATH " + PatternSerializer.class.getName(),
- commandLineOptions, true);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(1);
- }
-
- final String inputDirString = parsedCommandLine.getOptionValue("input");
- final String outputDirString = parsedCommandLine.getOptionValue("output");
- final String includePattern = parsedCommandLine.getOptionValue("pattern");
-
- final File inputDir = new File(inputDirString);
- final File outputDir = new File(outputDirString);
- final RegexFileFilter fileFilter = new RegexFileFilter(includePattern);
-
- try {
- final PatternSerializer serializer = new PatternSerializer(inputDir, fileFilter, outputDir);
- serializer.process();
- } catch (final IOException e) {
- logger.error(e.getMessage(), e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(1);
- }
- }
-
-}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternTree.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternTree.java 2021-11-18 20:29:32 UTC (rev 12089)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternTree.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -37,6 +37,7 @@
import org.foray.common.data.TernaryTree;
import org.foray.common.primitive.BitUtils;
import org.foray.common.primitive.StringUtils;
+import org.foray.orthography.util.PatternParser;
import org.axsl.orthography.OrthographyException;
@@ -179,7 +180,7 @@
* @param logger A logger for user messages.
* @throws OrthographyException For errors obtaining a parser or errors during the parsing.
*/
- void loadPatterns(final InputStream inputStream, final Logger logger) throws OrthographyException {
+ public void loadPatterns(final InputStream inputStream, final Logger logger) throws OrthographyException {
this.source = PatternTree.Source.PARSED;
final PatternParser pp = new PatternParser(this);
this.tempInterletterValues = new TernaryTree();
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java (from rev 12032, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/DictionarySerializer.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography.util;
+
+import org.foray.common.ForayConstants;
+import org.foray.common.IoUtil;
+import org.foray.orthography.SegmentDictionary;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.filefilter.IOFileFilter;
+import org.apache.commons.io.filefilter.RegexFileFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectOutputStream;
+
+/**
+ * Parse and serialize dictionary objects so that they can be packaged for runtime use without the need to parse them
+ * then.
+ */
+public class DictionarySerializer {
+
+ /** The directory containing the word lists to be parsed. */
+ private File sourceDir;
+
+ /** Filter to be applied to files in {@link #sourceDir} to determine which files should be processed. */
+ private IOFileFilter fileFilter;
+
+ /** The directory in which the serialized output should be placed. */
+ private File targetDir;
+
+ /** The logger. */
+ private Logger logger = LoggerFactory.getLogger(DictionarySerializer.class);;
+
+ /**
+ * Constructor.
+ * @param sourceDir The directory containing the files to be processed.
+ * @param fileFilter The filter, if any, that should be applied to the files in {@code sourceDir}.
+ * This can be null.
+ * @param targetDir The directory into which the output files should be written.
+ * @throws IOException For invalid {@code sourceDir} or {@code targetDir}.
+ */
+ public DictionarySerializer(final File sourceDir, final IOFileFilter fileFilter, final File targetDir)
+ throws IOException {
+ if (! sourceDir.exists()
+ || ! sourceDir.isDirectory()) {
+ throw new IOException("Source directory does not exist or is not a directory: "
+ + sourceDir.getAbsolutePath());
+ }
+ if (targetDir.exists()) {
+ if (! targetDir.isDirectory()) {
+ throw new IOException("Specified target is not a directory: " + targetDir.getAbsolutePath());
+ }
+ } else {
+ targetDir.mkdirs();
+ }
+
+ this.sourceDir = sourceDir;
+ this.fileFilter = fileFilter;
+ this.targetDir = targetDir;
+ }
+
+ /**
+ * Serialize the selected files.
+ */
+ public void process() {
+ final String[] filesToProcess = this.sourceDir.list(this.fileFilter);
+ for (int i = 0; i < filesToProcess.length; i++) {
+ final String filename = filesToProcess[i];
+ final File infile = new File(this.sourceDir, filename);
+ final String baseFilename = IoUtil.baseFileName(filesToProcess[i]);
+ final File outfile = new File(this.targetDir, baseFilename + "."
+ + ForayConstants.BINARY_SERIALIZATION_EXTENSION);
+ buildDictionary(infile, outfile);
+ }
+ }
+
+ /**
+ * Parses an input file, creates a {@link SegmentDictionary} instance for it, and then serializes the instance to an
+ * output file.
+ * @param infile The input file (word list).
+ * @param outfile The output file (serialized POJO).
+ */
+ private void buildDictionary(final File infile, final File outfile) {
+ this.logger.info("Processing " + infile);
+ final DictionaryParser parser = new DictionaryParser();
+ SegmentDictionary dictionary = null;
+
+ /* Parse the dictionary. */
+ try {
+ final InputStream inputStream = infile.toURI().toURL().openStream();
+ dictionary = parser.parse(inputStream, infile.getAbsolutePath());
+ } catch (final IOException e) {
+ this.logger.error(e.getMessage());
+ }
+
+ /* Serialize the dictionary. */
+ ObjectOutputStream out = null;
+ try {
+ out = new ObjectOutputStream(new FileOutputStream(outfile));
+ out.writeObject(dictionary);
+ } catch (final IOException ioe) {
+ this.logger.error("Can't write compiled dictionary file: " + outfile);
+ this.logger.error(ioe.toString());
+ } finally {
+ if (out != null) {
+ try {
+ out.close();
+ } catch (final IOException e) {
+ /* Ignore. */
+ }
+ }
+ }
+ }
+
+ /**
+ * Returns the command-line options for the {@link #main(String[])} method.
+ * @return Command-line options.
+ */
+ static Options getCommandLineOptions() {
+ final Options clOptions = new Options();
+
+ /* Input directory. */
+ final Option inputDir = new Option("i", "input", true, "path to the input directory");
+ inputDir.setRequired(true);
+ inputDir.setArgName("input-dir");
+ clOptions.addOption(inputDir);
+
+ /* Output directory. */
+ final Option outputDir = new Option("o", "output", true, "path to the output directory");
+ outputDir.setRequired(true);
+ outputDir.setArgName("output-dir");
+ clOptions.addOption(outputDir);
+
+ /* Include pattern. */
+ final Option includePattern = new Option("p", "pattern", true,
+ "input include pattern (regex)");
+ includePattern.setRequired(true);
+ includePattern.setArgName("pattern-regex");
+ clOptions.addOption(includePattern);
+
+ return clOptions;
+ }
+
+ /**
+ * Parses a command-line.
+ * @param commandLineOptions The command-line options controlling the parsing.
+ * @param args The command-line arguments to be parsed.
+ * @return The results of the parsing.
+ * @throws ParseException For errors in {@code args}.
+ */
+ public static CommandLine parseCommandLine(final Options commandLineOptions, final String[] args)
+ throws ParseException {
+ final CommandLineParser commandLineParser = new DefaultParser();
+ final CommandLine parsedCommandLine = commandLineParser.parse(commandLineOptions, args);
+
+ return parsedCommandLine;
+ }
+
+ /**
+ * Command-line interface.
+ * @param args The command-line arguments.
+ */
+ public static void main(final String[] args) {
+ final Logger logger = LoggerFactory.getLogger(DictionarySerializer.class);
+ final Options commandLineOptions = DictionarySerializer.getCommandLineOptions();
+ CommandLine parsedCommandLine = null;
+ try {
+ parsedCommandLine = DictionarySerializer.parseCommandLine(commandLineOptions, args);
+ } catch (final ParseException e) {
+ logger.error(e.getMessage(), e);
+ final HelpFormatter helpFormatter = new HelpFormatter();
+ helpFormatter.printHelp("java -cp $FORAY_CLASSPATH " + DictionarySerializer.class.getName(),
+ commandLineOptions, true);
+ /* CheckStyle: Allow System.exit() in main method. */
+ System.exit(1);
+ }
+
+ final String inputDirString = parsedCommandLine.getOptionValue("input");
+ final String outputDirString = parsedCommandLine.getOptionValue("output");
+ final String includePattern = parsedCommandLine.getOptionValue("pattern");
+
+ final File inputDir = new File(inputDirString);
+ final File outputDir = new File(outputDirString);
+ final RegexFileFilter fileFilter = new RegexFileFilter(includePattern);
+
+ try {
+ final DictionarySerializer serializer = new DictionarySerializer(inputDir, fileFilter, outputDir);
+ serializer.process();
+ } catch (final IOException e) {
+ logger.error(e.getMessage(), e);
+ /* CheckStyle: Allow System.exit() in main method. */
+ System.exit(1);
+ }
+ }
+
+}
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternGenerator.java (from rev 12032, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternGenerator.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternGenerator.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternGenerator.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2010 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography.util;
+
+import org.foray.orthography.zzarchive.PatGen;
+
+/**
+ * A refactored/rewritten version of {@link PatGen} to be more object-oriented.
+ */
+public class PatternGenerator {
+ /* TODO: After the PatGen class is complete and tested, copy its contents here and begin refactoring. */
+
+}
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternParser.java (from rev 12084, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternParser.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternParser.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2004 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+/*
+ * Known contributors:
+ * Carlos Villegas <ca...@un...> (Original author)
+ */
+
+package org.foray.orthography.util;
+
+import org.foray.common.ForayConstants;
+import org.foray.common.xml.SaxParser;
+import org.foray.orthography.PatternConsumer;
+
+import org.axsl.orthography.OrthographyException;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.XMLReader;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+/**
+ * A SAX document handler to read and parse hyphenation patterns
+ * from an XML file.
+ */
+public class PatternParser extends SaxParser {
+
+ /** Constant indicating that the current element is "classes". */
+ static final int ELEM_CLASSES = 1;
+
+ /** Constant indicating that the current element is "exceptions". */
+ static final int ELEM_EXCEPTIONS = 2;
+
+ /** Constant indicating that the current element is "patterns".*/
+ static final int ELEM_PATTERNS = 3;
+
+ /** Constant indicating that the current element is "hyphen". */
+ static final int ELEM_HYPHEN = 4;
+
+ /** The SAX parser. */
+ private XMLReader parser;
+
+ /** The current element being parsed. */
+ private int currElement;
+
+ /** The PatternConsumer implementation. */
+ private PatternConsumer consumer;
+
+ /** The current token being parsed. */
+ private StringBuilder token;
+
+ /** The last error message generated. Used to enhance the messages that
+ * are returned with exceptions. */
+ private String errMsg;
+
+ /**
+ * Constructor.
+ * @param consumer The pattern consumer implementation that will accept the data created by this parser.
+ * @throws OrthographyException For errors during construction.
+ */
+ public PatternParser(final PatternConsumer consumer) throws OrthographyException {
+ if (consumer == null) {
+ throw new NullPointerException("PatternConsumer may not be null.");
+ }
+ this.token = new StringBuilder();
+ try {
+ this.parser = createSax2Parser();
+ } catch (final SAXException | ParserConfigurationException e) {
+ throw new OrthographyException(e);
+ }
+ this.parser.setContentHandler(this);
+ this.parser.setErrorHandler(this);
+ this.consumer = consumer;
+ }
+
+ /**
+ * Parses a given URL, using the PatternConsumer to consume the data retrieved during the parse.
+ * @param url The URL to parse.
+ * @throws OrthographyException For errors during parsing.
+ */
+ public void parse(final URL url) throws OrthographyException {
+ InputStream inputStream = null;
+ try {
+ inputStream = url.openStream();
+ } catch (final IOException e) {
+ /* This is a normal condition, and just means that the URL to the
+ * pattern file does not exist. */
+ getLogger().error("Cannot open hyphenation pattern: "
+ + url.toString() + ForayConstants.LOG_NEWLINE
+ + e.getMessage());
+ return;
+ }
+ parse(inputStream);
+ }
+
+ /**
+ * Parses a given input stream, using the PatternConsumer to consume the data retrieved during the parse.
+ * @param inputStream The input stream to parse.
+ * @throws OrthographyException For errors during parsing.
+ */
+ public void parse(final InputStream inputStream) throws OrthographyException {
+ final InputSource uri = new InputSource(inputStream);
+
+ try {
+ this.parser.parse(uri);
+ } catch (final SAXException e) {
+ throw new OrthographyException(this.errMsg);
+ } catch (final IOException e) {
+ throw new OrthographyException(e.getMessage());
+ } catch (final NullPointerException e) {
+ throw new OrthographyException("SAX parser not available");
+ }
+ }
+
+ /**
+ * Returns the next token from the buffer.
+ * @param chars The buffer which is being parsed.
+ * @return The next token.
+ */
+ protected String readToken(final StringBuilder chars) {
+ String word;
+ boolean space = false;
+ int i;
+ for (i = 0; i < chars.length(); i++) {
+ if (Character.isWhitespace(chars.charAt(i))) {
+ space = true;
+ } else {
+ break;
+ }
+ }
+ if (space) {
+ for (int countr = i; countr < chars.length(); countr++) {
+ chars.setCharAt(countr - i, chars.charAt(countr));
+ }
+ chars.setLength(chars.length() - i);
+ if (this.token.length() > 0) {
+ word = this.token.toString();
+ this.token.setLength(0);
+ return word;
+ }
+ }
+ space = false;
+ for (i = 0; i < chars.length(); i++) {
+ if (Character.isWhitespace(chars.charAt(i))) {
+ space = true;
+ break;
+ }
+ }
+ this.token.append(chars.toString().substring(0, i));
+ for (int countr = i; countr < chars.length(); countr++) {
+ chars.setCharAt(countr - i, chars.charAt(countr));
+ }
+ chars.setLength(chars.length() - i);
+ if (space) {
+ word = this.token.toString();
+ this.token.setLength(0);
+ return word;
+ }
+ this.token.append(chars);
+ return null;
+ }
+
+ @Override
+ public void startElement(final String uri, final String local,
+ final String raw, final Attributes attrs) {
+ if (local.equals("hyphen-char")) {
+ final String h = attrs.getValue("value");
+ if (h != null && h.length() == 1) {
+ this.consumer.setHyphenChar(h.charAt(0));
+ }
+ } else if (local.equals("classes")) {
+ this.currElement = PatternParser.ELEM_CLASSES;
+ } else if (local.equals("patterns")) {
+ this.currElement = PatternParser.ELEM_PATTERNS;
+ } else if (local.equals("exceptions")) {
+ this.currElement = PatternParser.ELEM_EXCEPTIONS;
+ } else if (local.equals("hyphen")) {
+ if (this.token.length() > 0) {
+ this.consumer.addException(this.token.toString(), 0);
+ }
+ try {
+ this.consumer.addMorphException(attrs.getValue("no"),
+ attrs.getValue("pre"), attrs.getValue("post"),
+ attrs.getValue("no"));
+ } catch (final OrthographyException e) {
+ this.error(new SAXParseException("", null, e));
+ }
+ this.currElement = PatternParser.ELEM_HYPHEN;
+ }
+ this.token.setLength(0);
+ }
+
+ @Override
+ public void endElement(final String uri, final String local,
+ final String raw) {
+ if (this.token.length() > 0) {
+ final String word = this.token.toString();
+ switch (this.currElement) {
+ case PatternParser.ELEM_CLASSES:
+ this.consumer.addClass(word);
+ break;
+ case PatternParser.ELEM_EXCEPTIONS:
+ this.consumer.addException(word, 0);
+ break;
+ case PatternParser.ELEM_PATTERNS:
+ this.consumer.addPattern(word);
+ break;
+ case PatternParser.ELEM_HYPHEN:
+ break;
+ }
+ if (this.currElement != PatternParser.ELEM_HYPHEN) {
+ this.token.setLength(0);
+ }
+ }
+ if (this.currElement == PatternParser.ELEM_HYPHEN) {
+ this.currElement = PatternParser.ELEM_EXCEPTIONS;
+ } else {
+ this.currElement = 0;
+ }
+
+ }
+
+ @Override
+ public void characters(final char ch[], final int start, final int length) {
+ final StringBuilder chars = new StringBuilder(length);
+ chars.append(ch, start, length);
+ String word = readToken(chars);
+ while (word != null) {
+ switch (this.currElement) {
+ case PatternParser.ELEM_CLASSES:
+ this.consumer.addClass(word);
+ break;
+ case PatternParser.ELEM_EXCEPTIONS:
+ this.consumer.addException(word, 0);
+ break;
+ case PatternParser.ELEM_PATTERNS:
+ this.consumer.addPattern(word);
+ break;
+ }
+ word = readToken(chars);
+ }
+
+ }
+
+ @Override
+ public void warning(final SAXParseException ex) {
+ this.errMsg = "[Warning] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ }
+
+ @Override
+ public void error(final SAXParseException ex) {
+ this.errMsg = "[Error] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ }
+
+ @Override
+ public void fatalError(final SAXParseException ex) throws SAXException {
+ this.errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ throw ex;
+ }
+
+ /**
+ * For a given exception, returns a string description of the document
+ * location that caused the exception.
+ * @param ex The exception whose location is needed.
+ * @return The string description of the document location.
+ */
+ private String getLocationString(final SAXParseException ex) {
+ final StringBuilder str = new StringBuilder();
+
+ String systemId = ex.getSystemId();
+ if (systemId != null) {
+ final int index = systemId.lastIndexOf('/');
+ if (index != -1) {
+ systemId = systemId.substring(index + 1);
+ }
+ str.append(systemId);
+ }
+ str.append(':');
+ str.append(ex.getLineNumber());
+ str.append(':');
+ str.append(ex.getColumnNumber());
+
+ return str.toString();
+
+ }
+
+}
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternSerializer.java (from rev 12033, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternSerializer.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternSerializer.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/PatternSerializer.java 2021-11-18 20:39:37 UTC (rev 12090)
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2004 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography.util;
+
+import org.foray.common.ForayConstants;
+import org.foray.common.IoUtil;
+import org.foray.orthography.PatternTree;
+
+import org.axsl.orthography.OrthographyException;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+impo...
[truncated message content] |
|
From: <vic...@us...> - 2021-11-18 20:29:34
|
Revision: 12089
http://sourceforge.net/p/foray/code/12089
Author: victormote
Date: 2021-11-18 20:29:32 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Improvements to lexers.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 19:57:19 UTC (rev 12088)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 20:29:32 UTC (rev 12089)
@@ -200,10 +200,11 @@
}
/* Resolve attached leading punctuation. */
- for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+ for (int breakIndex = 0; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
if (currentBreakType == CharType.ATTACHED_LEADING_PUNCTUATION) {
- final CharType previousBreakType = breakTypes[breakIndex - 1];
+ final CharType previousBreakType = breakIndex < 1 ?
+ CharType.ATTACHED_LEADING_PUNCTUATION : breakTypes[breakIndex - 1];
final CharType nextBreakType = breakTypes[breakIndex + 1];
if (previousBreakType == CharType.BREAK_CHAR) {
if (nextBreakType == CharType.BREAK_CHAR) {
@@ -213,6 +214,8 @@
/* Combine it with the previous whitespace. */
breakTypes[breakIndex] = CharType.BREAK_CHAR;
}
+ } else {
+ breakTypes[breakIndex] = CharType.BREAK_CHAR;
}
}
}
@@ -234,6 +237,8 @@
/* Combine it with the previous whitespace. */
breakTypes[breakIndex] = CharType.BREAK_CHAR;
}
+ } else {
+ breakTypes[breakIndex] = CharType.BREAK_CHAR;
}
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 19:57:19 UTC (rev 12088)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 20:29:32 UTC (rev 12089)
@@ -233,10 +233,10 @@
}
/**
- * Test.
+ * Test of a number within a word.
*/
@Test
- public void testCorner001() {
+ public void testWordWithNumber() {
final String testString = "Appendix D.4)";
final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(4, actual.size());
@@ -247,7 +247,7 @@
}
/**
- * Test.
+ * Test of double trailing punctuation at the end of the text.
*/
@Test
public void testDoubleTrailingPunctuationAtEnd() {
@@ -260,4 +260,21 @@
Assert.assertEquals(".”", actual.get(3));
}
+ /**
+ * Test of leading punctuation at the beginning of the text.
+ */
+ @Test
+ public void testLeadingPunctuationAtStart() {
+ final String testString = "“Go ye into";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ Assert.assertEquals(7, actual.size());
+ Assert.assertEquals("", actual.get(0));
+ Assert.assertEquals("“", actual.get(1));
+ Assert.assertEquals("Go", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("ye", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("into", actual.get(6));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 19:57:21
|
Revision: 12088
http://sourceforge.net/p/foray/code/12088
Author: victormote
Date: 2021-11-18 19:57:19 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Fix double attached trailing punctuation issue.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 19:50:20 UTC (rev 12087)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 19:57:19 UTC (rev 12088)
@@ -189,8 +189,7 @@
final CharType previousBreakType = breakTypes[breakIndex - 1];
final CharType nextBreakType = breakTypes[breakIndex + 1];
if (previousBreakType == CharType.WORD_CHAR) {
- if (nextBreakType == CharType.WORD_CHAR
- || nextBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
+ if (nextBreakType == CharType.WORD_CHAR) {
/* This also is part of the word. */
breakTypes[breakIndex] = CharType.WORD_CHAR;
} else {
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 19:50:20 UTC (rev 12087)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 19:57:19 UTC (rev 12088)
@@ -29,7 +29,6 @@
package org.foray.orthography;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
@@ -251,7 +250,6 @@
* Test.
*/
@Test
- @Ignore
public void testDoubleTrailingPunctuationAtEnd() {
final String testString = "every creature.”";
final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 19:50:23
|
Revision: 12087
http://sourceforge.net/p/foray/code/12087
Author: victormote
Date: 2021-11-18 19:50:20 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Improvements to numbers-as-words detection in Lexer.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/NumberUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2021-11-18 15:56:31 UTC (rev 12086)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2021-11-18 19:50:20 UTC (rev 12087)
@@ -93,6 +93,27 @@
General_Punctuation_Block.EM_DASH,
});
+ /** The set of characters that are Arabic digits, i.e. "0" through "9". */
+ private static final String ARABIC_DIGIT = new String(new char[] {
+ Basic_Latin_Block.DIGIT_ZERO,
+ Basic_Latin_Block.DIGIT_ONE,
+ Basic_Latin_Block.DIGIT_TWO,
+ Basic_Latin_Block.DIGIT_THREE,
+ Basic_Latin_Block.DIGIT_FOUR,
+ Basic_Latin_Block.DIGIT_FIVE,
+ Basic_Latin_Block.DIGIT_SIX,
+ Basic_Latin_Block.DIGIT_SEVEN,
+ Basic_Latin_Block.DIGIT_EIGHT,
+ Basic_Latin_Block.DIGIT_NINE,
+ });
+
+ /** The set of characters that could be included with {@link #ARABIC_DIGIT} codepoints to form an Arabic Numeral. */
+ private static final String ARABIC_NUMERAL_ADDITIONAL = new String(new char[] {
+ Basic_Latin_Block.FULL_STOP,
+ Basic_Latin_Block.COMMA,
+ Basic_Latin_Block.HYPHEN_MINUS,
+ });
+
/**
* Private constructor. This is a utility class, and should never be instantiated.
*/
@@ -310,4 +331,32 @@
return isBreakableSpace(c) || isNonBreakableSpace(c);
}
+ /**
+ * Indicates whether a given codepoint is an Arabic digit, i.e. "0" through "9".
+ * @param c The codepoint to be tested.
+ * @return True if and only if {@code c} is an Arabic digit.
+ */
+ public static boolean isArabicDigit(final int c) {
+ return ARABIC_DIGIT.indexOf(c) > -1;
+ }
+
+ /**
+ * Indicates whether a given codepoint, while not being an Arabic digit, is valid as part of an Arabic numeral.
+ * @param c The codepoint to be tested.
+ * @return True if and only if {@code c} is not an Arabic digit, but is valid as part of an Arabic numeral.
+ */
+ public static boolean isArabicNumeralAdditional(final int c) {
+ return ARABIC_NUMERAL_ADDITIONAL.indexOf(c) > -1;
+ }
+
+ /**
+ * Indicates whether a given codepoint is an Arabic numeral character.
+ * @param c The codepoint to be tested.
+ * @return True if and only if {@code c} is an Arabic numeral character.
+ */
+ public static boolean isArabicNumeral(final int c) {
+ return ARABIC_DIGIT.indexOf(c) > -1
+ || ARABIC_NUMERAL_ADDITIONAL.indexOf(c) > -1;
+ }
+
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/NumberUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/NumberUtils.java 2021-11-18 15:56:31 UTC (rev 12086)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/NumberUtils.java 2021-11-18 19:50:20 UTC (rev 12087)
@@ -48,4 +48,28 @@
return (input & 1) != 0;
}
+ /**
+ * Indicates whether a given sequence of characters could be parsed as an Arabic number, for example "1234" or
+ * "3.14159" or "-28".
+ * @param sequence The sequence containing the codepoints to be tested.
+ * @param start The first index to be tested.
+ * @param end The index after the last index to be tested.
+ * @return True if and only if all characters tested are Arabic numeral characters and at least one is a digit.
+ */
+ public static boolean isArabicNumber(final CharSequence sequence, final int start, final int end) {
+ int digitCount = 0;
+ for (int index = start; index < end; index ++) {
+ final char c = sequence.charAt(index);
+ if (! CharacterUtils.isArabicNumeral(c)) {
+ return false;
+ }
+ if (CharacterUtils.isArabicDigit(c)) {
+ digitCount ++;
+ }
+ }
+
+ /* In addition to being all valid characters, at least one of them must be an Arabic digit. */
+ return digitCount > 0;
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 15:56:31 UTC (rev 12086)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 19:50:20 UTC (rev 12087)
@@ -30,6 +30,7 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.CharacterUtils;
+import org.foray.common.primitive.NumberUtils;
import org.foray.common.primitive.StringUtils;
import org.axsl.common.sequence.IntSequence;
@@ -155,6 +156,15 @@
breakTypes[breakIndex] = CharType.END;
} else {
final int sequenceIndex = rawBreaks.intAt(breakIndex);
+ final int end = rawBreaks.intAt(breakIndex + 1);
+
+ /* Special cases where the first char alone does not tell the whole story. */
+ if (NumberUtils.isArabicNumber(sequence, sequenceIndex, end)) {
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ continue;
+ }
+
+ /* Interpret the sequence from the first char only. */
final int testChar = sequence.charAt(sequenceIndex);
breakTypes[breakIndex] = computeCharType(testChar);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 15:56:33
|
Revision: 12086
http://sourceforge.net/p/foray/code/12086
Author: victormote
Date: 2021-11-18 15:56:31 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Refactor Lexer tests to cover both the Java and ICU4J Lexers.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
Added: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java (rev 0)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2021-11-18 15:56:31 UTC (rev 12086)
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+
+import org.junit.Before;
+
+/**
+ * Tests of {@link LexerIcu4jBreakIterator}.
+ */
+public class LexerEnglishIcu4jTests extends LexerEnglishTests {
+
+ /** The object under test. */
+ private LexerIcu4jBreakIterator out;
+
+ /**
+ * Set up the test.
+ */
+ @Before
+ public void setupTest() {
+ this.out = new LexerIcu4jBreakIterator(WritingSystem4a.USA);
+ }
+
+ @Override
+ public LexerIcu4jBreakIterator getObjectUnderTest() {
+ return this.out;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Added: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java (rev 0)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2021-11-18 15:56:31 UTC (rev 12086)
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+
+import org.junit.Before;
+
+/**
+ * Tests of {@link LexerJavaBreakIterator}.
+ */
+public class LexerEnglishJavaTests extends LexerEnglishTests {
+
+ /** The object under test. */
+ private LexerJavaBreakIterator out;
+
+ /**
+ * Set up the test.
+ */
+ @Before
+ public void setupTest() {
+ this.out = new LexerJavaBreakIterator(WritingSystem4a.USA);
+ }
+
+ @Override
+ public LexerJavaBreakIterator getObjectUnderTest() {
+ return this.out;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 15:17:09 UTC (rev 12085)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 15:56:31 UTC (rev 12086)
@@ -28,10 +28,8 @@
package org.foray.orthography;
-import org.foray.common.i18n.WritingSystem4a;
-
import org.junit.Assert;
-import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
@@ -39,19 +37,13 @@
/**
* Tests of {@link LexerLatin1}.
*/
-public class LexerEnglishTests {
+public abstract class LexerEnglishTests {
- /** The object under test. */
- private LexerJavaBreakIterator out;
- /* TODO: This is temporarily referencing the wrong type. */
-
/**
- * Setup the test.
+ * Returns the Lexer object that is being tested.
+ * @return The Lexer object.
*/
- @Before
- public void setupTest() {
- this.out = new LexerJavaBreakIterator(WritingSystem4a.USA);
- }
+ public abstract Lexer4a getObjectUnderTest();
/**
* A simple test of {@link LexerLatin1#breakIntoWords(CharSequence)}.
@@ -59,7 +51,7 @@
@Test
public void testBreakSimple() {
final String testString = "Beware the ides of March.";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(10, actual.size());
Assert.assertEquals("Beware", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -79,7 +71,7 @@
@Test
public void testMedium() {
final String testString = "39. It was the best of times. It was the worst of times. <----";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(27, actual.size());
Assert.assertEquals("39", actual.get(0));
Assert.assertEquals(". ", actual.get(1));
@@ -117,7 +109,7 @@
public void testWithCompoundWord() {
/* Spoken by Juliet, Romeo & Juliet, Act 3 Scene 2. */
final String testString = "Gallop apace, you fiery-footed steeds,";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
/* Compound word "fiery-footed" treated as one word. */
Assert.assertEquals(10, actual.size());
@@ -140,7 +132,7 @@
public void testWithMidWordContractionApostrophe() {
/* Spoken by Hamlet, Hamlet, Act 2, Scene 2. */
final String testString = "The play's the thing";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(7, actual.size());
Assert.assertEquals("The", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -158,7 +150,7 @@
@Test
public void testWithSymbolsAsWords() {
final String testString = "! @ # $ %";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(9, actual.size());
Assert.assertEquals("!", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -179,7 +171,7 @@
@Test
public void testWithAttachedPunctuation() {
final String testString = "Parentheses (as I stated earlier) are a matching pair of ( and ) characters.";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(28, actual.size());
Assert.assertEquals("Parentheses", actual.get(0));
Assert.assertEquals(" (", actual.get(1));
@@ -219,7 +211,7 @@
@Test
public void testUnicodeWordBoundariesExample() {
final String testString = "The quick (“brown”) fox can’t jump 32.3 feet, right?";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(18, actual.size());
Assert.assertEquals("The", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -247,7 +239,7 @@
@Test
public void testCorner001() {
final String testString = "Appendix D.4)";
- final List<CharSequence> actual = this.out.tokenize(testString);
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
Assert.assertEquals(4, actual.size());
Assert.assertEquals("Appendix", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
@@ -255,4 +247,19 @@
Assert.assertEquals(")", actual.get(3));
}
+ /**
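+ * Tests that two consecutive trailing punctuation marks at the end of the input are returned as a single token.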
+ */
+ @Test
+ @Ignore
+ public void testDoubleTrailingPunctuationAtEnd() {
+ final String testString = "every creature.”";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ Assert.assertEquals(4, actual.size());
+ Assert.assertEquals("every", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("creature", actual.get(2));
+ Assert.assertEquals(".”", actual.get(3));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 15:17:11
|
Revision: 12085
http://sourceforge.net/p/foray/code/12085
Author: victormote
Date: 2021-11-18 15:17:09 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Convert remaining SAX parsers to subclass SaxParser.
Modified Paths:
--------------
trunk/foray/foray-core/src/main/java/org/foray/core/ConfigurationParser.java
trunk/foray/foray-font/src/main/java/org/foray/font/config/ConfigParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java
trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayPretty.java
Modified: trunk/foray/foray-core/src/main/java/org/foray/core/ConfigurationParser.java
===================================================================
--- trunk/foray/foray-core/src/main/java/org/foray/core/ConfigurationParser.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-core/src/main/java/org/foray/core/ConfigurationParser.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -30,6 +30,7 @@
import org.foray.common.Configuration;
import org.foray.common.ConfigurationException;
+import org.foray.common.xml.SaxParser;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
@@ -38,7 +39,6 @@
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.io.InputStream;
@@ -51,7 +51,7 @@
* in Configuration.
* Normally this class doesn't need to be accessed directly.
*/
-public class ConfigurationParser extends DefaultHandler {
+public class ConfigurationParser extends SaxParser {
/** Public ID of the FOray configuration DTD. */
public static final String FORAY_CONFIG_DTD_PUBLIC_ID = "-//FOray//DTD FOray Configuration V0.1//EN";
Modified: trunk/foray/foray-font/src/main/java/org/foray/font/config/ConfigParser.java
===================================================================
--- trunk/foray/foray-font/src/main/java/org/foray/font/config/ConfigParser.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-font/src/main/java/org/foray/font/config/ConfigParser.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -30,6 +30,7 @@
import org.foray.common.AxslDtdUtil;
import org.foray.common.url.UrlFactory;
+import org.foray.common.xml.SaxParser;
import org.foray.font.FontServer4a;
import org.axsl.font.Font;
@@ -38,7 +39,6 @@
import org.axsl.ps.PsEncoding;
import org.axsl.ps.PsException;
-import org.slf4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
@@ -47,7 +47,6 @@
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
import java.io.File;
import java.io.IOException;
@@ -69,7 +68,7 @@
* SAX2 Handler which retrieves the font configuration information and stores it in a {@link FontServer4a} instance.
* Normally this class doesn't need to be accessed directly.
*/
-public class ConfigParser extends DefaultHandler {
+public class ConfigParser extends SaxParser {
/** Stateful variable tracking which RegisteredFontFamily is currently
* being parsed. */
@@ -892,14 +891,6 @@
}
/**
- * Return the logger.
- * @return The logger.
- */
- private Logger getLogger() {
- return this.fontServer.getLogger();
- }
-
- /**
* Provides a formatted string showing the current locator context, which
* is useful in user messages to indicate where in the document a condition
* arose.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -31,6 +31,7 @@
import org.foray.common.AxslDtdUtil;
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.StringUtils;
+import org.foray.common.xml.SaxParser;
import org.foray.orthography.AmbiguousWord;
import org.foray.orthography.PosUtils;
import org.foray.orthography.SegmentDictionary;
@@ -53,7 +54,6 @@
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
-import org.xml.sax.ext.DefaultHandler2;
import java.io.IOException;
import java.util.ArrayList;
@@ -71,7 +71,7 @@
* Parses an axsl-dictionary XML document into a SegmentDictionary.
* @see DictionaryParser for a parser for similar data in simple text format.
*/
-public class DictionaryParserXml extends DefaultHandler2 {
+public class DictionaryParserXml extends SaxParser {
private class DictionaryElement {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -62,7 +62,6 @@
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.net.MalformedURLException;
@@ -511,9 +510,9 @@
/**
* Called by {@link #endElement(String, String, String)} so that we can make sure we get housekeeping done after
* this method has run.
- * @param uri See {@link DefaultHandler#endElement(String, String, String)}.
- * @param localName See {@link DefaultHandler#endElement(String, String, String)}.
- * @param qName See {@link DefaultHandler#endElement(String, String, String)}.
+ * @param uri See {@link #endElement(String, String, String)}.
+ * @param localName See {@link #endElement(String, String, String)}.
+ * @param qName See {@link #endElement(String, String, String)}.
*/
private void endElementInside(final String uri, final String localName, final String qName) {
switch(localName) {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -35,6 +35,7 @@
import org.foray.common.primitive.ObjectUtils;
import org.foray.common.primitive.StringUtils;
import org.foray.common.primitive.XmlUtils;
+import org.foray.common.xml.SaxParser;
import org.foray.orthography.Orthography4a;
import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.OrthographyServerConfig;
@@ -60,7 +61,6 @@
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
-import org.xml.sax.ext.DefaultHandler2;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
@@ -82,7 +82,7 @@
* The specified natural language can change at any time, and must be tracked
* General plan is to capture all of the text data in one CharSequence, then parse and compare to dictionary entries.
*/
-public class SpellChecker extends DefaultHandler2 {
+public class SpellChecker extends SaxParser {
private class Element {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ValidateChars.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -30,6 +30,7 @@
import org.foray.common.i18n.Language4a;
import org.foray.common.url.UrlFactory;
+import org.foray.common.xml.SaxParser;
import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.OrthographyServerConfig;
@@ -48,7 +49,6 @@
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
-import org.xml.sax.ext.DefaultHandler2;
import java.io.BufferedInputStream;
import java.io.File;
@@ -69,7 +69,7 @@
* misspelled or that are not encoded properly, so that they can be fixed in
* preparation for creating a word list.
*/
-public class ValidateChars extends DefaultHandler2 {
+public class ValidateChars extends SaxParser {
/** Command-line return status constant indicating that the number of
* arguments is wrong. */
@@ -115,9 +115,6 @@
/** The server used to find natural language resources. */
private OrthographyServer4a server;
- /** The logger (lazily created). Use {#getLogger()} to get the instance. */
- private Logger logger;
-
/**
* Constructor.
* @param server The server used to find natural language resources.
@@ -297,17 +294,6 @@
}
/**
- * Returns the logger.
- * @return The logger.
- */
- public Logger getLogger() {
- if (this.logger == null) {
- this.logger = LoggerFactory.getLogger(ValidateChars.class);
- }
- return this.logger;
- }
-
- /**
* Convenience method to log an error.
* @param message The message to be logged.
*/
Modified: trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayPretty.java
===================================================================
--- trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayPretty.java 2021-11-18 15:02:16 UTC (rev 12084)
+++ trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayPretty.java 2021-11-18 15:17:09 UTC (rev 12085)
@@ -29,6 +29,7 @@
package org.foray.xml;
import org.foray.common.primitive.XmlUtils;
+import org.foray.common.xml.SaxParser;
import org.foray.xml.dtd.Dtd;
import org.foray.xml.dtd.DtdElement;
import org.foray.xml.dtd.ElementStack;
@@ -50,7 +51,6 @@
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
-import org.xml.sax.ext.DefaultHandler2;
import org.xml.sax.helpers.AttributesImpl;
import java.io.BufferedInputStream;
@@ -71,7 +71,7 @@
* Command-line application that reads an XML file and writes a pretty-printed
* version of it.
*/
-public class ForayPretty extends DefaultHandler2 {
+public class ForayPretty extends SaxParser {
/** Command-line status constant indicating that the command line itself was not properly formed. */
public static final byte STATUS_COMMAND_LINE_ERROR = 1;
@@ -115,9 +115,6 @@
/** A queue of unprocessed elements. */
private Queue queue = new Queue();
- /** The logger (lazily created). Use {#getLogger()} to get the instance. */
- private Logger logger;
-
/* Begin state variables. */
/** State variable tracking whether we are currently inside the Dtd or
@@ -712,17 +709,6 @@
}
/**
- * Returns the logger.
- * @return The logger.
- */
- public Logger getLogger() {
- if (this.logger == null) {
- this.logger = LoggerFactory.getLogger(ForayPretty.class);
- }
- return this.logger;
- }
-
- /**
* Convenience method to log an error.
* @param message The message to be logged.
*/
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 15:02:19
|
Revision: 12084
http://sourceforge.net/p/foray/code/12084
Author: victormote
Date: 2021-11-18 15:02:16 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
1. Move more reusable code to SaxParser. 2. Have SaxParser manage the logger.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeServer4a.java
trunk/foray/foray-fotree/src/test/java/org/foray/fotree/AbstractPropertyTest.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -28,10 +28,15 @@
package org.foray.common.xml;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.DefaultHandler2;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
@@ -40,6 +45,9 @@
*/
public abstract class SaxParser extends DefaultHandler2 {
+ /** The logger. */
+ private Logger logger = LoggerFactory.getLogger(this.getClass());
+
/**
* Returns the name of the SAX Parser class that is found in the classpath.
* @return The name of the SAX Parser class that is found in the classpath.
@@ -56,6 +64,14 @@
}
/**
+ * Returns the logger for this parser.
+ * @return The logger.
+ */
+ public final Logger getLogger() {
+ return this.logger;
+ }
+
+ /**
* Creates a SAX2 parser with a standard configuration. Specifically,
* the parser is namespace-aware and has the "namespace-prefixes" feature
* set to true.
@@ -72,4 +88,53 @@
return xmlReader;
}
+ /**
+ * Instantiates an instance of a specified class using reflection, and ensures that it is a subtype of a given type.
+ * @param className The name of the class that should be instantiated.
+ * @param expectedType The expected superclass for {@code className}.
+ * @param <T> The type of the superclass object that is being instantiated.
+ * @param parameterTypes The array of parameter types.
+ * @param parameters The array of parameters.
+ * @return The new instance of {@code className}, or null if that class is not a subtype of {@code expectedType}.
+ * @throws SAXException Wraps a number of exceptions that can be thrown during instantiation by reflection.
+ */
+ protected <T extends Object> T instantiate(final String className, final Class<T> expectedType,
+ final Class<?>[] parameterTypes, final Object[] parameters) throws SAXException {
+ Class<?> theClass = null;
+ try {
+ theClass = Class.forName(className);
+ } catch (final ClassNotFoundException e) {
+ throw new SAXException(e);
+ }
+ if (! expectedType.isAssignableFrom(theClass)) {
+ getLogger().warn("Class \"{}\" is not a {} class.", className, expectedType.getName());
+ return null;
+ }
+
+ @SuppressWarnings("unchecked")
+ final Class<T> factoryClass = (Class<T>) theClass;
+ Constructor<T> constructor = null;
+ try {
+ constructor = factoryClass.getConstructor(parameterTypes);
+ } catch (final SecurityException e) {
+ throw new SAXException(e);
+ } catch (final NoSuchMethodException e) {
+ throw new SAXException(e);
+ }
+
+ T newInstance = null;
+ try {
+ newInstance = constructor.newInstance(parameters);
+ } catch (final IllegalArgumentException e) {
+ throw new SAXException(e);
+ } catch (final InstantiationException e) {
+ throw new SAXException(e);
+ } catch (final IllegalAccessException e) {
+ throw new SAXException(e);
+ } catch (final InvocationTargetException e) {
+ throw new SAXException(e);
+ }
+ return newInstance;
+ }
+
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -47,8 +47,6 @@
import org.axsl.speech.SpeechServer;
import org.axsl.text.TextServer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
@@ -86,9 +84,6 @@
/** The parent server. */
private FoTreeServer4a server;
- /** The logger. */
- private Logger logger;
-
/** The graphic server. */
private GraphicServer graphicServer;
@@ -118,11 +113,9 @@
/**
* Constructor.
* @param server The parent server.
- * @param logger The logger.
*/
- public FoTreeBuilder(final FoTreeServer4a server, final Logger logger) {
+ public FoTreeBuilder(final FoTreeServer4a server) {
this.server = server;
- this.logger = logger;
}
@Override
@@ -296,17 +289,6 @@
}
/**
- * Returns the logger.
- * @return The logger.
- */
- public Logger getLogger() {
- if (this.logger == null) {
- this.logger = LoggerFactory.getLogger(FoTreeBuilder.class);
- }
- return this.logger;
- }
-
- /**
* Returns the line number currently showing in the SAX Locator.
* @return The line number currently showing in the SAX Locator.
*/
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeServer4a.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeServer4a.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeServer4a.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -280,8 +280,7 @@
@Override
public FoTreeBuilder makeFoTree() {
- final FoTreeBuilder foTreeBuilder = new FoTreeBuilder(this,
- this.getLogger());
+ final FoTreeBuilder foTreeBuilder = new FoTreeBuilder(this);
foTreeBuilder.setGraphicServer(this.graphicServer);
foTreeBuilder.setTextServer(this.textServer);
foTreeBuilder.setOrthographyServer(this.orthographyServer);
Modified: trunk/foray/foray-fotree/src/test/java/org/foray/fotree/AbstractPropertyTest.java
===================================================================
--- trunk/foray/foray-fotree/src/test/java/org/foray/fotree/AbstractPropertyTest.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-fotree/src/test/java/org/foray/fotree/AbstractPropertyTest.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -94,7 +94,7 @@
* @return The newly-created FoObj.
*/
protected static final FoObj makeTestFObj() {
- final FoTreeBuilder foTreeBuilder = new FoTreeBuilder(null, null);
+ final FoTreeBuilder foTreeBuilder = new FoTreeBuilder(null);
final PropertyList propertyList = new PropertyList(0);
final Root root = new Root(propertyList);
root.setFOTreeBuilder(foTreeBuilder);
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -255,7 +255,7 @@
this.logger.debug("reading " + naturalLanguageDir + filePrefix
+ ".xml");
}
- final NatLangParser parser = new NatLangParser(this.logger);
+ final NatLangParser parser = new NatLangParser();
nl = parser.parse(nlFile);
return nl;
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -38,8 +38,6 @@
import org.axsl.orthography.OrthographyException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@@ -86,9 +84,6 @@
* are returned with exceptions. */
private String errMsg;
- /** The logger. */
- private Logger logger = LoggerFactory.getLogger(this.getClass());
-
/**
* Constructor.
* @param consumer The pattern consumer implementation that will accept the data created by this parser.
@@ -121,7 +116,7 @@
} catch (final IOException e) {
/* This is a normal condition, and just means that the URL to the
* pattern file does not exist. */
- this.logger.error("Cannot open hyphenation pattern: "
+ getLogger().error("Cannot open hyphenation pattern: "
+ url.toString() + ForayConstants.LOG_NEWLINE
+ e.getMessage());
return;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -40,8 +40,6 @@
import org.axsl.orthography.OrthographyException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
@@ -72,25 +70,16 @@
* are returned with exceptions. */
private String errMsg;
- /** The logger. */
- private Logger logger;
-
/**
* Constructor.
- * @param logger The logger for user messages.
*/
- public NatLangParser(final Logger logger) {
- if (logger == null) {
- this.logger = LoggerFactory.getLogger(NatLangParser.class);
- } else {
- this.logger = logger;
- }
+ public NatLangParser() {
try {
this.parser = createSax2Parser();
} catch (final SAXException e) {
- this.logger.error(e.getMessage(), e);
+ getLogger().error(e.getMessage(), e);
} catch (final ParserConfigurationException e) {
- this.logger.error(e.getMessage(), e);
+ getLogger().error(e.getMessage(), e);
}
this.parser.setContentHandler(this);
this.parser.setErrorHandler(this);
@@ -112,7 +101,7 @@
} catch (final IOException e) {
/* This is a normal condition, and just means that the URL to the
* pattern file does not exist. */
- this.logger.error("Cannot open hyphenation pattern: "
+ getLogger().error("Cannot open hyphenation pattern: "
+ url.toString() + ForayConstants.LOG_NEWLINE
+ e.getMessage());
return null;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 14:35:17 UTC (rev 12083)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 15:02:16 UTC (rev 12084)
@@ -37,6 +37,7 @@
import org.foray.common.resource.ResourceLocation;
import org.foray.common.resource.ResourceLocationClasspath;
import org.foray.common.resource.ResourceLocationUrl;
+import org.foray.common.xml.SaxParser;
import org.foray.orthography.DerivativePattern;
import org.foray.orthography.DerivativeRule;
import org.foray.orthography.DictionaryResource;
@@ -53,8 +54,6 @@
import org.axsl.orthography.Word.PosQualifier;
import org.axsl.orthography.optional.Lexer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
@@ -66,8 +65,6 @@
import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
@@ -84,11 +81,8 @@
* instance.
* Normally this class doesn't need to be accessed directly.
*/
-public class OrthographyConfigParser extends DefaultHandler {
+public class OrthographyConfigParser extends SaxParser {
- /** The logger. */
- private Logger logger = LoggerFactory.getLogger(this.getClass());
-
/** Stateful variable. */
private DictionaryResource currentDictionaryResource;
@@ -192,9 +186,9 @@
try {
parser.setFeature("http://xml.org/sax/features/validation", true);
} catch (final SAXNotRecognizedException e) {
- this.logger.warn("Parser does not recognize validation.");
+ getLogger().warn("Parser does not recognize validation.");
} catch (final SAXNotSupportedException e) {
- this.logger.warn("Parser does not support validation.");
+ getLogger().warn("Parser does not support validation.");
}
parser.setContentHandler(this);
final EntityResolver resolver = AxslDtdUtil.getEntityResolver();
@@ -225,7 +219,7 @@
final XMLReader xmlReader = spf.newSAXParser().getXMLReader();
final EntityResolver entityResolver = this.hyphenationServer.getEntityResolver();
xmlReader.setEntityResolver(entityResolver);
- this.logger.debug("Orthography Configuration Parsing: Using {} as SAX2 Parser",
+ getLogger().debug("Orthography Configuration Parsing: Using {} as SAX2 Parser",
xmlReader.getClass().getName());
return xmlReader;
} catch (final javax.xml.parsers.ParserConfigurationException e) {
@@ -319,8 +313,8 @@
final String reference = attributes.getValue("reference");
final DictionaryResource resource = this.dictionaries.get(reference);
if (resource == null) {
- this.logger.error("dictionary-resource not found: {}", reference);
- this.logger.error(getContextMessage());
+ getLogger().error("dictionary-resource not found: {}", reference);
+ getLogger().error(getContextMessage());
} else {
this.currentOrthographyConfig.setDictionaryResource(resource);
}
@@ -330,8 +324,8 @@
final String reference = attributes.getValue("reference");
final HyphenationPatternsResource resource = this.hyphenationPatterns.get(reference);
if (resource == null) {
- this.logger.error("hyphenation-patterns-resource not found: {}", reference);
- this.logger.error(getContextMessage());
+ getLogger().error("hyphenation-patterns-resource not found: {}", reference);
+ getLogger().error(getContextMessage());
} else {
this.currentOrthographyConfig.setHyphenationPatternsResource(resource);
}
@@ -341,8 +335,8 @@
final String reference = attributes.getValue("reference");
final List<Pattern> patterns = this.hyphenationServer.getMatchRules(reference);
if (patterns == null) {
- this.logger.error("match-rules not found: {}", reference);
- this.logger.error(getContextMessage());
+ getLogger().error("match-rules not found: {}", reference);
+ getLogger().error(getContextMessage());
} else {
this.currentOrthographyConfig.registerMatchRuleListId(reference);
}
@@ -352,8 +346,8 @@
final String reference = attributes.getValue("reference");
final List<DerivativePattern> rules = this.hyphenationServer.getDerivativePatterns(reference);
if (rules == null) {
- this.logger.error("derivative-rules not found: {}", reference);
- this.logger.error(getContextMessage());
+ getLogger().error("derivative-rules not found: {}", reference);
+ getLogger().error(getContextMessage());
} else {
this.currentOrthographyConfig.registerDerivativeRuleListId(reference);
}
@@ -363,8 +357,8 @@
final String reference = attributes.getValue("reference");
final List<WordWrapperFactory<?>> factories = this.derivativeLists.get(reference);
if (factories == null) {
- this.logger.error("derivative-factories not found: {}", reference);
- this.logger.error(getContextMessage());
+ getLogger().error("derivative-factories not found: {}", reference);
+ getLogger().error(getContextMessage());
} else {
this.currentOrthographyConfig.setWordWrapperFactories(factories);
}
@@ -472,7 +466,7 @@
}
default: {
/* Make sure user knows about unknown tag. */
- this.logger.error("Unknown tag in orthography configuration: {}", localName);
+ getLogger().error("Unknown tag in orthography configuration: {}", localName);
}
}
}
@@ -487,76 +481,27 @@
final String scriptString = attributes.getValue("script-iso-4char");
final Language4a language = Language4a.findFrom3Char(languageString);
if (language == null) {
- this.logger.error("Unable to find language for: {}", languageString);
- this.logger.error(getContextMessage());
+ getLogger().error("Unable to find language for: {}", languageString);
+ getLogger().error(getContextMessage());
}
final Script4a script = Script4a.findFromAlpha(scriptString);
if (script == null) {
- this.logger.error("Unable to find script for: {}", scriptString);
- this.logger.error(getContextMessage());
+ getLogger().error("Unable to find script for: {}", scriptString);
+ getLogger().error(getContextMessage());
}
final Country4a country = Country4a.findFrom3Char(countryString);
if (country == null) {
- this.logger.error("Unable to find country for: {}", countryString);
- this.logger.error(getContextMessage());
+ getLogger().error("Unable to find country for: {}", countryString);
+ getLogger().error(getContextMessage());
}
final WritingSystem4a writingSystem = WritingSystem4a.find(language, script, country);
if (writingSystem == null) {
- this.logger.error("Unable to find script for: {}_{}_{}", languageString, scriptString, countryString);
- this.logger.error(getContextMessage());
+ getLogger().error("Unable to find script for: {}_{}_{}", languageString, scriptString, countryString);
+ getLogger().error(getContextMessage());
}
this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig);
}
- /**
- * Instantiates an instance of a specified class using reflection, and ensures that it is a subtype of a given type.
- * @param className The name of the class that should be instantiated.
- * @param expectedType The expected superclass for {@code className}.
- * @param <T> The type of the superclass object that is being instantiated.
- * @param parameterTypes The array of parameter types.
- * @param parameters The array of parameters.
- * @return The new instance of {@code className}, or null if it could not be created.
- * @throws SAXException Wraps a number of exceptions that can be thrown during instantiation by reflection.
- */
- private <T extends Object> T instantiate(final String className, final Class<T> expectedType,
- final Class<?>[] parameterTypes, final Object[] parameters) throws SAXException {
- Class<?> theClass = null;
- try {
- theClass = Class.forName(className);
- } catch (final ClassNotFoundException e) {
- throw new SAXException(e);
- }
- if (! expectedType.isAssignableFrom(theClass)) {
- this.logger.warn("Class \"{}\" is not a {} class.", className, WordWrapperFactory.class.getName());
- return null;
- }
-
- @SuppressWarnings("unchecked")
- final Class<T> factoryClass = (Class<T>) theClass;
- Constructor<T> constructor = null;
- try {
- constructor = factoryClass.getConstructor(parameterTypes);
- } catch (final SecurityException e) {
- throw new SAXException(e);
- } catch (final NoSuchMethodException e) {
- throw new SAXException(e);
- }
-
- T newInstance = null;
- try {
- newInstance = constructor.newInstance(parameters);
- } catch (final IllegalArgumentException e) {
- throw new SAXException(e);
- } catch (final InstantiationException e) {
- throw new SAXException(e);
- } catch (final IllegalAccessException e) {
- throw new SAXException(e);
- } catch (final InvocationTargetException e) {
- throw new SAXException(e);
- }
- return newInstance;
- }
-
@Override
public void endElement(final String uri, final String localName, final String qName) {
endElementInside(uri, localName, qName);
@@ -749,8 +694,8 @@
try {
return new URL(urlString);
} catch (final MalformedURLException e) {
- this.logger.error("Invalid URL: {}", urlString);
- this.logger.error(getContextMessage());
+ getLogger().error("Invalid URL: {}", urlString);
+ getLogger().error(getContextMessage());
return null;
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 14:35:20
|
Revision: 12083
http://sourceforge.net/p/foray/code/12083
Author: victormote
Date: 2021-11-18 14:35:17 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Convert SaxParser from a utility class to an abstract superclass, to promote sharing of code amongst parsers.
Modified Paths:
--------------
trunk/foray/foray-app/src/test/java/org/foray/app/fo/FoDocumentReader.java
trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java
trunk/foray/foray-core/src/main/java/org/foray/core/ForayDocument.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
Modified: trunk/foray/foray-app/src/test/java/org/foray/app/fo/FoDocumentReader.java
===================================================================
--- trunk/foray/foray-app/src/test/java/org/foray/app/fo/FoDocumentReader.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-app/src/test/java/org/foray/app/fo/FoDocumentReader.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -31,7 +31,6 @@
import org.foray.app.ForaySpecific;
import org.foray.common.Environment;
import org.foray.common.url.UrlFactory;
-import org.foray.common.xml.SaxParser;
import org.foray.core.ForayException;
import org.foray.core.SessionConfig;
import org.foray.fotree.FoTreeBuilder;
@@ -127,6 +126,7 @@
* @throws ForayException For errors building the FO Tree.
*/
public FoTreeBuilder buildFoTree(final String file) throws ForayException {
+ final FoTreeBuilder foTreeBuilder = this.treeServer.makeFoTree();
final File foFile = new File(this.testDirectory, file);
final InputStream foInputStream;
try {
@@ -137,17 +137,16 @@
final InputSource foInputSource = new InputSource(foInputStream);
final XMLReader xmlReader;
try {
- xmlReader = SaxParser.createSax2Parser();
+ xmlReader = foTreeBuilder.createSax2Parser();
} catch (final SAXException e) {
throw new ForayException(e);
} catch (final ParserConfigurationException e) {
throw new ForayException(e);
}
- final FoTreeBuilder foTree = this.treeServer.makeFoTree();
final FontServer fontServer = this.treeServer.getFontServer();
final FontConsumer fontConsumer = fontServer.makeFontConsumer();
- foTree.setFontConsumer(fontConsumer);
- xmlReader.setContentHandler(foTree);
+ foTreeBuilder.setFontConsumer(fontConsumer);
+ xmlReader.setContentHandler(foTreeBuilder);
try {
xmlReader.parse(foInputSource);
} catch (final IOException e) {
@@ -155,7 +154,7 @@
} catch (final SAXException e) {
throw new ForayException(e);
}
- return foTree;
+ return foTreeBuilder;
}
/**
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/xml/SaxParser.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -30,22 +30,17 @@
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
+import org.xml.sax.ext.DefaultHandler2;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
/**
- * Utility class that provides information about an XML Parser.
+ * Abstract superclass for SAX parsers.
*/
-public final class SaxParser {
+public abstract class SaxParser extends DefaultHandler2 {
/**
- * Private constructor (should not be instantiated).
- */
- private SaxParser() {
- }
-
- /**
* Returns the name of the SAX Parser class that is found in the classpath.
* @return The name of the SAX Parser class that is found in the classpath.
*/
@@ -68,7 +63,7 @@
* @throws SAXException For errors creating the parser.
* @throws ParserConfigurationException For errors configuring the parser.
*/
- public static XMLReader createSax2Parser() throws SAXException, ParserConfigurationException {
+ public XMLReader createSax2Parser() throws SAXException, ParserConfigurationException {
final SAXParserFactory spf = javax.xml.parsers.SAXParserFactory.newInstance();
spf.setNamespaceAware(true);
XMLReader xmlReader = null;
Modified: trunk/foray/foray-core/src/main/java/org/foray/core/ForayDocument.java
===================================================================
--- trunk/foray/foray-core/src/main/java/org/foray/core/ForayDocument.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-core/src/main/java/org/foray/core/ForayDocument.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -54,6 +54,7 @@
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
@@ -458,7 +459,10 @@
XMLReader createParser() throws ForayException {
final XMLReader xmlReader;
try {
- xmlReader = SaxParser.createSax2Parser();
+ final SAXParserFactory spf = javax.xml.parsers.SAXParserFactory.newInstance();
+ spf.setNamespaceAware(true);
+ xmlReader = spf.newSAXParser().getXMLReader();
+ xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
} catch (final SAXException e) {
throw new ForayException(e);
} catch (final ParserConfigurationException e) {
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoTreeBuilder.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -28,6 +28,7 @@
package org.foray.fotree;
+import org.foray.common.xml.SaxParser;
import org.foray.fotree.axsl.NamespaceAxsl;
import org.foray.fotree.fo.NamespaceFo;
import org.foray.fotree.fo.obj.PageSequence;
@@ -51,7 +52,6 @@
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
import java.net.URL;
import java.util.ArrayList;
@@ -60,8 +60,7 @@
/**
* SAX Handler that builds the formatting object tree.
*/
-public class FoTreeBuilder extends DefaultHandler
- implements org.axsl.fo.FoTree {
+public class FoTreeBuilder extends SaxParser implements org.axsl.fo.FoTree {
/** The current formatting object being parsed. */
private FoObj currentFObj = null;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/PatternParser.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -45,7 +45,6 @@
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.io.InputStream;
@@ -57,7 +56,7 @@
* A SAX document handler to read and parse hyphenation patterns
* from a XML file.
*/
-public class PatternParser extends DefaultHandler {
+public class PatternParser extends SaxParser {
/** Constant indicating that the current element is "classes". */
static final int ELEM_CLASSES = 1;
@@ -101,7 +100,7 @@
}
this.token = new StringBuilder();
try {
- this.parser = SaxParser.createSax2Parser();
+ this.parser = createSax2Parser();
} catch (final SAXException | ParserConfigurationException e) {
throw new OrthographyException(e);
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/NatLangParser.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -48,7 +48,6 @@
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.io.InputStream;
@@ -61,7 +60,7 @@
* A SAX document handler to read and parse natural-language descriptions
* from an XML file.
*/
-public class NatLangParser extends DefaultHandler {
+public class NatLangParser extends SaxParser {
/** The natural language instance being parsed. */
private NaturalLanguage nl;
@@ -87,7 +86,7 @@
this.logger = logger;
}
try {
- this.parser = SaxParser.createSax2Parser();
+ this.parser = createSax2Parser();
} catch (final SAXException e) {
this.logger.error(e.getMessage(), e);
} catch (final ParserConfigurationException e) {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 13:52:42 UTC (rev 12082)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 14:35:17 UTC (rev 12083)
@@ -533,7 +533,6 @@
@SuppressWarnings("unchecked")
final Class<T> factoryClass = (Class<T>) theClass;
- /* For now, use only the no-args constructor. */
Constructor<T> constructor = null;
try {
constructor = factoryClass.getConstructor(parameterTypes);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-18 13:52:44
|
Revision: 12082
http://sourceforge.net/p/foray/code/12082
Author: victormote
Date: 2021-11-18 13:52:42 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
Rename parser class for clarity.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2021-11-18 13:42:26 UTC (rev 12081)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2021-11-18 13:52:42 UTC (rev 12082)
@@ -36,9 +36,9 @@
import org.foray.common.ForayConstants;
import org.foray.common.i18n.Language4a;
import org.foray.common.url.UrlFactory;
-import org.foray.orthography.util.ConfigParser;
import org.foray.orthography.util.NatLangParser;
import org.foray.orthography.util.NaturalLanguage;
+import org.foray.orthography.util.OrthographyConfigParser;
import org.axsl.common.i18n.Language;
import org.axsl.common.i18n.WritingSystem;
@@ -105,7 +105,7 @@
}
final InputSource inputSource = new InputSource(inputStream);
- final ConfigParser parser = new ConfigParser(this, inputSource);
+ final OrthographyConfigParser parser = new OrthographyConfigParser(this, inputSource);
parser.start();
}
}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java 2021-11-18 13:42:26 UTC (rev 12081)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java 2021-11-18 13:52:42 UTC (rev 12082)
@@ -1,774 +0,0 @@
-/*
- * Copyright 2019 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography.util;
-
-import org.foray.common.AxslDtdUtil;
-import org.foray.common.i18n.Country4a;
-import org.foray.common.i18n.Language4a;
-import org.foray.common.i18n.Script4a;
-import org.foray.common.i18n.WritingSystem4a;
-import org.foray.common.primitive.StringUtils;
-import org.foray.common.resource.ResourceLocation;
-import org.foray.common.resource.ResourceLocationClasspath;
-import org.foray.common.resource.ResourceLocationUrl;
-import org.foray.orthography.DerivativePattern;
-import org.foray.orthography.DerivativeRule;
-import org.foray.orthography.DictionaryResource;
-import org.foray.orthography.HyphenationPatternsResource;
-import org.foray.orthography.Orthography4a;
-import org.foray.orthography.OrthographyServer4a;
-import org.foray.orthography.PosUtils;
-import org.foray.orthography.WordWrapperFactory;
-
-import org.axsl.orthography.OrthographyException;
-import org.axsl.orthography.Word;
-import org.axsl.orthography.Word.DerivativeType;
-import org.axsl.orthography.Word.PartOfSpeech;
-import org.axsl.orthography.Word.PosQualifier;
-import org.axsl.orthography.optional.Lexer;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.Attributes;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXNotRecognizedException;
-import org.xml.sax.SAXNotSupportedException;
-import org.xml.sax.XMLReader;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-import java.util.regex.Pattern;
-
-import javax.xml.parsers.SAXParserFactory;
-
-/**
- * SAX2 Handler which retrieves the orthography configuration information and stores it in a {@link OrthographyServer4a}
- * instance.
- * Normally this class doesn't need to be accessed directly.
- */
-public class ConfigParser extends DefaultHandler {
-
- /** The logger. */
- private Logger logger = LoggerFactory.getLogger(this.getClass());
-
- /** Stateful variable. */
- private DictionaryResource currentDictionaryResource;
-
- /** Stateful variable. */
- private DictionaryResource.WordListElement currentWordListElement;
-
- /** Stateful variable. */
- private HyphenationPatternsResource currentHyphenationPatternsResource;
-
- /** Stateful variable. */
- private List<Pattern> currentMatchRuleList;
-
- /** Stateful variable. */
- private List<DerivativePattern> currentDerivativePatternList;
-
- /** Stateful variable. */
- private List<DerivativeRule> currentDerivativeRuleList;
-
- /** Component of: derivative-rule. */
- private Word.PartOfSpeech currentPartOfSpeech;
-
- /** Component of: derivative-rule. */
- private PosQualifier currentQualifier;
-
- /** Component of: derivative-rule. */
- private List<DerivativeType> currentDerivativeTypeList;
-
- /** Stateful variable. */
- private Pattern currentDerivativeRuleMatch;
-
- /** Stateful variable. */
- private String currentDerivativeRuleReplace;
-
- /** Stateful variable. */
- private List<WordWrapperFactory<?>> currentDerivateFactoryList;
-
- /** Stateful variable. */
- private ResourceLocation.Type currentResourceLocationType;
-
- /** Stateful variable. */
- private ResourceLocation currentResourceLocation;
-
- /** Receives content of text nodes. */
- private StringBuilder textAccumulator = new StringBuilder();
-
- /** Stateful variable tracking the current orthography configuration. */
- private transient Orthography4a currentOrthographyConfig;
-
-// /** The map of match rule lists, keyed by id. */
-// private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
-//
- /** The map of derivative factory lists, keyed by id. */
- private Map<String, List<WordWrapperFactory<?>>> derivativeLists =
- new HashMap<String, List<WordWrapperFactory<?>>>();
-
- /** The map of dictionary instances, keyed by id. */
- private Map<String, DictionaryResource> dictionaries = new HashMap<String, DictionaryResource>();
-
- /** The map of hyphenation pattern tree instances, keyed by id. */
- private Map<String, HyphenationPatternsResource> hyphenationPatterns =
- new HashMap<String, HyphenationPatternsResource>();
-
- /** The InputSource encapsulating the configuration file. */
- private InputSource filename;
-
- /** The hyphenation server receiving the parsed information. */
- private OrthographyServer4a hyphenationServer;
-
- /** The XML parser's Locator instance, used to indicate line and column numbers in user messages. */
- private Locator locator;
-
- /** The stack of elements currently being processed. */
- private Stack<String> elementStack = new Stack<String>();
-
- /**
- * Register the URLStreamHandler for classpath: URLs.
- * This has to be done only once, hence a static statement.
- */
- static {
- org.foray.common.url.classpath.Handler.register();
- }
-
- /**
- * Constructor.
- * @param server The hyphenation server which will capture the information from the parsed configuration.
- * @param filename The file which contains the configuration information
- * to be parsed.
- */
- public ConfigParser(final OrthographyServer4a server, final InputSource filename) {
- this.hyphenationServer = server;
- this.filename = filename;
- }
-
- /**
- * Parses the configuration file.
- * @throws OrthographyException For errors during parsing.
- */
- public void start() throws OrthographyException {
- final XMLReader parser = createParser();
- /* Turn on validation if it is available. */
- try {
- parser.setFeature("http://xml.org/sax/features/validation", true);
- } catch (final SAXNotRecognizedException e) {
- this.logger.warn("Parser does not recognize validation.");
- } catch (final SAXNotSupportedException e) {
- this.logger.warn("Parser does not support validation.");
- }
- parser.setContentHandler(this);
- final EntityResolver resolver = AxslDtdUtil.getEntityResolver();
- parser.setEntityResolver(resolver);
-
- try {
- parser.parse(this.filename);
- } catch (final SAXException e) {
- if (e.getException() instanceof OrthographyException) {
- throw (OrthographyException) e.getException();
- }
- throw new OrthographyException(e);
- } catch (final IOException e) {
- throw new OrthographyException(e);
- }
- }
-
- /**
- * Creates a namespace-aware SAX parser for parsing the configuration file.
- * @return The created SAX parser, with the server's entity resolver already set on it.
- * @throws OrthographyException For errors creating or configuring the parser.
- */
- private XMLReader createParser() throws OrthographyException {
- try {
- final SAXParserFactory spf =
- javax.xml.parsers.SAXParserFactory.newInstance();
- spf.setNamespaceAware(true); /* The handler methods dispatch on localName. */
- final XMLReader xmlReader = spf.newSAXParser().getXMLReader();
- final EntityResolver entityResolver = this.hyphenationServer.getEntityResolver();
- xmlReader.setEntityResolver(entityResolver);
- this.logger.debug("Orthography Configuration Parsing: Using {} as SAX2 Parser",
- xmlReader.getClass().getName());
- return xmlReader;
- } catch (final javax.xml.parsers.ParserConfigurationException e) {
- throw new OrthographyException(e);
- } catch (final org.xml.sax.SAXException e) {
- throw new OrthographyException(e);
- }
- }
-
- @Override
- public void startElement(final String uri, final String localName, final String qName,
- final Attributes attributes) throws SAXException {
- this.elementStack.push(localName); /* Popped again by endElement(). */
- switch(localName) {
- case "axsl-orthography-config": {
- /* Nothing to do here. */
- return;
- }
- case "match-rule-list": {
- final String id = attributes.getValue("id");
- this.currentMatchRuleList = new ArrayList<Pattern>();
- this.hyphenationServer.registerMatchRules(id, currentMatchRuleList);
- return;
- }
- case "derivative-pattern-list": {
- final String id = attributes.getValue("id");
- this.currentDerivativePatternList = new ArrayList<DerivativePattern>();
- this.hyphenationServer.registerDerivativeRules(id, currentDerivativePatternList);
- return;
- }
- case "derivative-pattern": {
- this.currentDerivativeRuleList = new ArrayList<DerivativeRule>();
- return;
- }
- case "derivative-rule": {
- this.currentPartOfSpeech = null;
- this.currentQualifier = null;
- this.currentDerivativeTypeList = new ArrayList<DerivativeType>();
- return;
- }
- case "derivative-type": {
- final String typeString = attributes.getValue("type");
- final DerivativeType type = DerivativeType.fromToken(typeString);
- this.currentDerivativeTypeList.add(type);
- return;
- }
- case "match": {
- return;
- }
- case "replace": {
- return;
- }
- case "derivative-factory-list": {
- final String id = attributes.getValue("id");
- this.currentDerivateFactoryList = new ArrayList<WordWrapperFactory<?>>();
- this.derivativeLists.put(id, currentDerivateFactoryList);
- return;
- }
- case "derivative-factory": {
- final String factoryClassName = attributes.getValue("class");
- final WordWrapperFactory<?> factory = instantiate(factoryClassName, WordWrapperFactory.class, null, null);
- if (factory == null) { /* Not a WordWrapperFactory; instantiate() has already logged a warning. */
- return;
- }
- this.currentDerivateFactoryList.add(factory);
- return;
- }
- case "lexer": {
- final String className = attributes.getValue("class");
- final String languageString = attributes.getValue("language-iso-3char");
- final String scriptString = attributes.getValue("script-iso-4char");
- final String countryString = attributes.getValue("country-iso-3char");
- final WritingSystem4a writingSystem = WritingSystem4a.find(languageString, scriptString, countryString);
-
- final Class<?>[] parameterTypes = {WritingSystem4a.class};
- final Object[] parameters = {writingSystem};
- final Lexer lexer = instantiate(className, Lexer.class, parameterTypes, parameters);
- if (lexer == null) { /* Not a Lexer; instantiate() has already logged a warning. */
- return;
- }
- this.currentOrthographyConfig.setLexer(lexer);
- return;
- }
- case "exclusion": {
- final String regexPatternString = attributes.getValue("regex-pattern");
- final Pattern regexPattern = Pattern.compile(regexPatternString);
- this.currentWordListElement.addExclusionPattern(regexPattern); /* NOTE(review): assumes an enclosing dictionary-element. */
- return;
- }
- case "dictionary": {
- final String reference = attributes.getValue("reference");
- final DictionaryResource resource = this.dictionaries.get(reference);
- if (resource == null) {
- this.logger.error("dictionary-resource not found: {}", reference);
- this.logger.error(getContextMessage());
- } else {
- this.currentOrthographyConfig.setDictionaryResource(resource);
- }
- return;
- }
- case "hyphenation-patterns": {
- final String reference = attributes.getValue("reference");
- final HyphenationPatternsResource resource = this.hyphenationPatterns.get(reference);
- if (resource == null) {
- this.logger.error("hyphenation-patterns-resource not found: {}", reference);
- this.logger.error(getContextMessage());
- } else {
- this.currentOrthographyConfig.setHyphenationPatternsResource(resource);
- }
- return;
- }
- case "match-rules": {
- final String reference = attributes.getValue("reference");
- final List<Pattern> patterns = this.hyphenationServer.getMatchRules(reference);
- if (patterns == null) {
- this.logger.error("match-rules not found: {}", reference);
- this.logger.error(getContextMessage());
- } else {
- this.currentOrthographyConfig.registerMatchRuleListId(reference);
- }
- return;
- }
- case "derivative-rules": {
- final String reference = attributes.getValue("reference");
- final List<DerivativePattern> rules = this.hyphenationServer.getDerivativePatterns(reference);
- if (rules == null) {
- this.logger.error("derivative-rules not found: {}", reference);
- this.logger.error(getContextMessage());
- } else {
- this.currentOrthographyConfig.registerDerivativeRuleListId(reference);
- }
- return;
- }
- case "derivative-factories": {
- final String reference = attributes.getValue("reference");
- final List<WordWrapperFactory<?>> factories = this.derivativeLists.get(reference);
- if (factories == null) {
- this.logger.error("derivative-factories not found: {}", reference);
- this.logger.error(getContextMessage());
- } else {
- this.currentOrthographyConfig.setWordWrapperFactories(factories);
- }
- return;
- }
- case "dictionary-resource": {
- final String id = attributes.getValue("id");
- this.currentDictionaryResource = new DictionaryResource(id);
- this.dictionaries.put(id, this.currentDictionaryResource);
- return;
- }
- case "hyphenation-patterns-resource": {
- final String id = attributes.getValue("id");
- this.currentHyphenationPatternsResource = new HyphenationPatternsResource(id);
- this.hyphenationPatterns.put(id, this.currentHyphenationPatternsResource);
- return;
- }
- case "parsed-resource": {
- /* All processing is done at endElement. */
- return;
- }
- case "resource-location": {
- final String typeString = attributes.getValue("type");
- this.currentResourceLocationType = ResourceLocation.Type.fromId(typeString);
- if (this.currentResourceLocationType == null) {
- throw new SAXException("Invalid resource location type: " + typeString);
- }
- return;
- }
- case "unparsed-dictionary": {
- /* All processing is done at endElement. */
- return;
- }
- case "dictionary-element": {
- this.currentWordListElement = this.currentDictionaryResource.new WordListElement();
- this.currentDictionaryResource.addWordListElement(this.currentWordListElement);
- return;
- }
- case "unparsed-hyphenation-patterns": {
- /* All processing is done at endElement. */
- return;
- }
- case "configuration": {
- this.currentOrthographyConfig = new Orthography4a(this.hyphenationServer);
- return;
- }
- case "orthography": {
- parseElementOrthography(attributes);
- return;
- }
- case "noun": {
- this.currentPartOfSpeech = PartOfSpeech.NOUN;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "pronoun": {
- this.currentPartOfSpeech = PartOfSpeech.PRONOUN;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "verb": {
- this.currentPartOfSpeech = PartOfSpeech.VERB;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "adjective": {
- this.currentPartOfSpeech = PartOfSpeech.ADJECTIVE;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "adverb": {
- this.currentPartOfSpeech = PartOfSpeech.ADVERB;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "preposition": {
- this.currentPartOfSpeech = PartOfSpeech.PREPOSITION;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "conjunction": {
- this.currentPartOfSpeech = PartOfSpeech.CONJUNCTION;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "article": {
- this.currentPartOfSpeech = PartOfSpeech.DETERMINER; /* "article" elements are recorded as DETERMINER. */
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "interjection": {
- this.currentPartOfSpeech = PartOfSpeech.INTERJECTION;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "cardinal": {
- this.currentPartOfSpeech = PartOfSpeech.CARDINAL;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- case "ordinal": {
- this.currentPartOfSpeech = PartOfSpeech.ORDINAL;
- this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
- return;
- }
- default: {
- /* Make sure user knows about unknown tag. The tag is logged only; parsing continues. */
- this.logger.error("Unknown tag in orthography configuration: {}", localName);
- }
- }
- }
-
- /**
- * Parses the "orthography" element, registering the current configuration for the writing system that it names.
- * @param attributes The raw parsed attributes.
- */
- private void parseElementOrthography(final Attributes attributes) {
- final String languageString = attributes.getValue("language-iso-3char");
- final String countryString = attributes.getValue("country-iso-3char");
- final String scriptString = attributes.getValue("script-iso-4char");
- final Language4a language = Language4a.findFrom3Char(languageString);
- if (language == null) {
- this.logger.error("Unable to find language for: {}", languageString);
- this.logger.error(getContextMessage());
- }
- final Script4a script = Script4a.findFromAlpha(scriptString);
- if (script == null) {
- this.logger.error("Unable to find script for: {}", scriptString);
- this.logger.error(getContextMessage());
- }
- final Country4a country = Country4a.findFrom3Char(countryString);
- if (country == null) {
- this.logger.error("Unable to find country for: {}", countryString);
- this.logger.error(getContextMessage());
- }
- final WritingSystem4a writingSystem = WritingSystem4a.find(language, script, country);
- if (writingSystem == null) { /* NOTE(review): the message below says "script" but this is the writing-system lookup. */
- this.logger.error("Unable to find script for: {}_{}_{}", languageString, scriptString, countryString);
- this.logger.error(getContextMessage());
- }
- this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig); /* NOTE(review): runs even when writingSystem is null. */
- }
-
- /**
- * Instantiates an instance of a specified class using reflection, and ensures that it is a subtype of a given type.
- * @param className The name of the class that should be instantiated.
- * @param expectedType The expected superclass for {@code className}.
- * @param <T> The type of the superclass object that is being instantiated.
- * @param parameterTypes The constructor parameter types; callers wanting the no-args constructor pass null.
- * @param parameters The constructor arguments; callers wanting the no-args constructor pass null.
- * @return The new instance of {@code className}, or null if it is not a subtype of {@code expectedType}.
- * @throws SAXException Wraps a number of exceptions that can be thrown during instantiation by reflection.
- */
- private <T extends Object> T instantiate(final String className, final Class<T> expectedType,
- final Class<?>[] parameterTypes, final Object[] parameters) throws SAXException {
- Class<?> theClass = null;
- try {
- theClass = Class.forName(className);
- } catch (final ClassNotFoundException e) {
- throw new SAXException(e);
- }
- if (! expectedType.isAssignableFrom(theClass)) { /* NOTE(review): the warning names WordWrapperFactory whatever expectedType is. */
- this.logger.warn("Class \"{}\" is not a {} class.", className, WordWrapperFactory.class.getName());
- return null;
- }
-
- @SuppressWarnings("unchecked")
- final Class<T> factoryClass = (Class<T>) theClass; /* Checked by isAssignableFrom above. */
- /* Look up the public constructor that takes parameterTypes. */
- Constructor<T> constructor = null;
- try {
- constructor = factoryClass.getConstructor(parameterTypes);
- } catch (final SecurityException e) {
- throw new SAXException(e);
- } catch (final NoSuchMethodException e) {
- throw new SAXException(e);
- }
-
- T newInstance = null;
- try {
- newInstance = constructor.newInstance(parameters);
- } catch (final IllegalArgumentException e) {
- throw new SAXException(e);
- } catch (final InstantiationException e) {
- throw new SAXException(e);
- } catch (final IllegalAccessException e) {
- throw new SAXException(e);
- } catch (final InvocationTargetException e) {
- throw new SAXException(e);
- }
- return newInstance;
- }
-
- @Override
- public void endElement(final String uri, final String localName, final String qName) {
- endElementInside(uri, localName, qName); /* Per-element processing; it returns early from each case. */
- this.elementStack.pop(); /* Undoes the push made in startElement(). */
- }
-
- /**
- * Does the per-element work for {@link #endElement(String, String, String)}. It is a separate method so that
- * the caller can still do its housekeeping (popping the element stack) after any of the early returns here.
- * @param uri See {@link DefaultHandler#endElement(String, String, String)}.
- * @param localName See {@link DefaultHandler#endElement(String, String, String)}.
- * @param qName See {@link DefaultHandler#endElement(String, String, String)}.
- */
- private void endElementInside(final String uri, final String localName, final String qName) {
- switch(localName) {
- case "axsl-orthography-config": {
- return;
- }
- case "match-rule-list": {
- this.currentMatchRuleList = null;
- return;
- }
- case "derivative-pattern-list": {
- this.currentDerivativePatternList = null;
- return;
- }
- case "derivative-pattern": {
- final DerivativePattern pattern = new DerivativePattern(this.currentDerivativeRuleMatch,
- this.currentDerivativeRuleReplace, this.currentDerivativeRuleList);
- this.currentDerivativePatternList.add(pattern);
- this.currentDerivativeRuleList = null;
- this.currentDerivativeRuleMatch = null;
- this.currentDerivativeRuleReplace = null;
- return;
- }
- case "derivative-rule": {
- final DerivativeRule rule = new DerivativeRule(this.currentPartOfSpeech, this.currentQualifier,
- this.currentDerivativeTypeList);
- this.currentDerivativeRuleList.add(rule);
- this.currentPartOfSpeech = null;
- this.currentQualifier = null;
- this.currentDerivativeTypeList = null;
- return;
- }
- case "derivative-type": {
- return;
- }
- case "match": {
- final String matchString = this.textAccumulator.toString();
- StringUtils.clear(this.textAccumulator);
- final Pattern pattern = Pattern.compile(matchString);
- if (this.currentDerivativeRuleList != null) { /* Non-null only while inside a derivative-pattern. */
- this.currentDerivativeRuleMatch = pattern;
- } else {
- this.currentMatchRuleList.add(pattern); /* Otherwise assumed to be inside a match-rule-list. */
- }
- return;
- }
- case "replace": {
- final String replaceString = this.textAccumulator.toString();
- StringUtils.clear(this.textAccumulator);
- this.currentDerivativeRuleReplace = replaceString;
- return;
- }
- case "derivative-factory-list": {
- this.currentDerivateFactoryList = null;
- return;
- }
- case "derivative-factory": {
- return;
- }
- case "lexer": {
- return;
- }
- case "exclusion": {
- return;
- }
- case "dictionary": {
- return;
- }
- case "hyphenation-patterns": {
- return;
- }
- case "match-rules": {
- return;
- }
- case "match-derivative": { /* NOTE(review): startElement() has no such case; it handles "derivative-rules" instead. */
- return;
- }
- case "derivative-factories": {
- return;
- }
- case "dictionary-resource": {
- this.currentDictionaryResource = null;
- return;
- }
- case "hyphenation-patterns-resource": {
- this.currentHyphenationPatternsResource = null;
- return;
- }
- case "parsed-resource": {
- return;
- }
- case "resource-location": {
- final String content = this.textAccumulator.toString();
- StringUtils.clear(this.textAccumulator);
- switch (this.currentResourceLocationType) { /* NOTE(review): no default; other types leave the location unset. */
- case CLASSPATH_RESOURCE: {
- this.currentResourceLocation = new ResourceLocationClasspath(content);
- break;
- }
- case URL_RESOURCE: {
- this.currentResourceLocation = new ResourceLocationUrl(createUrl(content)); /* createUrl() may return null. */
- break;
- }
- }
-
- if (this.currentWordListElement != null) {
- this.currentWordListElement.setLocation(this.currentResourceLocation);
- } else if (this.currentHyphenationPatternsResource != null) {
- final String parentElement = this.getParentElement();
- if ("unparsed-hyphenation-patterns".equals(parentElement)) {
- this.currentHyphenationPatternsResource.setUnparsedLocation(this.currentResourceLocation);
- } else if ("parsed-resource".equals(parentElement)) {
- this.currentHyphenationPatternsResource.addParsedResource(this.currentResourceLocation);
- } else {
- throw new IllegalStateException();
- }
- } else if (this.currentDictionaryResource != null) {
- this.currentDictionaryResource.addParsedResource(this.currentResourceLocation);
- } else {
- throw new IllegalStateException("Unexpected resource type.");
- }
- this.currentResourceLocation = null;
- return;
- }
- case "unparsed-dictionary": {
- return;
- }
- case "dictionary-element": {
- this.currentWordListElement = null;
- return;
- }
- case "unparsed-hyphenation-patterns": {
- return;
- }
- case "configuration": {
- this.currentOrthographyConfig = null;
- return;
- }
- case "orthography": {
- return;
- }
- } /* No default: elements not listed above (e.g. the part-of-speech elements) get no end processing. */
- }
-
- /**
- * Sets the document locator for this parser, which getContextMessage() uses to report line and column numbers.
- * @param locator The new locator.
- */
- public void setDocumentLocator(final Locator locator) { /* NOTE(review): presumably overrides the DefaultHandler method; no @Override here. */
- this.locator = locator;
- }
-
- @Override
- public void characters(final char[] chars, final int start, final int length) throws SAXException {
- this.textAccumulator.append(chars, start, length); /* NOTE(review): cleared only at the end of match, replace and resource-location. */
- }
-
- /**
- * Provides a formatted string showing the current locator context (system id, line and column), which is useful
- * in user messages to indicate where in the document a condition arose.
- * @return The formatted context message, or null if no locator has been set.
- */
- private String getContextMessage() {
- if (this.locator == null) {
- return null; /* Callers pass this result straight to the logger. */
- }
- return " Context: " + this.locator.getSystemId() + "\n"
- + " (Line " + this.locator.getLineNumber() + ", Column "
- + this.locator.getColumnNumber() + ")";
- }
-
- /**
- * Converts a string to a URL, logging an error instead of throwing if the string is malformed.
- * @param urlString The string to be converted.
- * @return The URL, or null if {@code urlString} is not a valid URL.
- */
- private URL createUrl(final String urlString) {
- try {
- return new URL(urlString);
- } catch (final MalformedURLException e) {
- this.logger.error("Invalid URL: {}", urlString);
- this.logger.error(getContextMessage());
- return null;
- }
- }
-
- /**
- * Returns the name of the element that is the parent of the current element.
- * @return The name of the element that is the parent of the current element, or null if the stack holds no parent.
- */
- private String getParentElement() {
- /* Stack is a subclass of Vector, so we can use its methods to do a double-peek. */
- /* This is the index to the current element. */
- final int lastIndex = this.elementStack.size() - 1;
- final int parentIndex = lastIndex - 1;
- if (parentIndex < 0) {
- return null;
- }
- return this.elementStack.get(parentIndex);
- }
-
-}
Copied: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java (from rev 12081, trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java)
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyConfigParser.java 2021-11-18 13:52:42 UTC (rev 12082)
@@ -0,0 +1,774 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography.util;
+
+import org.foray.common.AxslDtdUtil;
+import org.foray.common.i18n.Country4a;
+import org.foray.common.i18n.Language4a;
+import org.foray.common.i18n.Script4a;
+import org.foray.common.i18n.WritingSystem4a;
+import org.foray.common.primitive.StringUtils;
+import org.foray.common.resource.ResourceLocation;
+import org.foray.common.resource.ResourceLocationClasspath;
+import org.foray.common.resource.ResourceLocationUrl;
+import org.foray.orthography.DerivativePattern;
+import org.foray.orthography.DerivativeRule;
+import org.foray.orthography.DictionaryResource;
+import org.foray.orthography.HyphenationPatternsResource;
+import org.foray.orthography.Orthography4a;
+import org.foray.orthography.OrthographyServer4a;
+import org.foray.orthography.PosUtils;
+import org.foray.orthography.WordWrapperFactory;
+
+import org.axsl.orthography.OrthographyException;
+import org.axsl.orthography.Word;
+import org.axsl.orthography.Word.DerivativeType;
+import org.axsl.orthography.Word.PartOfSpeech;
+import org.axsl.orthography.Word.PosQualifier;
+import org.axsl.orthography.optional.Lexer;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+import java.util.regex.Pattern;
+
+import javax.xml.parsers.SAXParserFactory;
+
+/**
+ * SAX2 Handler which retrieves the orthography configuration information and stores it in an {@link OrthographyServer4a}
+ * instance.
+ * Normally this class doesn't need to be accessed directly.
+ */
+public class OrthographyConfigParser extends DefaultHandler {
+
+ /** The logger. */
+ private Logger logger = LoggerFactory.getLogger(this.getClass());
+
+ /** The resource created for the "dictionary-resource" element currently being parsed. */
+ private DictionaryResource currentDictionaryResource;
+
+ /** The word list created for the "dictionary-element" element currently being parsed. */
+ private DictionaryResource.WordListElement currentWordListElement;
+
+ /** The resource created for the "hyphenation-patterns-resource" element currently being parsed. */
+ private HyphenationPatternsResource currentHyphenationPatternsResource;
+
+ /** The list created for the "match-rule-list" element currently being parsed. */
+ private List<Pattern> currentMatchRuleList;
+
+ /** The list created for the "derivative-pattern-list" element currently being parsed. */
+ private List<DerivativePattern> currentDerivativePatternList;
+
+ /** The list created for the "derivative-pattern" element currently being parsed. */
+ private List<DerivativeRule> currentDerivativeRuleList;
+
+ /** Component of: derivative-rule. Set by the part-of-speech child element (noun, verb, etc.). */
+ private Word.PartOfSpeech currentPartOfSpeech;
+
+ /** Component of: derivative-rule. Parsed from the attributes of the part-of-speech child element. */
+ private PosQualifier currentQualifier;
+
+ /** Component of: derivative-rule. Receives one entry for each "derivative-type" child element. */
+ private List<DerivativeType> currentDerivativeTypeList;
+
+ /** Component of: derivative-pattern. NOTE(review): presumably the compiled "match" content; set outside this view. */
+ private Pattern currentDerivativeRuleMatch;
+
+ /** Component of: derivative-pattern. NOTE(review): presumably the "replace" content; set outside this view. */
+ private String currentDerivativeRuleReplace;
+
+ /** The list created for the "derivative-factory-list" element currently being parsed. */
+ private List<WordWrapperFactory<?>> currentDerivateFactoryList;
+
+ /** The type named by the "type" attribute of the "resource-location" element currently being parsed. */
+ private ResourceLocation.Type currentResourceLocationType;
+
+ /** The resource location currently being built. NOTE(review): it is assigned outside this view. */
+ private ResourceLocation currentResourceLocation;
+
+ /** Receives content of text nodes. */
+ private StringBuilder textAccumulator = new StringBuilder();
+
+ /** The orthography configuration created for the "configuration" element currently being parsed. */
+ private transient Orthography4a currentOrthographyConfig;
+
+// /** The map of match rule lists, keyed by id. */
+// private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
+//
+ /** The map of derivative factory lists, keyed by id. */
+ private Map<String, List<WordWrapperFactory<?>>> derivativeLists =
+ new HashMap<String, List<WordWrapperFactory<?>>>();
+
+ /** The map of dictionary instances, keyed by id. */
+ private Map<String, DictionaryResource> dictionaries = new HashMap<String, DictionaryResource>();
+
+ /** The map of hyphenation pattern tree instances, keyed by id. */
+ private Map<String, HyphenationPatternsResource> hyphenationPatterns =
+ new HashMap<String, HyphenationPatternsResource>();
+
+ /** The InputSource encapsulating the configuration file. It is not a file name, despite the field's name. */
+ private InputSource filename;
+
+ /** The orthography server receiving the parsed information. */
+ private OrthographyServer4a hyphenationServer;
+
+ /** The XML parser's Locator instance, used to indicate line and column numbers in user messages. */
+ private Locator locator;
+
+ /** The stack of elements currently being processed. */
+ private Stack<String> elementStack = new Stack<String>();
+
+ /**
+ * Registers the URLStreamHandler for classpath: URLs.
+ * This has to be done only once, hence a static initializer rather than constructor code.
+ */
+ static {
+ org.foray.common.url.classpath.Handler.register();
+ }
+
+ /**
+ * Constructor.
+ * @param server The orthography server which will capture the information from the parsed configuration.
+ * @param filename The input source (not a file name, despite the parameter's name) which supplies the
+ * configuration information to be parsed.
+ */
+ public OrthographyConfigParser(final OrthographyServer4a server, final InputSource filename) {
+ this.hyphenationServer = server;
+ this.filename = filename;
+ }
+
+ /**
+ * Parses the configuration file, with this object receiving the SAX content events.
+ * @throws OrthographyException For errors during parsing, including I/O errors while reading the input.
+ */
+ public void start() throws OrthographyException {
+ final XMLReader parser = createParser();
+ /* Turn on validation if it is available. */
+ try {
+ parser.setFeature("http://xml.org/sax/features/validation", true);
+ } catch (final SAXNotRecognizedException e) {
+ this.logger.warn("Parser does not recognize validation.");
+ } catch (final SAXNotSupportedException e) {
+ this.logger.warn("Parser does not support validation.");
+ }
+ parser.setContentHandler(this);
+ final EntityResolver resolver = AxslDtdUtil.getEntityResolver();
+ parser.setEntityResolver(resolver); /* NOTE(review): replaces the resolver that createParser() installed. */
+
+ try {
+ parser.parse(this.filename);
+ } catch (final SAXException e) {
+ if (e.getException() instanceof OrthographyException) { /* Unwrap rather than double-wrap. */
+ throw (OrthographyException) e.getException();
+ }
+ throw new OrthographyException(e);
+ } catch (final IOException e) {
+ throw new OrthographyException(e);
+ }
+ }
+
+ /**
+ * Creates a namespace-aware SAX parser for parsing the configuration file.
+ * @return The created SAX parser, with the server's entity resolver already set on it.
+ * @throws OrthographyException For errors creating or configuring the parser.
+ */
+ private XMLReader createParser() throws OrthographyException {
+ try {
+ final SAXParserFactory spf =
+ javax.xml.parsers.SAXParserFactory.newInstance();
+ spf.setNamespaceAware(true); /* The handler methods dispatch on localName. */
+ final XMLReader xmlReader = spf.newSAXParser().getXMLReader();
+ final EntityResolver entityResolver = this.hyphenationServer.getEntityResolver();
+ xmlReader.setEntityResolver(entityResolver);
+ this.logger.debug("Orthography Configuration Parsing: Using {} as SAX2 Parser",
+ xmlReader.getClass().getName());
+ return xmlReader;
+ } catch (final javax.xml.parsers.ParserConfigurationException e) {
+ throw new OrthographyException(e);
+ } catch (final org.xml.sax.SAXException e) {
+ throw new OrthographyException(e);
+ }
+ }
+
+ @Override
+ public void startElement(final String uri, final String localName, final String qName,
+ final Attributes attributes) throws SAXException {
+ this.elementStack.push(localName);
+ switch(localName) {
+ case "axsl-orthography-config": {
+ /* Nothing to do here. */
+ return;
+ }
+ case "match-rule-list": {
+ final String id = attributes.getValue("id");
+ this.currentMatchRuleList = new ArrayList<Pattern>();
+ this.hyphenationServer.registerMatchRules(id, currentMatchRuleList);
+ return;
+ }
+ case "derivative-pattern-list": {
+ final String id = attributes.getValue("id");
+ this.currentDerivativePatternList = new ArrayList<DerivativePattern>();
+ this.hyphenationServer.registerDerivativeRules(id, currentDerivativePatternList);
+ return;
+ }
+ case "derivative-pattern": {
+ this.currentDerivativeRuleList = new ArrayList<DerivativeRule>();
+ return;
+ }
+ case "derivative-rule": {
+ this.currentPartOfSpeech = null;
+ this.currentQualifier = null;
+ this.currentDerivativeTypeList = new ArrayList<DerivativeType>();
+ return;
+ }
+ case "derivative-type": {
+ final String typeString = attributes.getValue("type");
+ final DerivativeType type = DerivativeType.fromToken(typeString);
+ this.currentDerivativeTypeList.add(type);
+ return;
+ }
+ case "match": {
+ return;
+ }
+ case "replace": {
+ return;
+ }
+ case "derivative-factory-list": {
+ final String id = attributes.getValue("id");
+ this.currentDerivateFactoryList = new ArrayList<WordWrapperFactory<?>>();
+ this.derivativeLists.put(id, currentDerivateFactoryList);
+ return;
+ }
+ case "derivative-factory": {
+ final String factoryClassName = attributes.getValue("class");
+ final WordWrapperFactory<?> factory = instantiate(factoryClassName, WordWrapperFactory.class, null, null);
+ if (factory == null) {
+ return;
+ }
+ this.currentDerivateFactoryList.add(factory);
+ return;
+ }
+ case "lexer": {
+ final String className = attributes.getValue("class");
+ final String languageString = attributes.getValue("language-iso-3char");
+ final String scriptString = attributes.getValue("script-iso-4char");
+ final String countryString = attributes.getValue("country-iso-3char");
+ final WritingSystem4a writingSystem = WritingSystem4a.find(languageString, scriptString, countryString);
+
+ final Class<?>[] parameterTypes = {WritingSystem4a.class};
+ final Object[] parameters = {writingSystem};
+ final Lexer lexer = instantiate(className, Lexer.class, parameterTypes, parameters);
+ if (lexer == null) {
+ return;
+ }
+ this.currentOrthographyConfig.setLexer(lexer);
+ return;
+ }
+ case "exclusion": {
+ final String regexPatternString = attributes.getValue("regex-pattern");
+ final Pattern regexPattern = Pattern.compile(regexPatternString);
+ this.currentWordListElement.addExclusionPattern(regexPattern);
+ return;
+ }
+ case "dictionary": {
+ final String reference = attributes.getValue("reference");
+ final DictionaryResource resource = this.dictionaries.get(reference);
+ if (resource == null) {
+ this.logger.error("dictionary-resource not found: {}", reference);
+ this.logger.error(getContextMessage());
+ } else {
+ this.currentOrthographyConfig.setDictionaryResource(resource);
+ }
+ return;
+ }
+ case "hyphenation-patterns": {
+ final String reference = attributes.getValue("reference");
+ final HyphenationPatternsResource resource = this.hyphenationPatterns.get(reference);
+ if (resource == null) {
+ this.logger.error("hyphenation-patterns-resource not found: {}", reference);
+ this.logger.error(getContextMessage());
+ } else {
+ this.currentOrthographyConfig.setHyphenationPatternsResource(resource);
+ }
+ return;
+ }
+ case "match-rules": {
+ final String reference = attributes.getValue("reference");
+ final List<Pattern> patterns = this.hyphenationServer.getMatchRules(reference);
+ if (patterns == null) {
+ this.logger.error("match-rules not found: {}", reference);
+ this.logger.error(getContextMessage());
+ } else {
+ this.currentOrthographyConfig.registerMatchRuleListId(reference);
+ }
+ return;
+ }
+ case "derivative-rules": {
+ final String reference = attributes.getValue("reference");
+ final List<DerivativePattern> rules = this.hyphenationServer.getDerivativePatterns(reference);
+ if (rules == null) {
+ this.logger.error("derivative-rules not found: {}", reference);
+ this.logger.error(getContextMessage());
+ } else {
+ this.currentOrthographyConfig.registerDerivativeRuleListId(reference);
+ }
+ return;
+ }
+ case "derivative-factories": {
+ final String reference = attributes.getValue("reference");
+ final List<WordWrapperFactory<?>> factories = this.derivativeLists.get(reference);
+ if (factories == null) {
+ this.logger.error("derivative-factories not found: {}", reference);
+ this.logger.error(getContextMessage());
+ } else {
+ this.currentOrthographyConfig.setWordWrapperFactories(factories);
+ }
+ return;
+ }
+ case "dictionary-resource": {
+ final String id = attributes.getValue("id");
+ this.currentDictionaryResource = new DictionaryResource(id);
+ this.dictionaries.put(id, this.currentDictionaryResource);
+ return;
+ }
+ case "hyphenation-patterns-resource": {
+ final String id = attributes.getValue("id");
+ this.currentHyphenationPatternsResource = new HyphenationPatternsResource(id);
+ this.hyphenationPatterns.put(id, this.currentHyphenationPatternsResource);
+ return;
+ }
+ case "parsed-resource": {
+ /* All processing is done at endElement. */
+ return;
+ }
+ case "resource-location": {
+ final String typeString = attributes.getValue("type");
+ this.currentResourceLocationType = ResourceLocation.Type.fromId(typeString);
+ if (this.currentResourceLocationType == null) {
+ throw new SAXException("Invalid resource location type: " + typeString);
+ }
+ return;
+ }
+ case "unparsed-dictionary": {
+ /* All processing is done at endElement. */
+ return;
+ }
+ case "dictionary-element": {
+ this.currentWordListElement = this.currentDictionaryResource.new WordListElement();
+ this.currentDictionaryResource.addWordListElement(this.currentWordListElement);
+ return;
+ }
+ case "unparsed-hyphenation-patterns": {
+ /* All processing is done at endElement. */
+ return;
+ }
+ case "configuration": {
+ this.currentOrthographyConfig = new Orthography4a(this.hyphenationServer);
+ return;
+ }
+ case "orthography": {
+ parseElementOrthography(attributes);
+ return;
+ }
+ case "noun": {
+ this.currentPartOfSpeech = PartOfSpeech.NOUN;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "pronoun": {
+ this.currentPartOfSpeech = PartOfSpeech.PRONOUN;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "verb": {
+ this.currentPartOfSpeech = PartOfSpeech.VERB;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "adjective": {
+ this.currentPartOfSpeech = PartOfSpeech.ADJECTIVE;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "adverb": {
+ this.currentPartOfSpeech = PartOfSpeech.ADVERB;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "preposition": {
+ this.currentPartOfSpeech = PartOfSpeech.PREPOSITION;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "conjunction": {
+ this.currentPartOfSpeech = PartOfSpeech.CONJUNCTION;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "article": {
+ this.currentPartOfSpeech = PartOfSpeech.DETERMINER;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "interjection": {
+ this.currentPartOfSpeech = PartOfSpeech.INTERJECTION;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "cardinal": {
+ this.currentPartOfSpeech = PartOfSpeech.CARDINAL;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ case "ordinal": {
+ this.currentPartOfSpeech = PartOfSpeech.ORDINAL;
+ this.currentQualifier = PosUtils.parseSingleQualifier(attributes);
+ return;
+ }
+ default: {
+ /* Make sure user knows about unknown tag. */
+ this.logger.error("Unknown tag in orthography configuration: {}", localName);
+ }
+ }
+ }
+
+ /**
+ * Parses the "orthography" element. Its language, script, and country attributes are resolved into a writing
+ * system, and the current orthography configuration is registered with the hyphenation server for that writing
+ * system. Each attribute that cannot be resolved is logged as an error. If no writing system can be found, that
+ * is logged as well and nothing is registered.
+ * @param attributes The raw parsed attributes.
+ */
+ private void parseElementOrthography(final Attributes attributes) {
+ final String languageString = attributes.getValue("language-iso-3char");
+ final String countryString = attributes.getValue("country-iso-3char");
+ final String scriptString = attributes.getValue("script-iso-4char");
+ final Language4a language = Language4a.findFrom3Char(languageString);
+ if (language == null) {
+ this.logger.error("Unable to find language for: {}", languageString);
+ this.logger.error(getContextMessage());
+ }
+ final Script4a script = Script4a.findFromAlpha(scriptString);
+ if (script == null) {
+ this.logger.error("Unable to find script for: {}", scriptString);
+ this.logger.error(getContextMessage());
+ }
+ final Country4a country = Country4a.findFrom3Char(countryString);
+ if (country == null) {
+ this.logger.error("Unable to find country for: {}", countryString);
+ this.logger.error(getContextMessage());
+ }
+ final WritingSystem4a writingSystem = WritingSystem4a.find(language, script, country);
+ if (writingSystem == null) {
+ /* It is the language_script_country combination that failed, not just the script. */
+ this.logger.error("Unable to find writing system for: {}_{}_{}", languageString, scriptString,
+ countryString);
+ this.logger.error(getContextMessage());
+ /* There is nothing meaningful to register under a null writing system. */
+ return;
+ }
+ this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig);
+ }
+
+ /**
+ * Instantiates an instance of a specified class using reflection, after ensuring that it is a subtype of a given
+ * type.
+ * @param <T> The type of the superclass object that is being instantiated.
+ * @param className The name of the class that should be instantiated.
+ * @param expectedType The expected supertype for {@code className}.
+ * @param parameterTypes The parameter types of the public constructor to use. Null or empty selects the no-args
+ * constructor.
+ * @param parameters The arguments passed to that constructor. Null or empty for the no-args constructor.
+ * @return The new instance of {@code className}, or null if that class is not a subtype of {@code expectedType}.
+ * @throws SAXException Wraps the exceptions that can be thrown while loading the class, finding its constructor,
+ * or invoking that constructor.
+ */
+ private <T> T instantiate(final String className, final Class<T> expectedType,
+ final Class<?>[] parameterTypes, final Object[] parameters) throws SAXException {
+ final Class<?> theClass;
+ try {
+ theClass = Class.forName(className);
+ } catch (final ClassNotFoundException e) {
+ throw new SAXException(e);
+ }
+ if (! expectedType.isAssignableFrom(theClass)) {
+ /* Report the type that was actually required; this method is not used only for WordWrapperFactory. */
+ this.logger.warn("Class \"{}\" is not a {} class.", className, expectedType.getName());
+ return null;
+ }
+
+ /* Cannot fail: assignability to the expected type was verified above. */
+ final Class<? extends T> concreteClass = theClass.asSubclass(expectedType);
+ try {
+ final Constructor<? extends T> constructor = concreteClass.getConstructor(parameterTypes);
+ return constructor.newInstance(parameters);
+ } catch (final SecurityException | NoSuchMethodException | IllegalArgumentException
+ | InstantiationException | IllegalAccessException | InvocationTargetException e) {
+ throw new SAXException(e);
+ }
+ }
+
+ @Override
+ public void endElement(final String uri, final String localName, final String qName) {
+ try {
+ endElementInside(uri, localName, qName);
+ } finally {
+ /* Housekeeping: keep the element stack in step with the document even if the element-specific
+ * processing above exits abnormally. */
+ this.elementStack.pop();
+ }
+ }
+
+ /**
+ * Called by {@link #endElement(String, String, String)} so that we can make sure we get housekeeping done after
+ * this method has run.
+ * @param uri See {@link DefaultHandler#endElement(String, String, String)}.
+ * @param localName See {@link DefaultHandler#endElement(String, String, String)}.
+ * @param qName See {@link DefaultHandler#endElement(String, String, String)}.
+ */
+ private void endElementInside(final String uri, final String localName, final String qName) {
+ switch(localName) {
+ case "axsl-orthography-config": {
+ return;
+ }
+ case "match-rule-list": {
+ this.currentMatchRuleList = null;
+ return;
+ }
+ case "derivative-pattern-list": {
+ this.currentDerivativePatternList = null;
+ return;
+ }
+ case "derivative-pattern": {
+ final DerivativePattern pattern = new DerivativePattern(this.currentDerivativeRuleMatch,
+ this.currentDerivativeRuleReplace, this.currentDerivativeRuleList);
+ this.currentDerivativePatternList.add(pattern);
+ this.currentDerivativeRuleList = null;
+ this.currentDerivativeRuleMatch = null;
+ this.currentDerivativeRuleReplace = null;
+ return;
+ }
+ case "derivative-rule": {
+ final DerivativeRule rule = new DerivativeRule(this.currentPartOfSpeech, this.currentQualifier,
+ this.currentDerivativeTypeList);
+ this.currentDerivativeRuleList.add(rule);
+ this.currentPartOfSpeech = null;
+ this.currentQualifier = null;
+ this.currentDerivativeTypeList = null;
+ return;
+ }
+ case "derivative-type": {
+ return;
+ }
+ case "match": {
+ final String matchString = this.textAccumulator.toString();
+ StringUtils.clear(this.textAccumulator);
+ final Pattern pattern = Pattern.compile(matchString);
+ if (this.currentDerivativeRuleList != null) {
+ this.currentDerivativeRuleMatch = pattern;
+ } else {
+ this.currentMatchRuleList.add(pattern);
+ }
+ return;
+ }
+ case "replace": {
+ final String replaceString = this.textAccumulator.toString();
+ StringUtils.clear(this.textAccumulator);
+ this.currentDerivativeRuleReplace = replaceString;
+ return;
+ }
+ case "derivative-factory-list": {
+ this.currentDerivateFactoryList = null;
+ return;
+ }
+ case "derivative-factory": {
+ return;
+ }
+ case "lexer": {
+ return;
+ }
+ case "exclusion": {
+ return;
+ }
+ case "dictionary": {
...
[truncated message content] |
|
From: <vic...@us...> - 2021-11-18 13:42:30
|
Revision: 12081
http://sourceforge.net/p/foray/code/12081
Author: victormote
Date: 2021-11-18 13:42:26 +0000 (Thu, 18 Nov 2021)
Log Message:
-----------
1. Some general Lexer cleanup. 2. Allow Lexers to specify WritingSystem at configuration time.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
trunk/foray/foray-common/src/test/java/org/foray/common/i18n/Orthography4aTests.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-orthography/src/test/resources/orthography-config.xml
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerDefault.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerLatin1.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -748,6 +748,9 @@
* code is not valid.
*/
public static Language4a findFromAlpha(final String languageCode) {
+ if (languageCode == null) {
+ return null;
+ }
/* Convert to lowercase, since the raw data is in that format. */
final String normalizedLanguageCode = languageCode.toLowerCase().trim();
Language4a theInstance = findFrom2Char(normalizedLanguageCode);
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -420,6 +420,9 @@
* @return The instance matching {@code alphaCode}, or null if the code is not registered.
*/
public static Script4a findFromAlpha(final String alphaCode) {
+ if (alphaCode == null) {
+ return null;
+ }
/* Convert to lowercase, since the raw data is in that format. */
final String normalizedScriptCode = alphaCode.toLowerCase().trim();
return Script4a.mapAlpha.get(normalizedScriptCode);
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -29,6 +29,7 @@
package org.foray.common.i18n;
import org.foray.common.primitive.ObjectUtils;
+import org.foray.common.primitive.StringUtils;
import org.axsl.common.i18n.Country;
import org.axsl.common.i18n.Language;
@@ -35,7 +36,10 @@
import org.axsl.common.i18n.Script;
import org.axsl.common.i18n.WritingSystem;
+import com.ibm.icu.util.ULocale;
+
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
/**
@@ -73,14 +77,14 @@
new HashMap<Script4a, Map<Language4a, Map<Country4a, WritingSystem4a>>>();
static {
- USA = WritingSystem4a.find(Language4a.ENGLISH, Country4a.USA, Script4a.LATIN);
- FINLAND = WritingSystem4a.find(Language4a.FINNISH, Country4a.FINLAND, Script4a.LATIN);
- HUNGARY = WritingSystem4a.find(Language4a.HUNGARIAN, Country4a.HUNGARY, Script4a.LATIN);
- ITALY = WritingSystem4a.find(Language4a.ITALIAN, Country4a.ITALY, Script4a.LATIN);
- POLAND = WritingSystem4a.find(Language4a.POLISH, Country4a.POLAND, Script4a.LATIN);
- PORTUGAL = WritingSystem4a.find(Language4a.PORTUGUESE, Country4a.PORTUGAL, Script4a.LATIN);
- RUSSIA = WritingSystem4a.find(Language4a.RUSSIAN, Country4a.RUSSIA, Script4a.CYRILLIC);
- SPAIN = WritingSystem4a.find(Language4a.SPANISH, Country4a.SPAIN, Script4a.LATIN);
+ USA = WritingSystem4a.find(Language4a.ENGLISH, Script4a.LATIN, Country4a.USA);
+ FINLAND = WritingSystem4a.find(Language4a.FINNISH, Script4a.LATIN, Country4a.FINLAND);
+ HUNGARY = WritingSystem4a.find(Language4a.HUNGARIAN, Script4a.LATIN, Country4a.HUNGARY);
+ ITALY = WritingSystem4a.find(Language4a.ITALIAN, Script4a.LATIN, Country4a.ITALY);
+ POLAND = WritingSystem4a.find(Language4a.POLISH, Script4a.LATIN, Country4a.POLAND);
+ PORTUGAL = WritingSystem4a.find(Language4a.PORTUGUESE, Script4a.LATIN, Country4a.PORTUGAL);
+ RUSSIA = WritingSystem4a.find(Language4a.RUSSIAN, Script4a.CYRILLIC, Country4a.RUSSIA);
+ SPAIN = WritingSystem4a.find(Language4a.SPANISH, Script4a.LATIN, Country4a.SPAIN);
}
/** The language. */
@@ -120,7 +124,7 @@
* @param script The script.
* @return The orthography for the given parameters.
*/
- public static WritingSystem4a find(final Language4a language, final Country4a country, final Script4a script) {
+ public static WritingSystem4a find(final Language4a language, final Script4a script, final Country4a country) {
Script4a scriptToUse = script;
if (scriptToUse == null) {
scriptToUse = Script4a.UNDETERMINED;
@@ -159,12 +163,12 @@
* @param scriptString The script.
* @return The orthography for the given parameters.
*/
- public static WritingSystem4a find(final String languageString, final String countryString,
- final String scriptString) {
+ public static WritingSystem4a find(final String languageString, final String scriptString,
+ final String countryString) {
final Language4a language = Language4a.findFromAlpha(languageString);
+ final Script4a script = Script4a.findFromAlpha(scriptString);
final Country4a country = Country4a.findFromAlpha(countryString);
- final Script4a script = Script4a.findFromAlpha(scriptString);
- return find(language, country, script);
+ return find(language, script, country);
}
@Override
@@ -282,4 +286,24 @@
return builder.toString();
}
+ /**
+ * Builds a Java Locale from the language and country of this writing system. The script is not used. A missing
+ * language or country is represented by the empty string.
+ * NOTE(review): the codes passed are the alpha-3 codes. {@link Locale} documents its country argument as an
+ * ISO 3166 alpha-2 code or a UN M.49 numeric-3 code, so confirm that the resulting Locale selects the intended
+ * locale data.
+ * @return The Java Locale.
+ */
+ public Locale toLocale() {
+ String languageCode = StringUtils.EMPTY_STRING;
+ if (this.language != null) {
+ languageCode = this.language.getAlpha3Code();
+ }
+ String countryCode = StringUtils.EMPTY_STRING;
+ if (this.country != null) {
+ countryCode = this.country.getAlpha3Code();
+ }
+ return new Locale(languageCode, countryCode);
+ }
+
+ /**
+ * Builds an ICU4J ULocale from the alpha-3 language and country codes of this writing system. The script is not
+ * used. A missing language or country is represented by the empty string.
+ * @return The ICU4J ULocale.
+ */
+ public ULocale toULocale() {
+ String languageCode = StringUtils.EMPTY_STRING;
+ if (this.language != null) {
+ languageCode = this.language.getAlpha3Code();
+ }
+ String countryCode = StringUtils.EMPTY_STRING;
+ if (this.country != null) {
+ countryCode = this.country.getAlpha3Code();
+ }
+ return new ULocale(languageCode, countryCode);
+ }
+
}
Modified: trunk/foray/foray-common/src/test/java/org/foray/common/i18n/Orthography4aTests.java
===================================================================
--- trunk/foray/foray-common/src/test/java/org/foray/common/i18n/Orthography4aTests.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-common/src/test/java/org/foray/common/i18n/Orthography4aTests.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -47,7 +47,7 @@
/* Find some already-registered items multiple times. Purpose here is to make sure we aren't creating
* the same item multiple times. */
WritingSystem4a pre = WritingSystem4a.USA;
- WritingSystem4a found = WritingSystem4a.find(pre.getLanguage(), pre.getCountry(), pre.getScript());
+ WritingSystem4a found = WritingSystem4a.find(pre.getLanguage(), pre.getScript(), pre.getCountry());
/* Check that both reference the same object. If not, the registration is not working correctly. */
Assert.assertTrue("Must reference the same object (USA)", pre == found);
/* The count of items in the class should not have changed from our assertion above. */
@@ -54,7 +54,7 @@
Assert.assertEquals(expectedCount, WritingSystem4a.getCount());
pre = WritingSystem4a.FINLAND;
- found = WritingSystem4a.find(pre.getLanguage(), pre.getCountry(), pre.getScript());
+ found = WritingSystem4a.find(pre.getLanguage(), pre.getScript(), pre.getCountry());
Assert.assertTrue("Must reference the same object (FINLAND)", pre == found);
Assert.assertEquals(expectedCount, WritingSystem4a.getCount());
}
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/FoObj.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -3925,9 +3925,9 @@
*/
public WritingSystem getWritingSystem() {
final Language4a language = this.traitLanguage(null);
+ final Script4a script = this.traitScript(null);
final Country4a country = this.traitCountry(null);
- final Script4a script = this.traitScript(null);
- return WritingSystem4a.find(language, country, script);
+ return WritingSystem4a.find(language, script, country);
}
public Orthography getOrthography() {
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/AbstractCharacterSequence.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -218,7 +218,7 @@
public WritingSystem inlineWritingSystem() {
final Language4a language = inlineLanguage();
final Country4a country = inlineCountry();
- return WritingSystem4a.find(language, country, null);
+ return WritingSystem4a.find(language, null, country);
}
@Override
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/Character.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -283,9 +283,9 @@
@Override
public WritingSystem4a inlineWritingSystem() {
final Language4a language = inlineLanguage();
+ final Script4a script = inlineScript();
final Country4a country = inlineCountry();
- final Script4a script = inlineScript();
- return WritingSystem4a.find(language, country, script);
+ return WritingSystem4a.find(language, script, country);
}
@Override
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2021-11-18 13:42:26 UTC (rev 12081)
@@ -196,11 +196,12 @@
<dictionary reference="dictionary-eng-moby"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="eng-999-derivatives"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="eng" country-iso-3char="USA" script-iso-4char="Latn"/>
- <orthography language-iso-3char="eng" country-iso-3char="USA" script-iso-4char="Zyyy"/>
- <orthography language-iso-3char="eng" country-iso-3char="999" script-iso-4char="Latn"/>
- <orthography language-iso-3char="eng" country-iso-3char="999" script-iso-4char="Zyyy"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA"/>
+ <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="999"/>
+ <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="999"/>
</configuration>
</axsl-orthography-config>
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.CharacterUtils;
import org.foray.common.primitive.StringUtils;
@@ -90,6 +91,25 @@
}
+ /** The writing system used by this lexer. This can be null. */
+ private WritingSystem4a writingSystem;
+
+ /**
+ * Constructor.
+ * @param writingSystem The writing system used by this Lexer. This can be null.
+ */
+ public Lexer4a(final WritingSystem4a writingSystem) {
+ this.writingSystem = writingSystem;
+ }
+
+ /**
+ * Returns the writing system used by this lexer.
+ * @return The writing system, which can be null.
+ */
+ public WritingSystem4a getWritingSystem() {
+ return this.writingSystem;
+ }
+
+ /**
+ * Returns the writing system used by this lexer.
+ * @return The writing system, which can be null.
+ * @deprecated The name of this method is misspelled. Use {@link #getWritingSystem()} instead.
+ */
+ @Deprecated
+ public WritingSystem4a getWritingSystme() {
+ return getWritingSystem();
+ }
+
@Override
public List<CharSequence> tokenize(final CharSequence sequence) {
if (sequence == null
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerDefault.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerDefault.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerDefault.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -1,37 +0,0 @@
-/*
- * Copyright 2019 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography;
-
-/**
- * A default Lexer implementation that uses only the Unicode character information to determine whether a given
- * character in a sequence is part of a word or not.
- */
-public class LexerDefault extends LexerJavaBreakIterator {
-
-}
Added: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+import org.foray.common.sequence.IntArrayBuilder;
+
+import org.axsl.common.sequence.IntSequence;
+import org.axsl.orthography.optional.Lexer;
+
+import com.ibm.icu.text.BreakIterator;
+
+
+/**
+ * {@link Lexer} implementation that finds its raw breaks using the ICU4J {@link BreakIterator}.
+ * This class is NOT thread-safe, because each instance reuses a single {@link BreakIterator}.
+ */
+public class LexerIcu4jBreakIterator extends Lexer4a {
+
+ /** The break iterator used by this instance. BreakIterators are reusable, not thread-safe, and expensive to
+ * create, so one is created in the constructor and reused for every call. */
+ private final BreakIterator wordIterator;
+
+ /**
+ * Constructor.
+ * @param writingSystem The writing system used by this Lexer. This can be null, in which case the break iterator
+ * is created for {@link WritingSystem4a#USA}.
+ */
+ public LexerIcu4jBreakIterator(final WritingSystem4a writingSystem) {
+ super(writingSystem);
+ final WritingSystem4a baseWritingSystem = writingSystem == null ? WritingSystem4a.USA : writingSystem;
+ this.wordIterator = BreakIterator.getWordInstance(baseWritingSystem.toULocale());
+ }
+
+ @Override
+ protected IntSequence findRawBreaks(final CharSequence sequence) {
+ final IntArrayBuilder result = new IntArrayBuilder(sequence.length());
+ this.wordIterator.setText(sequence.toString());
+ /* Collect every boundary reported by the iterator, starting with the first one. */
+ for (int boundary = this.wordIterator.first(); boundary != BreakIterator.DONE;
+ boundary = this.wordIterator.next()) {
+ result.append(boundary);
+ }
+ return result;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.sequence.IntArrayBuilder;
import org.axsl.common.sequence.IntSequence;
@@ -34,7 +35,6 @@
import org.axsl.orthography.optional.Lexer;
import java.text.BreakIterator;
-import java.util.Locale;
/**
* {@link Lexer} implementation that uses the Java {@link BreakIterator}.
@@ -42,18 +42,29 @@
*/
public class LexerJavaBreakIterator extends Lexer4a {
+ /** The break iterator used by this instance. BreakIterators are reusable, not thread-safe, and expensive to
+ * create. **/
+ private BreakIterator wordIterator;
+
+ /**
+ * Constructor.
+ * @param writingSystem The writing system used by this Lexer. This can be null.
+ */
+ public LexerJavaBreakIterator(final WritingSystem4a writingSystem) {
+ super(writingSystem);
+ final WritingSystem4a baseWritingSystem = writingSystem == null ? WritingSystem4a.USA : writingSystem;
+ this.wordIterator = BreakIterator.getWordInstance(baseWritingSystem.toLocale());
+ }
+
@Override
protected IntSequence findRawBreaks(final CharSequence sequence) {
final IntArrayBuilder result = new IntArrayBuilder(sequence.length());
- /* TODO: BreakIterators are reusable, not thread-safe, and expensive to create. Reuse this after figuring out
- * how instances of this class are used/reused. */
- final BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.US);
- wordIterator.setText(sequence.toString());
- int boundary = wordIterator.first();
+ this.wordIterator.setText(sequence.toString());
+ int boundary = this.wordIterator.first();
while (boundary != BreakIterator.DONE) {
result.append(boundary);
- boundary = wordIterator.next();
+ boundary = this.wordIterator.next();
}
return result;
}
Deleted: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerLatin1.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerLatin1.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerLatin1.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -1,36 +0,0 @@
-/*
- * Copyright 2019 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography;
-
-/**
- * A {@link Lexer4a} implementation that is intended for general use for Latin writing systems.
- */
-public class LexerLatin1 extends LexerJavaBreakIterator {
-
-}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/ConfigParser.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -286,7 +286,7 @@
}
case "derivative-factory": {
final String factoryClassName = attributes.getValue("class");
- final WordWrapperFactory<?> factory = instantiate(factoryClassName, WordWrapperFactory.class);
+ final WordWrapperFactory<?> factory = instantiate(factoryClassName, WordWrapperFactory.class, null, null);
if (factory == null) {
return;
}
@@ -295,7 +295,14 @@
}
case "lexer": {
final String className = attributes.getValue("class");
- final Lexer lexer = instantiate(className, Lexer.class);
+ final String languageString = attributes.getValue("language-iso-3char");
+ final String scriptString = attributes.getValue("script-iso-4char");
+ final String countryString = attributes.getValue("country-iso-3char");
+ final WritingSystem4a writingSystem = WritingSystem4a.find(languageString, scriptString, countryString);
+
+ final Class<?>[] parameterTypes = {WritingSystem4a.class};
+ final Object[] parameters = {writingSystem};
+ final Lexer lexer = instantiate(className, Lexer.class, parameterTypes, parameters);
if (lexer == null) {
return;
}
@@ -483,22 +490,22 @@
this.logger.error("Unable to find language for: {}", languageString);
this.logger.error(getContextMessage());
}
+ final Script4a script = Script4a.findFromAlpha(scriptString);
+ if (script == null) {
+ this.logger.error("Unable to find script for: {}", scriptString);
+ this.logger.error(getContextMessage());
+ }
final Country4a country = Country4a.findFrom3Char(countryString);
if (country == null) {
this.logger.error("Unable to find country for: {}", countryString);
this.logger.error(getContextMessage());
}
- final Script4a script = Script4a.findFromAlpha(scriptString);
- if (script == null) {
- this.logger.error("Unable to find script for: {}", scriptString);
+ final WritingSystem4a writingSystem = WritingSystem4a.find(language, script, country);
+ if (writingSystem == null) {
+ this.logger.error("Unable to find script for: {}_{}_{}", languageString, scriptString, countryString);
this.logger.error(getContextMessage());
}
- final WritingSystem4a orthography = WritingSystem4a.find(language, country, script);
- if (orthography == null) {
- this.logger.error("Unable to find script for: {}_{}_{}", languageString, countryString, scriptString);
- this.logger.error(getContextMessage());
- }
- this.hyphenationServer.registerOrthography(orthography, this.currentOrthographyConfig);
+ this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig);
}
/**
@@ -506,10 +513,13 @@
* @param className The name of the class that should be instantiated.
* @param expectedType The expected superclass for {@code className}.
* @param <T> The type of the superclass object that is being instantiated.
+ * @param parameterTypes The array of parameter types.
+ * @param parameters The array of parameters.
* @return The new instance of {@code className}, or null if it could not be created.
* @throws SAXException Wraps a number of exceptions that can be thrown during instantiation by reflection.
*/
- private <T extends Object> T instantiate(final String className, final Class<T> expectedType) throws SAXException {
+ private <T extends Object> T instantiate(final String className, final Class<T> expectedType,
+ final Class<?>[] parameterTypes, final Object[] parameters) throws SAXException {
Class<?> theClass = null;
try {
theClass = Class.forName(className);
@@ -526,7 +536,7 @@
/* Use the constructor matching the supplied parameter types; callers pass null for the no-args constructor. */
Constructor<T> constructor = null;
try {
- constructor = factoryClass.getConstructor();
+ constructor = factoryClass.getConstructor(parameterTypes);
} catch (final SecurityException e) {
throw new SAXException(e);
} catch (final NoSuchMethodException e) {
@@ -535,7 +545,7 @@
T newInstance = null;
try {
- newInstance = constructor.newInstance();
+ newInstance = constructor.newInstance(parameters);
} catch (final IllegalArgumentException e) {
throw new SAXException(e);
} catch (final InstantiationException e) {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserXml.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -369,7 +369,7 @@
final String language = attributes.getValue(StringUtils.EMPTY_STRING, "language");
final String country = attributes.getValue(StringUtils.EMPTY_STRING, "country");
final String script = attributes.getValue(StringUtils.EMPTY_STRING, "script");
- this.currentDictionary.orthography = WritingSystem4a.find(language, country, script);
+ this.currentDictionary.orthography = WritingSystem4a.find(language, script, country);
logger.info("Begin dictionary word list parsing: " + this.currentDictionary.orthography.toString());
final String soft = attributes.getValue(StringUtils.EMPTY_STRING, "soft-hyphen-char");
if (soft.length() != 1) {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -360,7 +360,7 @@
* the text content. */
final Script4a script = Script4a.LATIN;
- element.writingSystem = WritingSystem4a.find(language, country, script);
+ element.writingSystem = WritingSystem4a.find(language, script, country);
if (element.writingSystem == null) {
final String message = String.format(
"Orthography not found. Language: %1$s, Country: %2$s, Script: %3$s ", languageString,
Copied: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java (from rev 12080, trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java)
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java (rev 0)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2019 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import org.foray.common.i18n.WritingSystem4a;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * Tests of {@link LexerJavaBreakIterator}.
+ */
+public class LexerEnglishTests {
+
+ /** The object under test. */
+ private LexerJavaBreakIterator out;
+ /* TODO: This is temporarily referencing the wrong type. */
+
+ /**
+ * Setup the test.
+ */
+ @Before
+ public void setupTest() {
+ this.out = new LexerJavaBreakIterator(WritingSystem4a.USA);
+ }
+
+ /**
+ * A simple test of {@link LexerJavaBreakIterator#tokenize}.
+ */
+ @Test
+ public void testBreakSimple() {
+ final String testString = "Beware the ides of March.";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(10, actual.size());
+ Assert.assertEquals("Beware", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("the", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("ides", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("of", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("March", actual.get(8));
+ Assert.assertEquals(".", actual.get(9));
+ }
+
+ /**
+ * A more complicated test of {@link LexerJavaBreakIterator#tokenize}.
+ */
+ @Test
+ public void testMedium() {
+ final String testString = "39. It was the best of times. It was the worst of times. <----";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(27, actual.size());
+ Assert.assertEquals("39", actual.get(0));
+ Assert.assertEquals(". ", actual.get(1));
+ Assert.assertEquals("It", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("was", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("the", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("best", actual.get(8));
+ Assert.assertEquals(" ", actual.get(9));
+ Assert.assertEquals("of", actual.get(10));
+ Assert.assertEquals(" ", actual.get(11));
+ Assert.assertEquals("times", actual.get(12));
+ Assert.assertEquals(". ", actual.get(13));
+ Assert.assertEquals("It", actual.get(14));
+ Assert.assertEquals(" ", actual.get(15));
+ Assert.assertEquals("was", actual.get(16));
+ Assert.assertEquals(" ", actual.get(17));
+ Assert.assertEquals("the", actual.get(18));
+ Assert.assertEquals(" ", actual.get(19));
+ Assert.assertEquals("worst", actual.get(20));
+ Assert.assertEquals(" ", actual.get(21));
+ Assert.assertEquals("of", actual.get(22));
+ Assert.assertEquals(" ", actual.get(23));
+ Assert.assertEquals("times", actual.get(24));
+ Assert.assertEquals(". ", actual.get(25));
+ Assert.assertEquals("<----", actual.get(26));
+ }
+
+ /**
+ * Test of {@link LexerJavaBreakIterator#tokenize} with a compound word.
+ */
+ @Test
+ public void testWithCompoundWord() {
+ /* Spoken by Juliet, Romeo & Juliet, Act 3 Scene 2. */
+ final String testString = "Gallop apace, you fiery-footed steeds,";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+
+ /* Compound word "fiery-footed" treated as one word. */
+ Assert.assertEquals(10, actual.size());
+ Assert.assertEquals("Gallop", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("apace", actual.get(2));
+ Assert.assertEquals(", ", actual.get(3));
+ Assert.assertEquals("you", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("fiery-footed", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("steeds", actual.get(8));
+ Assert.assertEquals(",", actual.get(9));
+ }
+
+ /**
+ * Test of {@link LexerJavaBreakIterator#tokenize} with a mid-word contraction.
+ */
+ @Test
+ public void testWithMidWordContractionApostrophe() {
+ /* Spoken by Hamlet, Hamlet, Act 2, Scene 2. */
+ final String testString = "The play's the thing";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(7, actual.size());
+ Assert.assertEquals("The", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("play's", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("the", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("thing", actual.get(6));
+ }
+
+ /**
+ * Test of {@link LexerJavaBreakIterator#tokenize} with a bunch of symbols that are not really words.
+ * However, because each one is between hard word breaks, should be tokenized as words.
+ */
+ @Test
+ public void testWithSymbolsAsWords() {
+ final String testString = "! @ # $ %";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(9, actual.size());
+ Assert.assertEquals("!", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("@", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("#", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("$", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("%", actual.get(8));
+ }
+
+ /**
+ * Test of {@link LexerJavaBreakIterator#tokenize} with attached leading and trailing punctuation.
+ * That punctuation should be included in the interword text unless it is detached, in which case it should be
+ * treated as a word.
+ */
+ @Test
+ public void testWithAttachedPunctuation() {
+ final String testString = "Parentheses (as I stated earlier) are a matching pair of ( and ) characters.";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(28, actual.size());
+ Assert.assertEquals("Parentheses", actual.get(0));
+ Assert.assertEquals(" (", actual.get(1));
+ Assert.assertEquals("as", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("I", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("stated", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("earlier", actual.get(8));
+ Assert.assertEquals(") ", actual.get(9));
+ Assert.assertEquals("are", actual.get(10));
+ Assert.assertEquals(" ", actual.get(11));
+ Assert.assertEquals("a", actual.get(12));
+ Assert.assertEquals(" ", actual.get(13));
+ Assert.assertEquals("matching", actual.get(14));
+ Assert.assertEquals(" ", actual.get(15));
+ Assert.assertEquals("pair", actual.get(16));
+ Assert.assertEquals(" ", actual.get(17));
+ Assert.assertEquals("of", actual.get(18));
+ Assert.assertEquals(" ", actual.get(19));
+ Assert.assertEquals("(", actual.get(20));
+ Assert.assertEquals(" ", actual.get(21));
+ Assert.assertEquals("and", actual.get(22));
+ Assert.assertEquals(" ", actual.get(23));
+ Assert.assertEquals(")", actual.get(24));
+ Assert.assertEquals(" ", actual.get(25));
+ Assert.assertEquals("characters", actual.get(26));
+ Assert.assertEquals(".", actual.get(27));
+ }
+
+ /**
+ * Test of example used in the Unicode Text Segmentation annex.
+ * @see <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">Unicode(r) Standard Annex #29, Unicode Text
+ * Segmentation</a>
+ */
+ @Test
+ public void testUnicodeWordBoundariesExample() {
+ final String testString = "The quick (“brown”) fox can’t jump 32.3 feet, right?";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(18, actual.size());
+ Assert.assertEquals("The", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("quick", actual.get(2));
+ Assert.assertEquals(" (“", actual.get(3));
+ Assert.assertEquals("brown", actual.get(4));
+ Assert.assertEquals("”) ", actual.get(5));
+ Assert.assertEquals("fox", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("can’t", actual.get(8));
+ Assert.assertEquals(" ", actual.get(9));
+ Assert.assertEquals("jump", actual.get(10));
+ Assert.assertEquals(" ", actual.get(11));
+ Assert.assertEquals("32.3", actual.get(12));
+ Assert.assertEquals(" ", actual.get(13));
+ Assert.assertEquals("feet", actual.get(14));
+ Assert.assertEquals(", ", actual.get(15));
+ Assert.assertEquals("right", actual.get(16));
+ Assert.assertEquals("?", actual.get(17));
+ }
+
+ /**
+ * Test.
+ */
+ @Test
+ public void testCorner001() {
+ final String testString = "Appendix D.4)";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(4, actual.size());
+ Assert.assertEquals("Appendix", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("D.4", actual.get(2));
+ Assert.assertEquals(")", actual.get(3));
+ }
+
+}
Deleted: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-18 13:42:26 UTC (rev 12081)
@@ -1,256 +0,0 @@
-/*
- * Copyright 2019 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.orthography;
-
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.util.List;
-
-/**
- * Tests of {@link LexerLatin1}.
- */
-public class LexerLatin1Tests {
-
- /** The object under test. */
- private LexerJavaBreakIterator out;
- /* TODO: This is temporarily referencing the wrong type. */
-
- /**
- * Setup the test.
- */
- @Before
- public void setupTest() {
- this.out = new LexerJavaBreakIterator();
- }
-
- /**
- * A simple test of {@link LexerLatin1#breakIntoWords(CharSequence)}.
- */
- @Test
- public void testBreakSimple() {
- final String testString = "Beware the ides of March.";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(10, actual.size());
- Assert.assertEquals("Beware", actual.get(0));
- Assert.assertEquals(" ", actual.get(1));
- Assert.assertEquals("the", actual.get(2));
- Assert.assertEquals(" ", actual.get(3));
- Assert.assertEquals("ides", actual.get(4));
- Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("of", actual.get(6));
- Assert.assertEquals(" ", actual.get(7));
- Assert.assertEquals("March", actual.get(8));
- Assert.assertEquals(".", actual.get(9));
- }
-
- /**
- * A more complicated test of {@link LexerLatin1#breakIntoWords(CharSequence)}.
- */
- @Test
- public void testMedium() {
- final String testString = "39. It was the best of times. It was the worst of times. <----";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(27, actual.size());
- Assert.assertEquals("39", actual.get(0));
- Assert.assertEquals(". ", actual.get(1));
- Assert.assertEquals("It", actual.get(2));
- Assert.assertEquals(" ", actual.get(3));
- Assert.assertEquals("was", actual.get(4));
- Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("the", actual.get(6));
- Assert.assertEquals(" ", actual.get(7));
- Assert.assertEquals("best", actual.get(8));
- Assert.assertEquals(" ", actual.get(9));
- Assert.assertEquals("of", actual.get(10));
- Assert.assertEquals(" ", actual.get(11));
- Assert.assertEquals("times", actual.get(12));
- Assert.assertEquals(". ", actual.get(13));
- Assert.assertEquals("It", actual.get(14));
- Assert.assertEquals(" ", actual.get(15));
- Assert.assertEquals("was", actual.get(16));
- Assert.assertEquals(" ", actual.get(17));
- Assert.assertEquals("the", actual.get(18));
- Assert.assertEquals(" ", actual.get(19));
- Assert.assertEquals("worst", actual.get(20));
- Assert.assertEquals(" ", actual.get(21));
- Assert.assertEquals("of", actual.get(22));
- Assert.assertEquals(" ", actual.get(23));
- Assert.assertEquals("times", actual.get(24));
- Assert.assertEquals(". ", actual.get(25));
- Assert.assertEquals("<----", actual.get(26));
- }
-
- /**
- * Test of {@link LexerLatin1#breakIntoWords(CharSequence)} with a compound word.
- */
- @Test
- public void testWithCompoundWord() {
- /* Spoken by Juliet, Romeo & Juliet, Act 3 Scene 2. */
- final String testString = "Gallop apace, you fiery-footed steeds,";
- final List<CharSequence> actual = this.out.tokenize(testString);
-
- /* Compound word "fiery-footed" treated as one word. */
- Assert.assertEquals(10, actual.size());
- Assert.assertEquals("Gallop", actual.get(0));
- Assert.assertEquals(" ", actual.get(1));
- Assert.assertEquals("apace", actual.get(2));
- Assert.assertEquals(", ", actual.get(3));
- Assert.assertEquals("you", actual.get(4));
- Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("fiery-footed", actual.get(6));
- Assert.assertEquals(" ", actual.get(7));
- Assert.assertEquals("steeds", actual.get(8));
- Assert.assertEquals(",", actual.get(9));
- }
-
- /**
- * Test of {@link LexerLatin1#breakIntoWords(CharSequence)} with a mid-word contraction.
- */
- @Test
- public void testWithMidWordContractionApostrophe() {
- /* Spoken by Hamlet, Hamlet, Act 2, Scene 2. */
- final String testString = "The play's the thing";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(7, actual.size());
- Assert.assertEquals("The", actual.get(0));
- Assert.assertEquals(" ", actual.get(1));
- Assert.assertEquals("play's", actual.get(2));
- Assert.assertEquals(" ", actual.get(3));
- Assert.assertEquals("the", actual.get(4));
- Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("thing", actual.get(6));
- }
-
- /**
- * Test of {@link LexerLatin1#breakIntoWords(CharSequence)} with a bunch of symbols that are not really words.
- * However, because each one is between hard word breaks, should be tokenized as words.
- */
- @Test
- public void testWithSymbolsAsWords() {
- final String testString = "! @ # $ %";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(9, actual.size());
- Assert.assertEquals("!", actual.get(0));
- Assert.assertEquals(" ", actual.get(1));
- Assert.assertEquals("@", actual.get(2));
- Assert.assertEquals(" ", actual.get(3));
- Assert.assertEquals("#", actual.get(4));
- Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("$", actual.get(6));
- Assert.assertEquals(" ", actual.get(7));
- Assert.assertEquals("%", actual.get(8));
- }
-
- /**
- * Test of {@link LexerLatin1#breakIntoWords(CharSequence)} with attached leading and trailing punctuation.
- * That punctuation should be included in the interword text unless it is detached, in which case it should be
- * treated as a word.
- */
- @Test
- public void testWithAttachedPunctuation() {
- final String testString = "Parentheses (as I stated earlier) are a matching pair of ( and ) characters.";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(28, actual.size());
- Assert.assertEquals("Parentheses", actual.get(0));
- Assert.assertEquals(" (", actual.get(1));
- Assert.assertEquals("as", actual.get(2));
- Assert.assertEquals(" ", actual.get(3));
- Assert.assertEquals("I", actual.get(4));
- Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("stated", actual.get(6));
- Assert.assertEquals(" ", actual.get(7));
- Assert.assertEquals("earlier", actual.get(8));
- Assert.assertEquals(") ", actual.get(9));
- Assert.assertEquals("are", actual.get(10));
- Assert.assertEquals(" ", actual.get(11));
- Assert.assertEquals("a", actual.get(12));
- Assert.assertEquals(" ", actual.get(13));
- Assert.assertEquals("matching", actual.get(14));
- Assert.assertEquals(" ", actual.get(15));
- Assert.assertEquals("pair", actual.get(16));
- Assert.assertEquals(" ", actual.get(17));
- Assert.assertEquals("of", actual.get(18));
- Assert.assertEquals(" ", actual.get(19));
- Assert.assertEquals("(", actual.get(20));
- Assert.assertEquals(" ", actual.get(21));
- Assert.assertEquals("and", actual.get(22));
- Assert.assertEquals(" ", actual.get(23));
- Assert.assertEquals(")", actual.get(24));
- Assert.assertEquals(" ", actual.get(25));
- Assert.assertEquals("characters", actual.get(26));
- Assert.assertEquals(".", actual.get(27));
- }
-
- /**
- * Test of example used in the Unicode Text Segmentation annex.
- * @see <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">Unicode(r) Standard Annex #29, Unicode Text
- * Segmentation</a>
- */
- @Test
- public void testUnicodeWordBoundariesExample() {
- final String testString = "The quick (“brown”) fox can’t jump 32.3 feet, right?";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(18, actual.size());
- Assert.assertEquals("The", actual.get(0));
- Assert.assertEquals(" ", actual.get(1));
- Assert.assertEquals("quick", actual.get(2));
- Assert.assertEquals(" (“", actual.get(3));
- Assert.assertEquals("brown", actual.get(4));
- Assert.assertEquals("”) ", actual.get(5));
- Assert.assertEquals("fox", actual.get(6));
- Assert.assertEquals(" ", actual.get(7));
- Assert.assertEquals("can’t", actual.get(8));
- Assert.assertEquals(" ", actual.get(9));
- Assert.assertEquals("jump", actual.get(10));
- Assert.assertEquals(" ", actual.get(11));
- Assert.assertEquals("32.3", actual.get(12));
- Assert.assertEquals(" ", actual.get(13));
- Assert.assertEquals("feet", actual.get(14));
- Assert.assertEquals(", ", actual.get(15));
- Assert.assertEquals("right", actual.get(16));
- Assert.assertEquals("?", actual.get(17));
- }
-
- /**
- * Test.
- */
- @Test
- public void testCorner001() {
- final String testString = "Appendix D.4)";
- final List<CharSequence> actual = this.out.tokenize(testString);
- Assert.assertEquals(4, actual.size());
- Assert.assertEquals("Appendix", actual.get(0));
- Assert.assertEquals(" ", actual.get(1));
- Assert.assertEquals("D.4", actual.get(2));
- Assert.assertEquals(")", actual.get(3));
- }
-
-}
Modified: trunk/foray/foray-orthography/src/test/resources/orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/test/resources/orthography-config.xml 2021-11-17 23:03:23 UTC (rev 12080)
+++ trunk/foray/foray-orthography/src/test/resources/orthography-config.xml 2021-11-18 13:42:26 UTC (rev 12081)
@@ -129,51 +129,52 @@
<dictionary reference="dictionary-eng-moby"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="derivatives-eng"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="eng" country-iso-3char="USA" script-iso-4char="Latn"/>
- <orthography language-iso-3char="eng" country-iso-3char="USA" script-iso-4char="Zyyy"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-fin"/>
- <lexer class="org.foray.orthography.LexerDefault"/>
- <orthography language-iso-3char="fin" country-iso-3char="FIN" script-iso-4char="Latn"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="fin" script-iso-4char="Latn" country-iso-3char="FIN"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-hun"/>
- <lexer class="org.foray.orthography.LexerDefault"/>
- <orthography language-iso-3char="hun" country-iso-3char="HUN" script-iso-4char="Latn"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="hun" script-iso-4char="Latn" country-iso-3char="HUN"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-ita"/>
- <lexer class="org.foray.orthography.LexerLatin1"/>
- <orthography language-iso-3char="ita" country-iso-3char="ITA" script-iso-4char="Latn"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ITA"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-pol"/>
- <lexer class="org.foray.orthography.LexerDefault"/>
- <orthography language-iso-3char="pol" country-iso-3char="POL" script-iso-4char="Latn"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="pol" script-iso-4char="Latn" country-iso-3char="POL"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-por"/>
- <lexer class="org.foray.orthography.LexerDefault"/>
- <orthography language-iso-3char="por" country-iso-3char="PRT" script-iso-4char="Latn"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="por" script-iso-4char="Latn" country-iso-3char="PRT"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-rus"/>
- <lexer class="org.foray.orthography.LexerDefault"/>
- <orthography language-iso-3char="rus" country-iso-3char="RUS" script-iso-4char="Cyrl"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="rus" script-iso-4char="Cyrl" country-iso-3char="RUS"/>
</configuration>
<configuration>
<hyphenation-patterns reference="hyph-patterns-spa"/>
- <lexer class="org.foray.orthography.LexerDefault"/>
- <orthography language-iso-3char="spa" country-iso-3char="ESP" script-iso-4char="Latn"/>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="spa" script-iso-4char="Latn" country-iso-3char="ESP"/>
</configuration>
</axsl-orthography-config>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-17 23:03:25
|
Revision: 12080
http://sourceforge.net/p/foray/code/12080
Author: victormote
Date: 2021-11-17 23:03:23 +0000 (Wed, 17 Nov 2021)
Log Message:
-----------
Improvements to tokenization.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2021-11-17 22:20:00 UTC (rev 12079)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2021-11-17 23:03:23 UTC (rev 12080)
@@ -76,6 +76,7 @@
private static final String POSSIBLE_INTRAWORD_PUNCTUATION = new String(new char[] {
Basic_Latin_Block.APOSTROPHE,
General_Punctuation_Block.RIGHT_SINGLE_QUOTATION_MARK,
+ Basic_Latin_Block.FULL_STOP, //English example: "Section 8.16"
});
/** The punctuation characters which, when they immediately follow a word, can be separated from that word during
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 22:20:00 UTC (rev 12079)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 23:03:23 UTC (rev 12080)
@@ -158,12 +158,14 @@
if (currentBreakType == CharType.ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION) {
final CharType previousBreakType = breakTypes[breakIndex - 1];
final CharType nextBreakType = breakTypes[breakIndex + 1];
- if (previousBreakType == CharType.WORD_CHAR
- && nextBreakType == CharType.WORD_CHAR) {
- /* This also is part of the word. */
- breakTypes[breakIndex] = CharType.WORD_CHAR;
- } else {
- breakTypes[breakIndex] = CharType.ATTACHED_TRAILING_PUNCTUATION;
+ if (previousBreakType == CharType.WORD_CHAR) {
+ if (nextBreakType == CharType.WORD_CHAR
+ || nextBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
+ /* This also is part of the word. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ } else {
+ breakTypes[breakIndex] = CharType.ATTACHED_TRAILING_PUNCTUATION;
+ }
}
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 22:20:00 UTC (rev 12079)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 23:03:23 UTC (rev 12080)
@@ -239,4 +239,18 @@
Assert.assertEquals("?", actual.get(17));
}
+ /**
+ * Test of a section reference containing an intraword full stop that is followed by a closing parenthesis.
+ */
+ @Test
+ public void testCorner001() {
+ final String testString = "Appendix D.4)";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(4, actual.size());
+ Assert.assertEquals("Appendix", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("D.4", actual.get(2));
+ Assert.assertEquals(")", actual.get(3));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-17 22:20:03
|
Revision: 12079
http://sourceforge.net/p/foray/code/12079
Author: victormote
Date: 2021-11-17 22:20:00 +0000 (Wed, 17 Nov 2021)
Log Message:
-----------
Improvements to standard tokenizing.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 15:58:12 UTC (rev 12078)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 22:20:00 UTC (rev 12079)
@@ -168,23 +168,41 @@
}
}
- /* Convert attached trailing punctuation immediately after word content to interword. */
+ /* Resolve attached leading punctuation. */
for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
+ if (currentBreakType == CharType.ATTACHED_LEADING_PUNCTUATION) {
final CharType previousBreakType = breakTypes[breakIndex - 1];
- if (previousBreakType != CharType.BREAK_CHAR) {
- breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ final CharType nextBreakType = breakTypes[breakIndex + 1];
+ if (previousBreakType == CharType.BREAK_CHAR) {
+ if (nextBreakType == CharType.BREAK_CHAR) {
+ /* Surrounded by breaks. Treat this as a word. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ } else {
+ /* Combine it with the previous whitespace. */
+ breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ }
}
}
}
- /* Convert attached leading punctuation immediately before word content to interword. */
- for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+
+
+ /* Resolve attached trailing punctuation. Iterate these in reverse order. */
+ for (int breakIndex = breakTypes.length - 1; breakIndex > 0; breakIndex --) {
final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_LEADING_PUNCTUATION) {
+ if (currentBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
+ final CharType previousBreakType = breakTypes[breakIndex - 1];
final CharType nextBreakType = breakTypes[breakIndex + 1];
- if (nextBreakType != CharType.BREAK_CHAR) {
- breakTypes[breakIndex] = CharType.BREAK_CHAR;
+
+ if (nextBreakType == CharType.BREAK_CHAR
+ || nextBreakType == CharType.END) {
+ if (previousBreakType == CharType.BREAK_CHAR) {
+ /* Surrounded by breaks. Treat this as a word. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ } else {
+ /* Treat it as interword content, combined with any following whitespace. */
+ breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ }
}
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 15:58:12 UTC (rev 12078)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 22:20:00 UTC (rev 12079)
@@ -30,7 +30,6 @@
import org.junit.Assert;
import org.junit.Before;
-import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
@@ -216,7 +215,6 @@
* Segmentation</a>
*/
@Test
- @Ignore
public void testUnicodeWordBoundariesExample() {
final String testString = "The quick (“brown”) fox can’t jump 32.3 feet, right?";
final List<CharSequence> actual = this.out.tokenize(testString);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-17 15:58:15
|
Revision: 12078
http://sourceforge.net/p/foray/code/12078
Author: victormote
Date: 2021-11-17 15:58:12 +0000 (Wed, 17 Nov 2021)
Log Message:
-----------
Improvements to possible intraword punctuation handling.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2021-11-17 14:58:35 UTC (rev 12077)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2021-11-17 15:58:12 UTC (rev 12078)
@@ -72,6 +72,12 @@
General_Punctuation_Block.RIGHT_DOUBLE_QUOTATION_MARK,
});
+ /** The punctuation characters which may, depending on context, be treated as intraword punctuation. */
+ private static final String POSSIBLE_INTRAWORD_PUNCTUATION = new String(new char[] {
+ Basic_Latin_Block.APOSTROPHE,
+ General_Punctuation_Block.RIGHT_SINGLE_QUOTATION_MARK,
+ });
+
/** The punctuation characters which, when they immediately follow a word, can be separated from that word during
* line-breaking, but which preferably should not be separated. TODO: This list is not comprehensive and should be
* improved. */
@@ -148,6 +154,18 @@
}
/**
+ * Indicates whether a given codepoint is possibly intraword punctuation. i.e. punctuation that is legitimately
+ * part of a word.
+ * For example, in English, the apostrophe and typographical closing quote can be used to mark contractions or
+ * possessives.
+ * @param c The codepoint to be tested.
+ * @return True if {@code c} is possible intraword punctuation.
+ */
+ public static boolean isPossibleIntrawordPunctuation(final int c) {
+ return POSSIBLE_INTRAWORD_PUNCTUATION.indexOf(c) > -1;
+ }
+
+ /**
* Indicates whether a given character is a punctuation mark.
* @param c The char to be tested.
* @return True if and only if {@code c} is a punctuation mark.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 14:58:35 UTC (rev 12077)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 15:58:12 UTC (rev 12078)
@@ -79,6 +79,9 @@
* parenthesis. */
ATTACHED_TRAILING_PUNCTUATION,
+ /** Character is either an attached trailing punctuation mark or intraword punctuation. */
+ ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION,
+
/** There is no character here -- this is the end of the character sequence. */
END,
@@ -149,6 +152,22 @@
* @param breakTypes The array of charTypes.
*/
protected void filterBreakTypes(final CharType[] breakTypes) {
+ /* Resolve possible intraword punctuation. */
+ for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+ final CharType currentBreakType = breakTypes[breakIndex];
+ if (currentBreakType == CharType.ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION) {
+ final CharType previousBreakType = breakTypes[breakIndex - 1];
+ final CharType nextBreakType = breakTypes[breakIndex + 1];
+ if (previousBreakType == CharType.WORD_CHAR
+ && nextBreakType == CharType.WORD_CHAR) {
+ /* This also is part of the word. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ } else {
+ breakTypes[breakIndex] = CharType.ATTACHED_TRAILING_PUNCTUATION;
+ }
+ }
+ }
+
/* Convert attached trailing punctuation immediately after word content to interword. */
for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
@@ -291,7 +310,11 @@
return CharType.ATTACHED_LEADING_PUNCTUATION;
}
if (CharacterUtils.isAttachedTrailingPunctuation(c)) {
- return CharType.ATTACHED_TRAILING_PUNCTUATION;
+ if (CharacterUtils.isPossibleIntrawordPunctuation(c)) {
+ return CharType.ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION;
+ } else {
+ return CharType.ATTACHED_TRAILING_PUNCTUATION;
+ }
}
return CharType.OTHER;
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 14:58:35 UTC (rev 12077)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerLatin1Tests.java 2021-11-17 15:58:12 UTC (rev 12078)
@@ -30,6 +30,7 @@
import org.junit.Assert;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
@@ -209,4 +210,35 @@
Assert.assertEquals(".", actual.get(27));
}
+ /**
+ * Test of example used in the Unicode Text Segmentation annex.
+ * @see <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">Unicode(r) Standard Annex #29, Unicode Text
+ * Segmentation</a>
+ */
+ @Test
+ @Ignore
+ public void testUnicodeWordBoundariesExample() {
+ final String testString = "The quick (“brown”) fox can’t jump 32.3 feet, right?";
+ final List<CharSequence> actual = this.out.tokenize(testString);
+ Assert.assertEquals(18, actual.size());
+ Assert.assertEquals("The", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("quick", actual.get(2));
+ Assert.assertEquals(" (“", actual.get(3));
+ Assert.assertEquals("brown", actual.get(4));
+ Assert.assertEquals("”) ", actual.get(5));
+ Assert.assertEquals("fox", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("can’t", actual.get(8));
+ Assert.assertEquals(" ", actual.get(9));
+ Assert.assertEquals("jump", actual.get(10));
+ Assert.assertEquals(" ", actual.get(11));
+ Assert.assertEquals("32.3", actual.get(12));
+ Assert.assertEquals(" ", actual.get(13));
+ Assert.assertEquals("feet", actual.get(14));
+ Assert.assertEquals(", ", actual.get(15));
+ Assert.assertEquals("right", actual.get(16));
+ Assert.assertEquals("?", actual.get(17));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-17 14:58:37
|
Revision: 12077
http://sourceforge.net/p/foray/code/12077
Author: victormote
Date: 2021-11-17 14:58:35 +0000 (Wed, 17 Nov 2021)
Log Message:
-----------
Move most logic from the implementation to the abstract superclass, to maximize reuse.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 14:02:41 UTC (rev 12076)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2021-11-17 14:58:35 UTC (rev 12077)
@@ -29,11 +29,33 @@
package org.foray.orthography;
import org.foray.common.primitive.CharacterUtils;
+import org.foray.common.primitive.StringUtils;
+import org.axsl.common.sequence.IntSequence;
import org.axsl.orthography.optional.Lexer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
/**
- * Implementations know how to break a character sequence into words and interword content.
+ * <p>Implementations know how to break a character sequence into words and interword content.
+ * Where possible, the process of "lexing" or "tokenizing" being done here should be done without reference to
+ * word dictionaries.
+ * We hope to split the content between words and non-words using information about the characters only.</p>
+ *
+ * <p>The general process used by implementations of this class is as follows:</p>
+ * <ol>
+ * <li>Find raw break boundaries. This task can be delegated to {@link java.text.BreakIterator} or ICU4J's similar
+ * class {@link com.ibm.icu.text.BreakIterator}, which use the Unicode text segmentation algorithms to find the
+ * boundaries that they report.</li>
+ * <li>Detect initial boundary types.</li>
+ * <li>Refine/reduce the boundary types to the ones that we care about.</li>
+ * <li>Tokenize the text based on the refined boundary types.</li>
+ * </ol>
+ *
+ * @see <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">Unicode Standard Annex #29, Unicode Text
+ * Segmentation</a>
*/
public abstract class Lexer4a implements Lexer {
@@ -65,8 +87,150 @@
}
+ @Override
+ public List<CharSequence> tokenize(final CharSequence sequence) {
+ if (sequence == null
+ || sequence.length() < 1) {
+ return Collections.emptyList();
+ }
+ /* First pass is to find all of the breaks that the BreakIterator can find. */
+ final IntSequence rawBreaks = findRawBreaks(sequence);
+
+
+ /* The BreakIterator is helpful, but for our purposes does not dig deeply enough.
+ * Our purpose is to find where words start and end and to treat all other content as non-word or interword
+ * content.
+ * So our second pass is to find out the type of each character that is at a break. */
+ final CharType[] breakTypes = findBreakTypes(sequence, rawBreaks);
+
+ /* Third pass. Simplify the breakTypes array. */
+ filterBreakTypes(breakTypes);
+
+ /* The fourth step iterates over the resolved break types and turns them into tokens. */
+ return createTokens(sequence, rawBreaks, breakTypes);
+ }
+
/**
+ * Uses the Java BreakIterator to find the breaks that it detects.
+ * @param sequence The sequence whose breaks are needed.
+ * @return The sequence of breaks, indexes into {@code sequence}.
+ */
+ protected abstract IntSequence findRawBreaks(CharSequence sequence);
+
+ /**
+ * Determines the type of character that triggered each raw break.
+ * @param sequence The characters being tokenized.
+ * @param rawBreaks The raw breaks.
+ * @return An array with a one-to-one correspondence with {@code rawBreaks}, containing the type of character at
+ * that break.
+ */
+ protected CharType[] findBreakTypes(final CharSequence sequence, final IntSequence rawBreaks) {
+ final CharType[] breakTypes = new CharType[rawBreaks.length()];
+ for (int breakIndex = 0; breakIndex < rawBreaks.length(); breakIndex ++) {
+ if (breakIndex >= rawBreaks.length() - 1) {
+ breakTypes[breakIndex] = CharType.END;
+ } else {
+ final int sequenceIndex = rawBreaks.intAt(breakIndex);
+ final int testChar = sequence.charAt(sequenceIndex);
+ breakTypes[breakIndex] = computeCharType(testChar);
+ }
+ }
+ return breakTypes;
+ }
+
+ /**
+ * Combine and eliminate the elements in the charTypes array.
+ * The touchstone here is the known word breaks which are always interword content.
+ * Anything between them must be either attached to the word break to become a part of the interword content, or
+ * must get coalesced into a "word" whether it is recognized as word content or not. If done properly, every element
+ * in the array, when finished, should be either {@link CharType#WORD_CHAR} or {@link CharType#BREAK_CHAR}.
+ * Anything not in those two categories will be treated in the final tokenization as {@link CharType#WORD_CHAR}.
+ * @param breakTypes The array of charTypes.
+ */
+ protected void filterBreakTypes(final CharType[] breakTypes) {
+ /* Convert attached trailing punctuation immediately after word content to interword. */
+ for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+ final CharType currentBreakType = breakTypes[breakIndex];
+ if (currentBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
+ final CharType previousBreakType = breakTypes[breakIndex - 1];
+ if (previousBreakType != CharType.BREAK_CHAR) {
+ breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ }
+ }
+ }
+ /* Convert attached leading punctuation immediately before word content to interword. */
+ for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+ final CharType currentBreakType = breakTypes[breakIndex];
+ if (currentBreakType == CharType.ATTACHED_LEADING_PUNCTUATION) {
+ final CharType nextBreakType = breakTypes[breakIndex + 1];
+ if (nextBreakType != CharType.BREAK_CHAR) {
+ breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ }
+ }
+ }
+ }
+
+ protected List<CharSequence> createTokens(final CharSequence sequence, final IntSequence rawBreaks,
+ final CharType[] breakTypes) {
+ final List<CharSequence> tokens = new ArrayList<CharSequence>();
+ boolean inWord = false;
+ int startNextToken = 0;
+
+ /* First token. */
+ switch(breakTypes[0]) {
+ case BREAK_CHAR: {
+ /* Sequence starts with a break. Add the empty dummy word that signals that fact. */
+ tokens.add(StringUtils.EMPTY_STRING);
+ inWord = false;
+ break;
+ }
+ default: {
+ /* Sequence starts with a word. */
+ inWord = true;
+ }
+ }
+
+
+ /* Iterate all remaining tokens. */
+ for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+ final CharType currentBreakType = breakTypes[breakIndex];
+ final int sequenceIndex = rawBreaks.intAt(breakIndex);
+
+ switch (currentBreakType) {
+ case END: {
+ tokens.add(sequence.subSequence(startNextToken, sequence.length()));
+ break;
+ }
+ case BREAK_CHAR: {
+ if (inWord) {
+ /* Write the word and roll forward. */
+ tokens.add(sequence.subSequence(startNextToken, sequenceIndex));
+ startNextToken = sequenceIndex;
+ inWord = false;
+ } else {
+ /* There is no state change. Nothing to do. */
+ }
+ break;
+ }
+ default: {
+ /* This is considered the start of word content. */
+ if (inWord) {
+ /* There is no state change. Nothing to do. */
+ } else {
+ /* Write the interword content and roll forward. */
+ tokens.add(sequence.subSequence(startNextToken, sequenceIndex));
+ startNextToken = sequenceIndex;
+ inWord = true;
+ }
+ }
+ }
+ }
+ return tokens;
+ }
+
+
+ /**
* Indicates whether a given Unicode code point is a word character.
* Subclasses should override this as needed.
* @param c The character being tested.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2021-11-17 14:02:41 UTC (rev 12076)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2021-11-17 14:58:35 UTC (rev 12077)
@@ -28,7 +28,6 @@
package org.foray.orthography;
-import org.foray.common.primitive.StringUtils;
import org.foray.common.sequence.IntArrayBuilder;
import org.axsl.common.sequence.IntSequence;
@@ -35,21 +34,15 @@
import org.axsl.orthography.optional.Lexer;
import java.text.BreakIterator;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
import java.util.Locale;
/**
* {@link Lexer} implementation that uses the Java {@link BreakIterator}.
+ * This class is NOT thread-safe.
*/
public class LexerJavaBreakIterator extends Lexer4a {
- /**
- * Uses the Java BreakIterator to find the breaks that it detects.
- * @param sequence The sequence whose breaks are needed.
- * @return The sequence of breaks, indexes into {@code sequence}.
- */
+ @Override
protected IntSequence findRawBreaks(final CharSequence sequence) {
final IntArrayBuilder result = new IntArrayBuilder(sequence.length());
/* TODO: BreakIterators are reusable, not thread-safe, and expensive to create. Reuse this after figuring out
@@ -65,140 +58,4 @@
return result;
}
- /**
- * Determines the type of character that triggered each raw break.
- * @param sequence The characters being tokenized.
- * @param rawBreaks The raw breaks.
- * @return An array with a one-to-one correspondence with {@code rawBreaks}, containing the type of character at
- * that break.
- */
- protected CharType[] findBreakTypes(final CharSequence sequence, final IntSequence rawBreaks) {
- final CharType[] breakTypes = new CharType[rawBreaks.length()];
- for (int breakIndex = 0; breakIndex < rawBreaks.length(); breakIndex ++) {
- if (breakIndex >= rawBreaks.length() - 1) {
- breakTypes[breakIndex] = CharType.END;
- } else {
- final int sequenceIndex = rawBreaks.intAt(breakIndex);
- final int testChar = sequence.charAt(sequenceIndex);
- breakTypes[breakIndex] = computeCharType(testChar);
- }
- }
- return breakTypes;
- }
-
- /**
- * Combine and eliminate the elements in the charTypes array.
- * The touchstone here is the known word breaks which are always interword content.
- * Anything between them must be either attached to the word break to become a part of the interword content, or
- * must get coalesced into a "word" whether it is recognized as word content or not. If done properly, every element
- * in the array, when finished, should be either {@link CharType#WORD_CHAR} or {@link CharType#BREAK_CHAR}.
- * Anything not in those two categories will be treated in the final tokenization as {@link CharType#WORD_CHAR}.
- * @param breakTypes The array of charTypes.
- */
- protected void filterBreakTypes(final CharType[] breakTypes) {
- /* Convert attached trailing punctuation immediately after word content to interword. */
- for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
- final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
- final CharType previousBreakType = breakTypes[breakIndex - 1];
- if (previousBreakType != CharType.BREAK_CHAR) {
- breakTypes[breakIndex] = CharType.BREAK_CHAR;
- }
- }
- }
- /* Convert attached leading punctuation immediately before word content to interword. */
- for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
- final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_LEADING_PUNCTUATION) {
- final CharType nextBreakType = breakTypes[breakIndex + 1];
- if (nextBreakType != CharType.BREAK_CHAR) {
- breakTypes[breakIndex] = CharType.BREAK_CHAR;
- }
- }
- }
- }
-
-
- protected List<CharSequence> createTokens(final CharSequence sequence, final IntSequence rawBreaks,
- final CharType[] breakTypes) {
- final List<CharSequence> tokens = new ArrayList<CharSequence>();
- boolean inWord = false;
- int startNextToken = 0;
-
- /* First token. */
- switch(breakTypes[0]) {
- case BREAK_CHAR: {
- /* Sequence starts with a break. Add the empty dummy word that signals that fact. */
- tokens.add(StringUtils.EMPTY_STRING);
- inWord = false;
- break;
- }
- default: {
- /* Sequence starts with a word. */
- inWord = true;
- }
- }
-
-
- /* Iterate all remaining tokens. */
- for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
- final CharType currentBreakType = breakTypes[breakIndex];
- final int sequenceIndex = rawBreaks.intAt(breakIndex);
-
- switch (currentBreakType) {
- case END: {
- tokens.add(sequence.subSequence(startNextToken, sequence.length()));
- break;
- }
- case BREAK_CHAR: {
- if (inWord) {
- /* Write the word and roll forward. */
- tokens.add(sequence.subSequence(startNextToken, sequenceIndex));
- startNextToken = sequenceIndex;
- inWord = false;
- } else {
- /* There is no state change. Nothing to do. */
- }
- break;
- }
- default: {
- /* This is considered the start of word content. */
- if (inWord) {
- /* There is no state change. Nothing to do. */
- } else {
- /* Write the interword content and roll forward. */
- tokens.add(sequence.subSequence(startNextToken, sequenceIndex));
- startNextToken = sequenceIndex;
- inWord = true;
- }
- }
- }
- }
- return tokens;
- }
-
- @Override
- public List<CharSequence> tokenize(final CharSequence sequence) {
- if (sequence == null
- || sequence.length() < 1) {
- return Collections.emptyList();
- }
-
- /* First pass is to find all of the breaks that the BreakIterator can find. */
- final IntSequence rawBreaks = findRawBreaks(sequence);
-
-
- /* The BreakIterator is helpful, but for our purposes does not dig deeply enough.
- * Our purpose is to find where words start and end and to treat all other content as non-word or interword
- * content.
- * So our second pass is to find out the type of each character that is at a break. */
- final CharType[] breakTypes = findBreakTypes(sequence, rawBreaks);
-
- /* Third pass. Simplify the breakTypes array. */
- filterBreakTypes(breakTypes);
-
- /* The fourth step iterates over the resolved break types and turns them into tokens. */
- return createTokens(sequence, rawBreaks, breakTypes);
- }
-
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|