foray-commit Mailing List for FOray (Page 28)
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
You can subscribe to this list here.
| 2006 | Jan | Feb | Mar (139) | Apr (98) | May (250) | Jun (394) | Jul (84) | Aug (13) | Sep (420) | Oct (186) | Nov (1) | Dec (3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2007 | Jan (108) | Feb (202) | Mar (291) | Apr (247) | May (374) | Jun (227) | Jul (231) | Aug (60) | Sep (31) | Oct (45) | Nov (18) | Dec |
| 2008 | Jan (38) | Feb (71) | Mar (142) | Apr | May (59) | Jun (6) | Jul (10) | Aug | Sep | Oct | Nov | Dec |
| 2009 | Jan (12) | Feb (4) | Mar (88) | Apr (121) | May (17) | Jun (30) | Jul | Aug (5) | Sep | Oct (1) | Nov | Dec |
| 2010 | Jan (11) | Feb (76) | Mar (11) | Apr | May (11) | Jun | Jul | Aug (44) | Sep (14) | Oct (7) | Nov | Dec |
| 2011 | Jan | Feb | Mar | Apr | May (9) | Jun | Jul | Aug | Sep | Oct (10) | Nov | Dec |
| 2012 | Jan | Feb | Mar | Apr | May | Jun (3) | Jul (4) | Aug | Sep | Oct | Nov | Dec |
| 2016 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec (168) |
| 2017 | Jan (77) | Feb (11) | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
| 2018 | Jan | Feb | Mar (1) | Apr (6) | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
| 2019 | Jan | Feb (88) | Mar (118) | Apr (1) | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
| 2020 | Jan | Feb | Mar | Apr | May (6) | Jun | Jul | Aug | Sep | Oct | Nov | Dec (141) |
| 2021 | Jan (170) | Feb (20) | Mar | Apr | May | Jun | Jul (1) | Aug | Sep | Oct (62) | Nov (189) | Dec (162) |
| 2022 | Jan (201) | Feb (118) | Mar (8) | Apr | May (2) | Jun (47) | Jul (19) | Aug (14) | Sep (3) | Oct | Nov (28) | Dec (235) |
| 2023 | Jan (112) | Feb (23) | Mar (2) | Apr (2) | May | Jun (1) | Jul | Aug (70) | Sep (92) | Oct (20) | Nov (1) | Dec (1) |
| 2024 | Jan | Feb | Mar (1) | Apr (1) | May (14) | Jun (11) | Jul (1) | Aug | Sep | Oct | Nov | Dec |
| 2025 | Jan (10) | Feb (29) | Mar | Apr (162) | May (245) | Jun (83) | Jul | Aug (1) | Sep | Oct | Nov | Dec |
|
From: <vic...@us...> - 2023-08-31 00:08:29
|
Revision: 13200
http://sourceforge.net/p/foray/code/13200
Author: victormote
Date: 2023-08-31 00:08:27 +0000 (Thu, 31 Aug 2023)
Log Message:
-----------
Improvements to explicit token handling.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-08-30 23:08:05 UTC (rev 13199)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-08-31 00:08:27 UTC (rev 13200)
@@ -189,7 +189,14 @@
final CharType[] breakTypes = findBreakTypes(sequence, rawBreaks);
/* Third pass. Simplify the breakTypes array. */
- filterBreakTypes(breakTypes);
+ /* For normal case (no explicit tokens), the token previous to the first one in "sequence" is a break char. */
+ CharType previousBreakType = CharType.BREAK_CHAR;
+ if (tokens.size() > 0) {
+ /* If there are already resolved tokens, it can only be because there are explicit tokens. In that case,
+ * the previous token (the explicit token) is a word. */
+ previousBreakType = CharType.WORD_CHAR;
+ }
+ filterBreakTypes(breakTypes, previousBreakType);
/* The fourth step iterates over the resolved break types and turns them into tokens. */
createImplicitTokens(sequence, rawBreaks, breakTypes, tokens);
@@ -240,12 +247,14 @@
* in the array, when finished, should be either {@link CharType#WORD_CHAR} or {@link CharType#BREAK_CHAR}.
* Anything not in those two categories will be treated in the final tokenization as {@link CharType#WORD_CHAR}.
* @param breakTypes The array of charTypes.
+ * @param preSequenceBreakType The break type that is conceptually immediately before the first (index 0) break
+ * type in {@code breakTypes}.
*/
- protected void filterBreakTypes(final CharType[] breakTypes) {
+ protected void filterBreakTypes(final CharType[] breakTypes, final CharType preSequenceBreakType) {
/* Resolve possible intraword punctuation. */
for (int breakIndex = 0; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
- final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
+ final CharType previousBreakType = breakIndex == 0 ? preSequenceBreakType : breakTypes[breakIndex - 1];
final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
: breakTypes[breakIndex + 1];
@@ -325,7 +334,7 @@
/* Resolve attached leading punctuation. */
for (int breakIndex = 0; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
- final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
+ final CharType previousBreakType = breakIndex == 0 ? preSequenceBreakType : breakTypes[breakIndex - 1];
final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
: breakTypes[breakIndex + 1];
switch (currentBreakType) {
@@ -362,7 +371,7 @@
/* Resolve attached trailing punctuation. Iterate these in reverse order. */
for (int breakIndex = breakTypes.length - 1; breakIndex > 0; breakIndex --) {
final CharType currentBreakType = breakTypes[breakIndex];
- final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
+ final CharType previousBreakType = breakIndex == 0 ? preSequenceBreakType : breakTypes[breakIndex - 1];
final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
: breakTypes[breakIndex + 1];
switch (currentBreakType) {
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-08-30 23:08:05 UTC (rev 13199)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-08-31 00:08:27 UTC (rev 13200)
@@ -51,6 +51,7 @@
public ExplicitTokens createExplicitTokens() {
final ExplicitTokens explicit = new ExplicitTokens();
explicit.addToken("i\\.e\\.");
+ explicit.addToken("&c\\.");
return explicit;
}
@@ -327,6 +328,26 @@
}
/**
+ * Test of a string starting with initial punctuation.
+ * This can happen when an explicit token, in this case "&c.", is immediately followed by a comma, which forces the
+ * string after the explicit token to be parsed independently of the explicit token.
+ */
+ @Test
+ public void testInitialPunctuation() {
+ final String testString = "Letter, &c.,\nat large;";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ assertEquals(8, actual.size());
+ assertEquals("Letter", actual.get(0));
+ assertEquals(", ", actual.get(1));
+ assertEquals("&c.", actual.get(2));
+ assertEquals(",\n", actual.get(3));
+ assertEquals("at", actual.get(4));
+ assertEquals(" ", actual.get(5));
+ assertEquals("large", actual.get(6));
+ assertEquals(";", actual.get(7));
+ }
+
+ /**
* Test with one embedded explicit token in the input.
*/
@Test
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-30 23:08:08
|
Revision: 13199
http://sourceforge.net/p/foray/code/13199
Author: victormote
Date: 2023-08-30 23:08:05 +0000 (Wed, 30 Aug 2023)
Log Message:
-----------
Handle multiple explicit token patterns.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-30 20:43:26 UTC (rev 13198)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-30 23:08:05 UTC (rev 13199)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.primitive.NumberUtils;
import org.foray.primitive.StringUtils;
import java.util.ArrayList;
@@ -61,49 +62,81 @@
* @param start The index to the first character in {@code sequence} to be tested.
* @param length The number of chars in {@code sequence} to be parsed.
* @return The list of the parsed tokens, which is never null.
- * Even-number indexes contain unparsed content before/between found tokens.
+ * Even-numbered indexes contain unparsed content before/between found tokens.
* Odd-numbered indexes contain any parsed tokens themselves.
- * Index 0 will always contain any text found before the first token, and can have length zero.
+ * Index 0 will always contain any text found before the first explicit token, and can have length zero.
*/
public List<CharSequence> tokenize(final CharSequence sequence, final int start, final int length) {
+ /* TODO: This can almost certainly be made more efficient. */
- /* TODO: UGLY, UGLY ALERT. This will fail miserably for more than one Pattern at the moment. */
+ /* Place the original sequence in the result. It will be replaced as necessary. */
+ final CharSequence input = sequence.subSequence(start, start + length);
+ final List<CharSequence> result = new ArrayList<CharSequence>();
+ result.add(input);
+ if (this.tokenPatterns.size() < 1) {
+ return result;
+ }
+ boolean anyChanges = true;
+ /* Starting at index 0, step through the result elements 2 at a time. Every even-numbered index is a candidate
+ * to be searched for explicit tokens. */
+ outerLoop:
+ while (anyChanges) {
+ anyChanges = false;
+ for (int resultIndex = 0; resultIndex < result.size(); resultIndex += 2) {
+ final CharSequence unparsed = result.get(resultIndex);
+ /* Check unparsed against each pattern. */
+ for (int patternIndex = 0; patternIndex < this.tokenPatterns.size(); patternIndex ++) {
+ final Pattern tokenPattern = tokenPatterns.get(patternIndex);
+ final List<CharSequence> patternResult = searchForPattern(unparsed, tokenPattern);
+ if (patternResult.size() > 1) {
+ result.remove(resultIndex);
+ result.addAll(resultIndex, patternResult);
+ anyChanges = true;
+ continue outerLoop;
+ }
+ }
+ }
+ }
+ return result;
+ }
- /* TODO: This is horribly inefficient, used only for proof-of-concept. Rewrite. */
+ /**
+ * Search for a single token and break up the input if found.
+ * @param input The text to be tokenized.
+ * @param tokenPattern The token pattern being sought.
+ * @return The list of tokens created by this method. This should always have an odd number of elements.
+ */
+ private List<CharSequence> searchForPattern(final CharSequence input, final Pattern tokenPattern) {
final List<CharSequence> result = new ArrayList<CharSequence>();
- if (this.tokenPatterns.size() < 1) {
- result.add(sequence);
- return result;
- }
- final CharSequence input = sequence.subSequence(start, start + length);
int nextUnparsed = 0;
-// for (int index = 0; index < tokenPatterns.size(); index ++) {
- final Pattern tokenPattern = tokenPatterns.get(0);
- final Matcher matcher = tokenPattern.matcher(input);
- while (matcher.find()) {
- final int matchStart = matcher.start();
- final int matchEnd = matcher.end();
- if (matchStart == 0
- && nextUnparsed == 0) {
- /* We are at the start. Create empty token for "between" text. */
- result.add(StringUtils.EMPTY_STRING);
- } else {
- final CharSequence between = input.subSequence(nextUnparsed, matchStart);
- result.add(between);
- }
- final CharSequence matched = input.subSequence(matchStart, matchEnd);
- result.add(matched);
- nextUnparsed = matchEnd;
- }
- if (nextUnparsed <= input.length()) {
- final CharSequence between = input.subSequence(nextUnparsed, input.length());
+ final Matcher matcher = tokenPattern.matcher(input);
+ while (matcher.find()) {
+ final int matchStart = matcher.start();
+ final int matchEnd = matcher.end();
+ if (matchStart == 0
+ && nextUnparsed == 0) {
+ /* We are at the start. Create empty token for "between" text. */
+ result.add(StringUtils.EMPTY_STRING);
+ } else {
+ final CharSequence between = input.subSequence(nextUnparsed, matchStart);
result.add(between);
}
-// }
+ final CharSequence matched = input.subSequence(matchStart, matchEnd);
+ result.add(matched);
+ nextUnparsed = matchEnd;
+ }
+ if (nextUnparsed < input.length()) {
+ final CharSequence between = input.subSequence(nextUnparsed, input.length());
+ result.add(between);
+ }
+ if (! NumberUtils.isOdd(result.size())) {
+ result.add(StringUtils.EMPTY_STRING);
+ }
+
return result;
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java 2023-08-30 20:43:26 UTC (rev 13198)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java 2023-08-30 23:08:05 UTC (rev 13199)
@@ -54,7 +54,7 @@
}
/**
- * Test to ensure that when there are no explicit tokens in the input, the output matches the input.
+ * Test for one explicit token in the input.
*/
@Test
public void testOneToken() {
@@ -69,4 +69,25 @@
assertEquals(" to exist,) or not to be.", result.get(2));
}
+ /**
+ * Test for two distinct explicit tokens in the input, three total.
+ */
+ @Test
+ public void testTwoTokens() {
+ final ExplicitTokens out = new ExplicitTokens();
+ out.addToken("i\\.e\\.");
+ out.addToken("&c\\.");
+ final String testString = "To be, (i.e. to exist,) or not to be, &c.,\n&c.";
+
+ final List<CharSequence> result = out.tokenize(testString, 0, testString.length());
+ assertEquals(7, result.size());
+ assertEquals("To be, (", result.get(0));
+ assertEquals("i.e.", result.get(1));
+ assertEquals(" to exist,) or not to be, ", result.get(2));
+ assertEquals("&c.", result.get(3));
+ assertEquals(",\n", result.get(4));
+ assertEquals("&c.", result.get(5));
+ assertEquals("", result.get(6));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-30 20:43:27
|
Revision: 13198
http://sourceforge.net/p/foray/code/13198
Author: victormote
Date: 2023-08-30 20:43:26 +0000 (Wed, 30 Aug 2023)
Log Message:
-----------
Fix regular expressions to find actual "." characters.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-30 19:52:42 UTC (rev 13197)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-30 20:43:26 UTC (rev 13198)
@@ -7,12 +7,12 @@
<axsl-orthography-config>
<explicit-token-list id="eng-Latn-explicit-tokens">
- <explicit-token end-of-sentence="never">i.e.</explicit-token>
- <explicit-token end-of-sentence="never">i. e.</explicit-token>
- <explicit-token end-of-sentence="never">&amp;c.</explicit-token>
- <explicit-token end-of-sentence="never">l. s.</explicit-token>
- <explicit-token end-of-sentence="never">e.g.</explicit-token>
- <explicit-token end-of-sentence="never">e. g.</explicit-token>
+ <explicit-token end-of-sentence="never">i\.e\.</explicit-token>
+ <explicit-token end-of-sentence="never">i\. e\.</explicit-token>
+ <explicit-token end-of-sentence="never">&amp;c\.</explicit-token>
+ <explicit-token end-of-sentence="never">l\. s\.</explicit-token>
+ <explicit-token end-of-sentence="never">e\.g\.</explicit-token>
+ <explicit-token end-of-sentence="never">e\. g\.</explicit-token>
</explicit-token-list>
<match-rule-list id="eng-Latn-match-rules">
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-30 19:52:44
|
Revision: 13197
http://sourceforge.net/p/foray/code/13197
Author: victormote
Date: 2023-08-30 19:52:42 +0000 (Wed, 30 Aug 2023)
Log Message:
-----------
Add test with explicit token immediately after leading punctuation.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-08-30 01:20:27 UTC (rev 13196)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-08-30 19:52:42 UTC (rev 13197)
@@ -354,4 +354,33 @@
assertEquals(".", actual.get(17));
}
+ /**
+ * Test with one embedded explicit token in the input.
+ */
+ @Test
+ public void testOneExplicitToken2() {
+ final String testString = "To be, (i.e. to exist,) or not to be.";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ assertEquals(19, actual.size());
+ assertEquals("To", actual.get(0));
+ assertEquals(" ", actual.get(1));
+ assertEquals("be", actual.get(2));
+ assertEquals(", ", actual.get(3));
+ assertEquals("(", actual.get(4));
+ assertEquals("i.e.", actual.get(5));
+ assertEquals(" ", actual.get(6));
+ assertEquals("to", actual.get(7));
+ assertEquals(" ", actual.get(8));
+ assertEquals("exist", actual.get(9));
+ assertEquals(",) ", actual.get(10));
+ assertEquals("or", actual.get(11));
+ assertEquals(" ", actual.get(12));
+ assertEquals("not", actual.get(13));
+ assertEquals(" ", actual.get(14));
+ assertEquals("to", actual.get(15));
+ assertEquals(" ", actual.get(16));
+ assertEquals("be", actual.get(17));
+ assertEquals(".", actual.get(18));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-30 01:20:31
|
Revision: 13196
http://sourceforge.net/p/foray/code/13196
Author: victormote
Date: 2023-08-30 01:20:27 +0000 (Wed, 30 Aug 2023)
Log Message:
-----------
Improvements to tokenization.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-08-30 01:06:36 UTC (rev 13195)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-08-30 01:20:27 UTC (rev 13196)
@@ -418,8 +418,11 @@
break;
}
default: {
- /* Sequence starts with non-word content. Add the empty dummy token to signal that fact. */
- tokens.add(StringUtils.EMPTY_STRING);
+ /* Sequence starts with non-word content. */
+ if (tokens.size() < 1) {
+ /* This is the first token created. Add the empty dummy token to signal that fact. */
+ tokens.add(StringUtils.EMPTY_STRING);
+ }
inWord = false;
break;
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2023-08-30 01:06:36 UTC (rev 13195)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2023-08-30 01:20:27 UTC (rev 13196)
@@ -51,7 +51,7 @@
*/
@BeforeEach
public void setupTest() {
- this.out = new LexerIcu4jBreakIterator(WritingSystem4a.USA, null);
+ this.out = new LexerIcu4jBreakIterator(WritingSystem4a.USA, createExplicitTokens());
}
@Override
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2023-08-30 01:06:36 UTC (rev 13195)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2023-08-30 01:20:27 UTC (rev 13196)
@@ -45,7 +45,7 @@
*/
@BeforeEach
public void setupTest() {
- this.out = new LexerJavaBreakIterator(WritingSystem4a.USA, null);
+ this.out = new LexerJavaBreakIterator(WritingSystem4a.USA, createExplicitTokens());
}
@Override
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-08-30 01:06:36 UTC (rev 13195)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2023-08-30 01:20:27 UTC (rev 13196)
@@ -45,6 +45,16 @@
public abstract Lexer4a getObjectUnderTest();
/**
+ * Creates explicit tokens to be used by the lexer during testing.
+ * @return Explicit tokens suitable for testing.
+ */
+ public ExplicitTokens createExplicitTokens() {
+ final ExplicitTokens explicit = new ExplicitTokens();
+ explicit.addToken("i\\.e\\.");
+ return explicit;
+ }
+
+ /**
* A simple test of {@link LexerLatin1#breakIntoWords(CharSequence)}.
*/
@Test
@@ -316,4 +326,32 @@
assertEquals(".", actual.get(11));
}
+ /**
+ * Test with one embedded explicit token in the input.
+ */
+ @Test
+ public void testOneExplicitToken() {
+ final String testString = "To be, i.e. to exist, or not to be.";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ assertEquals(18, actual.size());
+ assertEquals("To", actual.get(0));
+ assertEquals(" ", actual.get(1));
+ assertEquals("be", actual.get(2));
+ assertEquals(", ", actual.get(3));
+ assertEquals("i.e.", actual.get(4));
+ assertEquals(" ", actual.get(5));
+ assertEquals("to", actual.get(6));
+ assertEquals(" ", actual.get(7));
+ assertEquals("exist", actual.get(8));
+ assertEquals(", ", actual.get(9));
+ assertEquals("or", actual.get(10));
+ assertEquals(" ", actual.get(11));
+ assertEquals("not", actual.get(12));
+ assertEquals(" ", actual.get(13));
+ assertEquals("to", actual.get(14));
+ assertEquals(" ", actual.get(15));
+ assertEquals("be", actual.get(16));
+ assertEquals(".", actual.get(17));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-30 01:06:39
|
Revision: 13195
http://sourceforge.net/p/foray/code/13195
Author: victormote
Date: 2023-08-30 01:06:36 +0000 (Wed, 30 Aug 2023)
Log Message:
-----------
Add explicit tokens and dictionary entries for abbreviation tokens.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-30 00:50:11 UTC (rev 13194)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-30 01:06:36 UTC (rev 13195)
@@ -56,7 +56,7 @@
-->
<w><t>&amp;</t></w>
-<w><t>&amp;c</t><abbrev referenced-word="etc., et cetera"/></w>
+<w><t>&amp;c.</t><abbrev referenced-word="etc., et cetera"/></w>
<w><t>a</t></w>
<w><t>a-a</t></w>
<w><t>Aa-chen</t></w>
@@ -47325,6 +47325,8 @@
<w><t>EFTA</t></w>
<w><t>eft-soon</t></w>
<w><t>eft-soons</t></w>
+<w><t>e.g.</t><abbrev referenced-word="exempli gratia"/><comment>Latin "for example."</comment></w>
+<w><t>e. g.</t><abbrev referenced-word="exempli gratia"/><comment>Latin "for example."</comment></w>
<w><t>e-gad</t></w>
<w><t>E-ga-di</t></w>
<w><t>e-gal</t></w>
@@ -73374,8 +73376,8 @@
<w><t>hy-zone</t></w>
<w><t>i/c</t></w>
<w><t>I/O</t></w>
-<w><t>i.e</t><abbrev referenced-word="id est"/></w>
-<w><t>i. e</t><abbrev referenced-word="id est"/><comment>Contains embedded non-breaking space.</comment></w>
+<w><t>i.e.</t><abbrev referenced-word="id est"/><comment>Latin "that is."</comment></w>
+<w><t>i. e.</t><abbrev referenced-word="id est"/><comment>Latin "that is."</comment></w>
<w><t>I-ac-chus</t></w>
<w><t>I-a-che</t></w>
<w><t>IAEA</t></w>
@@ -89726,6 +89728,7 @@
<w><t>LPG</t></w>
<w><t>L=ra-di-a-tion</t></w>
<w><t>L-ri-da</t></w>
+<w><t>l. s.</t><abbrev referenced-word="Locus sigilli"/><comment>Latin "the place of the seal."</comment></w>
<w><t>Lsd</t></w>
<w><t>lsd</t></w>
<w><t>LSD</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-30 00:50:11 UTC (rev 13194)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-30 01:06:36 UTC (rev 13195)
@@ -8,7 +8,11 @@
<explicit-token-list id="eng-Latn-explicit-tokens">
<explicit-token end-of-sentence="never">i.e.</explicit-token>
- <explicit-token end-of-sentence="never">l. s.</explicit-token>
+ <explicit-token end-of-sentence="never">i. e.</explicit-token>
+ <explicit-token end-of-sentence="never">&amp;c.</explicit-token>
+ <explicit-token end-of-sentence="never">l. s.</explicit-token>
+ <explicit-token end-of-sentence="never">e.g.</explicit-token>
+ <explicit-token end-of-sentence="never">e. g.</explicit-token>
</explicit-token-list>
<match-rule-list id="eng-Latn-match-rules">
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-30 00:50:12
|
Revision: 13194
http://sourceforge.net/p/foray/code/13194
Author: victormote
Date: 2023-08-30 00:50:11 +0000 (Wed, 30 Aug 2023)
Log Message:
-----------
Simplify for now by using only one pattern.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-29 21:36:27 UTC (rev 13193)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-30 00:50:11 UTC (rev 13194)
@@ -81,8 +81,8 @@
}
final CharSequence input = sequence.subSequence(start, start + length);
int nextUnparsed = 0;
- for (int index = 0; index < tokenPatterns.size(); index ++) {
- final Pattern tokenPattern = tokenPatterns.get(index);
+// for (int index = 0; index < tokenPatterns.size(); index ++) {
+ final Pattern tokenPattern = tokenPatterns.get(0);
final Matcher matcher = tokenPattern.matcher(input);
while (matcher.find()) {
final int matchStart = matcher.start();
@@ -103,7 +103,7 @@
final CharSequence between = input.subSequence(nextUnparsed, input.length());
result.add(between);
}
- }
+// }
return result;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-29 21:36:29
|
Revision: 13193
http://sourceforge.net/p/foray/code/13193
Author: victormote
Date: 2023-08-29 21:36:27 +0000 (Tue, 29 Aug 2023)
Log Message:
-----------
Improvements to explicit tokenization.
Modified Paths:
--------------
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoTextWords4a.java
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
trunk/foray/foray-orthography/src/test/resources/orthography-config.xml
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoTextWords4a.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoTextWords4a.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/obj/FoTextWords4a.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -77,8 +77,9 @@
final FoOrthography orthography = orthographyServer.getOrthography(getWritingSystem());
if (orthography == null) {
LOGGER.warn("Orthography not found for: {}", getWritingSystem());
+ } else {
+ this.tokenFlow = orthography.tokenize(content, 0, content.length());
}
- this.tokenFlow = orthography.tokenize(content, 0, content.length());
}
@Override
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-29 21:36:27 UTC (rev 13193)
@@ -8,6 +8,7 @@
<explicit-token-list id="eng-Latn-explicit-tokens">
<explicit-token end-of-sentence="never">i.e.</explicit-token>
+ <explicit-token end-of-sentence="never">l. s.</explicit-token>
</explicit-token-list>
<match-rule-list id="eng-Latn-match-rules">
@@ -315,9 +316,9 @@
</hyphenation-patterns-resource>
<orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA">
+ <explicit-tokens reference="eng-Latn-explicit-tokens"/>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
- <explicit-tokens reference="eng-Latn-explicit-tokens"/>
<match-rules reference="eng-Latn-match-rules"/>
<derivative-rules reference="eng-Latn-derivative-patterns"/>
<dictionary reference="org.foray.eng.Latn.USA"/>
@@ -326,9 +327,9 @@
</orthography>
<orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="GBR">
+ <explicit-tokens reference="eng-Latn-explicit-tokens"/>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
- <explicit-tokens reference="eng-Latn-explicit-tokens"/>
<match-rules reference="eng-Latn-match-rules"/>
<derivative-rules reference="eng-Latn-derivative-patterns"/>
<dictionary reference="org.foray.eng.Latn.GBR"/>
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -44,7 +44,7 @@
private static final int INITIAL_MAP_CAPACITY = 10;
/** The collection of explicit tokens. */
- private List<Pattern> tokens = new ArrayList<Pattern>(INITIAL_MAP_CAPACITY);
+ private List<Pattern> tokenPatterns = new ArrayList<Pattern>(INITIAL_MAP_CAPACITY);
/**
* Adds an explicit token to this orthography.
@@ -51,7 +51,7 @@
* @param token The explicit token to be added.
*/
public void addToken(final String token) {
- this.tokens.add(Pattern.compile(token));
+ this.tokenPatterns.add(Pattern.compile(token));
}
/**
@@ -74,12 +74,16 @@
/* TODO: This is horribly inefficient, used only for proof-of-concept. Rewrite. */
+ final List<CharSequence> result = new ArrayList<CharSequence>();
+ if (this.tokenPatterns.size() < 1) {
+ result.add(sequence);
+ return result;
+ }
final CharSequence input = sequence.subSequence(start, start + length);
int nextUnparsed = 0;
- final List<CharSequence> result = new ArrayList<CharSequence>();
- for (int index = 0; index < tokens.size(); index ++) {
- final Pattern pattern = tokens.get(index);
- final Matcher matcher = pattern.matcher(input);
+ for (int index = 0; index < tokenPatterns.size(); index ++) {
+ final Pattern tokenPattern = tokenPatterns.get(index);
+ final Matcher matcher = tokenPattern.matcher(input);
while (matcher.find()) {
final int matchStart = matcher.start();
final int matchEnd = matcher.end();
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -122,12 +122,17 @@
/** The writing system used by this lexer. */
private WritingSystem4a writingSystem;
+ /** Explicit tokens to be tokenized. This can be empty, but should never be null. */
+ private ExplicitTokens explicitTokens;
+
/**
* Constructor.
* @param writingSystem The writing system used by this Lexer. This can be null.
+ * @param explicitTokens Explicit tokens to be tokenized.
*/
- public Lexer4a(final WritingSystem4a writingSystem) {
+ public Lexer4a(final WritingSystem4a writingSystem, final ExplicitTokens explicitTokens) {
this.writingSystem = writingSystem;
+ this.explicitTokens = explicitTokens == null ? new ExplicitTokens() : explicitTokens;
}
/**
@@ -134,7 +139,7 @@
* Returns the writing system used by this lexer.
* @return The writing system.
*/
- public WritingSystem4a getWritingSystme() {
+ public WritingSystem4a getWritingSystem() {
return this.writingSystem;
}
@@ -144,7 +149,35 @@
|| sequence.length() < 1) {
return Collections.emptyList();
}
+ final List<CharSequence> tokens = new ArrayList<CharSequence>();
+ tokenizeExplicit(sequence, tokens);
+ return tokens;
+ }
+
+ /**
+ * Tokenize any explicit tokens.
+ * @param sequence The sequence to be tokenized.
+ * @param tokens The list into which tokens should be added as they are parsed.
+ */
+ private void tokenizeExplicit(final CharSequence sequence, final List<CharSequence> tokens) {
+ final List<CharSequence> explicit = this.explicitTokens.tokenize(sequence, 0, sequence.length());
+ for (int index = 0; index < explicit.size(); index ++) {
+ /* Even indexes need to be parsed implicitly. Odd indexes are explicit tokens. */
+ if (NumberUtils.isOdd(index)) {
+ tokens.add(explicit.get(index));
+ } else {
+ tokenizeImplicit(explicit.get(index), tokens);
+ }
+ }
+ }
+
+ /**
+ * After handling explicit tokens, tokenizes the remaining chunk(s) of text using normal implicit tokenization.
+ * @param sequence The character sequence containing the untokenized text.
+ * @param tokens The list of tokens that is being built. Tokens will be added to this.
+ */
+ private void tokenizeImplicit(final CharSequence sequence, final List<CharSequence> tokens) {
/* First pass is to find all of the breaks that the BreakIterator can find. */
final IntSequence rawBreaks = findRawBreaks(sequence);
@@ -159,7 +192,7 @@
filterBreakTypes(breakTypes);
/* The fourth step iterates over the resolved break types and turns them into tokens. */
- return createTokens(sequence, rawBreaks, breakTypes);
+ createImplicitTokens(sequence, rawBreaks, breakTypes, tokens);
}
/**
@@ -370,11 +403,10 @@
* @param sequence The sequence of characters being tokenized.
* @param rawBreaks The breaks found by the break iterator.
* @param breakTypes The filtered break types.
- * @return The list of tokens.
+ * @param tokens The list of tokens being built.
*/
- protected List<CharSequence> createTokens(final CharSequence sequence, final IntSequence rawBreaks,
- final CharType[] breakTypes) {
- final List<CharSequence> tokens = new ArrayList<CharSequence>();
+ protected void createImplicitTokens(final CharSequence sequence, final IntSequence rawBreaks,
+ final CharType[] breakTypes, final List<CharSequence> tokens) {
boolean inWord = false;
int startNextToken = 0;
@@ -428,7 +460,6 @@
}
}
}
- return tokens;
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerIcu4jBreakIterator.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -50,9 +50,10 @@
/**
* Constructor.
* @param writingSystem The writing system used by this Lexer. This can be null.
+ * @param explicitTokens Explicit tokens to be tokenized.
*/
- public LexerIcu4jBreakIterator(final WritingSystem4a writingSystem) {
- super(writingSystem);
+ public LexerIcu4jBreakIterator(final WritingSystem4a writingSystem, final ExplicitTokens explicitTokens) {
+ super(writingSystem, explicitTokens);
final WritingSystem4a baseWritingSystem = writingSystem == null ? WritingSystem4a.USA : writingSystem;
this.wordIterator = BreakIterator.getWordInstance(baseWritingSystem.toULocale());
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/LexerJavaBreakIterator.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -49,9 +49,10 @@
/**
* Constructor.
* @param writingSystem The writing system used by this Lexer. This can be null.
+ * @param explicitTokens Explicit tokens to be tokenized.
*/
- public LexerJavaBreakIterator(final WritingSystem4a writingSystem) {
- super(writingSystem);
+ public LexerJavaBreakIterator(final WritingSystem4a writingSystem, final ExplicitTokens explicitTokens) {
+ super(writingSystem, explicitTokens);
final WritingSystem4a baseWritingSystem = writingSystem == null ? WritingSystem4a.USA : writingSystem;
this.wordIterator = BreakIterator.getWordInstance(baseWritingSystem.toLocale());
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -522,17 +522,11 @@
}
/**
- * Searches a character sequence starting at a given index to see if the indexed character is the beginning of an
- * explicit token, returning that token if it is.
- * @param sequence The character sequence (usually a {@link String}) being searched.
- * @param start The index to the first character in {@code sequence} to be tested.
- * @return The matching token, if there is one, or null if not.
+ * Returns the explicit tokens, if any, for this orthography.
+ * @return The explicit tokens, if any, for this orthography.
*/
- public String findToken(final CharSequence sequence, final int start) {
- if (this.explicitTokens == null) {
- return null;
- }
- return this.explicitTokens.findToken(sequence, start);
+ public ExplicitTokens getExplicitTokens() {
+ return this.explicitTokens;
}
/**
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -219,9 +219,10 @@
final String scriptString = attributes.getValue("script-iso-4char");
final String countryString = attributes.getValue("country-iso-3char");
final WritingSystem4a writingSystem = WritingSystem4a.find(languageString, scriptString, countryString);
+ final ExplicitTokens explicitTokens = this.currentOrthographyConfig.getExplicitTokens();
- final Class<?>[] parameterTypes = {WritingSystem4a.class};
- final Object[] parameters = {writingSystem};
+ final Class<?>[] parameterTypes = {WritingSystem4a.class, ExplicitTokens.class};
+ final Object[] parameters = {writingSystem, explicitTokens};
final Lexer lexer = instantiate(className, Lexer.class, parameterTypes, parameters);
if (lexer == null) {
return;
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -30,6 +30,7 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.ObjectUtils;
+import org.foray.orthography.ExplicitTokens;
import org.foray.orthography.LexerJavaBreakIterator;
import org.foray.orthography.Orthography4a;
import org.foray.orthography.OrthographyServer4a;
@@ -355,7 +356,9 @@
*/
private void checkWords(final WritingSystem4a writingSystem) {
final Orthography4a orthography = writingSystem == null ? null : this.server.getOrthography(writingSystem);
- final Lexer lexer = orthography == null ? new LexerJavaBreakIterator(writingSystem) : orthography.getLexer();
+ final ExplicitTokens explicitTokens = orthography == null ? null : orthography.getExplicitTokens();
+ final Lexer lexer = orthography == null ? new LexerJavaBreakIterator(writingSystem, explicitTokens) :
+ orthography.getLexer();
final StringBuilder textAccumulator = getTextAccumulator();
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -51,7 +51,7 @@
*/
@BeforeEach
public void setupTest() {
- this.out = new LexerIcu4jBreakIterator(WritingSystem4a.USA);
+ this.out = new LexerIcu4jBreakIterator(WritingSystem4a.USA, null);
}
@Override
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishJavaTests.java 2023-08-29 21:36:27 UTC (rev 13193)
@@ -45,7 +45,7 @@
*/
@BeforeEach
public void setupTest() {
- this.out = new LexerJavaBreakIterator(WritingSystem4a.USA);
+ this.out = new LexerJavaBreakIterator(WritingSystem4a.USA, null);
}
@Override
Modified: trunk/foray/foray-orthography/src/test/resources/orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/test/resources/orthography-config.xml 2023-08-29 19:16:41 UTC (rev 13192)
+++ trunk/foray/foray-orthography/src/test/resources/orthography-config.xml 2023-08-29 21:36:27 UTC (rev 13193)
@@ -125,56 +125,62 @@
</unparsed-hyphenation-patterns>
</hyphenation-patterns-resource>
- <configuration>
+ <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
<dictionary reference="org.foray.eng.Latn.ZZZ"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="derivatives-eng"/>
+ </orthography>
+
+ <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA">
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
- language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
- <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
- <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA"/>
- </configuration>
+ language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA"/>
+ <dictionary reference="org.foray.eng.Latn.ZZZ"/>
+ <hyphenation-patterns reference="hyph-patterns-eng"/>
+ <derivative-factories reference="derivatives-eng"/>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="fin" script-iso-4char="Latn" country-iso-3char="FIN">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="fin" script-iso-4char="Latn" country-iso-3char="FIN"/>
<hyphenation-patterns reference="hyph-patterns-fin"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="fin" script-iso-4char="Latn" country-iso-3char="FIN"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="hun" script-iso-4char="Latn" country-iso-3char="HUN">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="hun" script-iso-4char="Latn" country-iso-3char="HUN"/>
<hyphenation-patterns reference="hyph-patterns-hun"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="hun" script-iso-4char="Latn" country-iso-3char="HUN"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ITA">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ITA"/>
<hyphenation-patterns reference="hyph-patterns-ita"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ITA"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="pol" script-iso-4char="Latn" country-iso-3char="POL">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="pol" script-iso-4char="Latn" country-iso-3char="POL"/>
<hyphenation-patterns reference="hyph-patterns-pol"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="pol" script-iso-4char="Latn" country-iso-3char="POL"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="por" script-iso-4char="Latn" country-iso-3char="PRT">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="por" script-iso-4char="Latn" country-iso-3char="PRT"/>
<hyphenation-patterns reference="hyph-patterns-por"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="por" script-iso-4char="Latn" country-iso-3char="PRT"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="rus" script-iso-4char="Cyrl" country-iso-3char="RUS">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="rus" script-iso-4char="Cyrl" country-iso-3char="RUS"/>
<hyphenation-patterns reference="hyph-patterns-rus"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="rus" script-iso-4char="Cyrl" country-iso-3char="RUS"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="spa" script-iso-4char="Latn" country-iso-3char="ESP">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="spa" script-iso-4char="Latn" country-iso-3char="ESP"/>
<hyphenation-patterns reference="hyph-patterns-spa"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <orthography language-iso-3char="spa" script-iso-4char="Latn" country-iso-3char="ESP"/>
- </configuration>
+ </orthography>
</axsl-orthography-config>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-29 19:16:44
|
Revision: 13192
http://sourceforge.net/p/foray/code/13192
Author: victormote
Date: 2023-08-29 19:16:41 +0000 (Tue, 29 Aug 2023)
Log Message:
-----------
Rough-in code for handling explicit tokens.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-29 14:58:19 UTC (rev 13191)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2023-08-29 19:16:41 UTC (rev 13192)
@@ -28,9 +28,13 @@
package org.foray.orthography;
-import java.util.HashSet;
-import java.util.Set;
+import org.foray.primitive.StringUtils;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
/**
* A collection of strings that should be treated as explicit tokens by a tokenizer/lexer.
*/
@@ -37,45 +41,66 @@
public class ExplicitTokens {
/** The initial size of the token collection. */
- private static final int INITIAL_MAP_CAPACITY = 50;
+ private static final int INITIAL_MAP_CAPACITY = 10;
/** The collection of explicit tokens. */
- private Set<String> tokens = new HashSet<String>(INITIAL_MAP_CAPACITY);
- /* TODO: Convert this to a TernaryTree. */
+ private List<Pattern> tokens = new ArrayList<Pattern>(INITIAL_MAP_CAPACITY);
- /** The size, in chars, of the largest member of {@link #tokens}. */
- private int maxTokenSize;
-
/**
* Adds an explicit token to this orthography.
* @param token The explicit token to be added.
*/
public void addToken(final String token) {
- if (token.length() > this.maxTokenSize) {
- this.maxTokenSize = token.length();
- }
- this.tokens.add(token);
+ this.tokens.add(Pattern.compile(token));
}
/**
- * Searches a character sequence starting at a given index to see if the indexed character is the beginning of an
+ * Parses a character sequence starting at a given index to see if the indexed character is the beginning of an
* explicit token, returning that token if it is.
* @param sequence The character sequence (usually a {@link String}) being searched.
* @param start The index to the first character in {@code sequence} to be tested.
- * @return The matching token, if there is one, or null if not.
+ * @param length The number of chars in {@code sequence} to be parsed.
+ * @return The list of the parsed tokens, which is never null.
+ * Even-numbered indexes contain unparsed content before/between found tokens.
+ * Odd-numbered indexes contain any parsed tokens themselves.
+ * Index 0 will always contain any text found before the first token, and can have length zero.
*/
- public String findToken(final CharSequence sequence, final int start) {
- int index = start + 1;
- while (index < sequence.length()
- && index - start < this.maxTokenSize) {
- /* TODO: This String creation horrible and should only be used for proof of concept. */
- final String testString = sequence.subSequence(start, index).toString();
- if (this.tokens.contains(testString)) {
- return testString;
+ public List<CharSequence> tokenize(final CharSequence sequence, final int start, final int length) {
+
+
+ /* TODO: UGLY, UGLY ALERT. This will fail miserably for more than one Pattern at the moment. */
+
+
+
+
+ /* TODO: This is horribly inefficient, used only for proof-of-concept. Rewrite. */
+ final CharSequence input = sequence.subSequence(start, start + length);
+ int nextUnparsed = 0;
+ final List<CharSequence> result = new ArrayList<CharSequence>();
+ for (int index = 0; index < tokens.size(); index ++) {
+ final Pattern pattern = tokens.get(index);
+ final Matcher matcher = pattern.matcher(input);
+ while (matcher.find()) {
+ final int matchStart = matcher.start();
+ final int matchEnd = matcher.end();
+ if (matchStart == 0
+ && nextUnparsed == 0) {
+ /* We are at the start. Create empty token for "between" text. */
+ result.add(StringUtils.EMPTY_STRING);
+ } else {
+ final CharSequence between = input.subSequence(nextUnparsed, matchStart);
+ result.add(between);
+ }
+ final CharSequence matched = input.subSequence(matchStart, matchEnd);
+ result.add(matched);
+ nextUnparsed = matchEnd;
}
- index ++;
+ if (nextUnparsed <= input.length()) {
+ final CharSequence between = input.subSequence(nextUnparsed, input.length());
+ result.add(between);
+ }
}
- return null;
+ return result;
}
}
Added: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java (rev 0)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java 2023-08-29 19:16:41 UTC (rev 13192)
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2023 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+/**
+ * Tests of {@link ExplicitTokens}.
+ */
+public class ExplicitTokensTests {
+
+
+ /**
+ * Test to ensure that when there are no explicit tokens in the input, the output matches the input.
+ */
+ @Test
+ public void testNoTokens() {
+ final ExplicitTokens out = new ExplicitTokens();
+ out.addToken("i\\.e\\.");
+ final String testString = "There is a tide in the affairs of men.";
+
+ final List<CharSequence> result = out.tokenize(testString, 0, testString.length());
+ assertEquals(1, result.size());
+ assertEquals("There is a tide in the affairs of men.", result.get(0));
+ }
+
+ /**
+ * Test to ensure that one explicit token in the input is returned between the text before and after it.
+ */
+ @Test
+ public void testOneToken() {
+ final ExplicitTokens out = new ExplicitTokens();
+ out.addToken("i\\.e\\.");
+ final String testString = "To be, (i.e. to exist,) or not to be.";
+
+ final List<CharSequence> result = out.tokenize(testString, 0, testString.length());
+ assertEquals(3, result.size());
+ assertEquals("To be, (", result.get(0));
+ assertEquals("i.e.", result.get(1));
+ assertEquals(" to exist,) or not to be.", result.get(2));
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/ExplicitTokensTests.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-29 14:58:21
|
Revision: 13191
http://sourceforge.net/p/foray/code/13191
Author: victormote
Date: 2023-08-29 14:58:19 +0000 (Tue, 29 Aug 2023)
Log Message:
-----------
Normal dictionary editing.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-29 14:55:32 UTC (rev 13190)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-29 14:58:19 UTC (rev 13191)
@@ -59,7 +59,6 @@
<w><t>&c</t><abbrev referenced-word="etc., et cetera"/></w>
<w><t>a</t></w>
<w><t>a-a</t></w>
-<w><t>aaaaaaaaaa</t><noun><convertible-to-possessive/></noun><comment>Pseudo-word used as a replacement for omitted word elements.</comment></w>
<w><t>Aa-chen</t></w>
<w><t>Aal-borg</t></w>
<w><t>Aa-le-sund</t></w>
@@ -66651,7 +66650,7 @@
<w><t>Ham-heung</t></w>
<w><t>Ham-hung</t></w>
<phrase><t>Ha-mil-car Bar-ca</t></phrase>
-<w><t>Ham-il-ton</t></w>
+<w><t>Ham-il-ton</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ham-il-to-ni-an</t></w>
<w><t>Ham-il-to-ni-an-ism</t></w>
<w><t>Ham-ite</t></w>
@@ -81111,7 +81110,7 @@
<w><t>je-fe</t></w>
<w><t>Jeff</t></w>
<w><t>Jef-fers</t></w>
-<w><t>Jef-fer-son</t></w>
+<w><t>Jef-fer-son</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Jef-fer-son Cit-y</t></phrase>
<w><t>Jef-fer-so-ni-an</t></w>
<w><t>Jef-fer-so-ni-an-ism</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-08-29 14:55:32 UTC (rev 13190)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-08-29 14:58:19 UTC (rev 13191)
@@ -23,6 +23,8 @@
<w><t>bo-na</t></w>
<w><t>cap-tan-dum</t></w>
<w><t>ca-sus</t></w>
+<w><t>cent</t><abbrev referenced-word="centum"/></w>
+<w><t>cen-tum</t><comment>100, as in "per centum" or "percent."</comment></w>
<w><t>Christ</t></w>
<w><t>cir-ca</t></w>
<w><t>con</t><abbrev referenced-word="contradicente"/></w>
@@ -70,8 +72,8 @@
<w><t>per</t></w>
<w><t>per-son-ae</t></w>
<w><t>pe-ti-tio</t></w>
+<w><t>post</t></w>
<w><t>prin-ci-pii</t></w>
-<w><t>post</t></w>
<w><t>pro</t></w>
<w><t>prop-a-gan-da</t></w>
<w><t>quad-ru-plex</t></w>
@@ -87,8 +89,8 @@
<w><t>tes-te</t><comment>1. the witnessing or concluding clause of an
instrument (as a writ). 2. witness - used archaicly to indicate that what
follows is named as authority for what precedes.</comment></w>
+<w><t>to-to</t></w>
<w><t>trans-eunte</t></w>
-<w><t>to-to</t></w>
<w><t>ul-ti-ma</t></w>
<w><t>ul-tra</t></w>
<w><t>ver-ba-tim</t><adjective/><adverb/></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-29 14:55:34
|
Revision: 13190
http://sourceforge.net/p/foray/code/13190
Author: victormote
Date: 2023-08-29 14:55:32 +0000 (Tue, 29 Aug 2023)
Log Message:
-----------
Treat "Note" element as an empty string for spell-check purposes.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-08-29 00:33:09 UTC (rev 13189)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-08-29 14:55:32 UTC (rev 13190)
@@ -184,6 +184,7 @@
textElementMap.put("OmittedWord", "omitted");
textElementMap.put("PageRef", "999");
textElementMap.put("Roman", "III");
+ textElementMap.put("Note", StringUtils.EMPTY_STRING);
}
/** The list of ad-hoc dictionaries, usually parsed from the command-line. */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-29 00:33:12
|
Revision: 13189
http://sourceforge.net/p/foray/code/13189
Author: victormote
Date: 2023-08-29 00:33:09 +0000 (Tue, 29 Aug 2023)
Log Message:
-----------
Normal dictionary editing.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-08-28 11:21:58 UTC (rev 13188)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-08-29 00:33:09 UTC (rev 13189)
@@ -18,6 +18,7 @@
-->
<w><t>be-hove</t><verb><regular-root/></verb></w>
+<w><t>cen-tre</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>co=la-bour-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ful-fil</t><verb/></w>
<w><t>ful-fil-ment</t><noun/></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2023-08-28 11:21:58 UTC (rev 13188)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2023-08-29 00:33:09 UTC (rev 13189)
@@ -18,6 +18,7 @@
-->
<w><t>be-hoove</t><verb><regular-root/></verb></w>
+<w><t>cen-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>co=la-bor-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ful-fill</t><verb><regular-root/></verb></w>
<w><t>ful-fill-ment</t><noun/></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-28 11:21:58 UTC (rev 13188)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-29 00:33:09 UTC (rev 13189)
@@ -251,7 +251,7 @@
<w><t>a-bet-ted</t></w>
<w><t>a-bet-ter</t></w>
<w><t>a-bet-ting</t></w>
-<w><t>a-bet-tor</t></w>
+<w><t>a-bet-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-bey-ance</t></w>
<w><t>a-bey-ant</t></w>
<w><t>ab-far-ad</t></w>
@@ -491,7 +491,7 @@
<w><t>a-breast</t></w>
<w><t>a-bri</t></w>
<w><t>a-bridg-a-ble</t></w>
-<w><t>a-bridge</t></w>
+<w><t>a-bridge</t><verb><regular-root/></verb></w>
<w><t>a-bridge-a-ble</t></w>
<w><t>a-bridged</t></w>
<w><t>a-bridge-ment</t></w>
@@ -1015,7 +1015,7 @@
<w><t>ac-cu-sa-to-ry</t></w>
<w><t>ac-cuse</t><verb><regular-root/></verb></w>
<w><t>ac-cused</t></w>
-<w><t>ac-cus-er</t></w>
+<w><t>ac-cus-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-cus-ing</t></w>
<w><t>ac-cus-ing-ly</t></w>
<w><t>ac-cus-tom</t></w>
@@ -1130,6 +1130,7 @@
<w><t>A-chad</t></w>
<w><t>A-chae-a</t></w>
<w><t>A-chae-an</t></w>
+<w><t>A-chæ-an</t></w>
<phrase><t>A-chae-an League</t></phrase>
<w><t>A-chae-me-nes</t></w>
<w><t>Ach-ae-me-ni-an</t></w>
@@ -1913,7 +1914,7 @@
<w><t>ad-join-ing</t></w>
<w><t>ad-joint</t></w>
<w><t>ad-journ</t><verb><regular-root/></verb></w>
-<w><t>ad-journ-ment</t></w>
+<w><t>ad-journ-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>adjt</t></w>
<w><t>ad-judge</t></w>
<w><t>ad-judged</t></w>
@@ -2243,7 +2244,7 @@
<w><t>ad-verse-ness</t></w>
<phrase><t>ad-verse pos-ses-sion</t></phrase>
<w><t>ad-ver-si-ty</t></w>
-<w><t>ad-vert</t></w>
+<w><t>ad-vert</t><verb><regular-root/></verb></w>
<w><t>ad-vert-ence</t></w>
<w><t>ad-vert-en-cy</t></w>
<w><t>ad-vert-ent</t></w>
@@ -2605,7 +2606,7 @@
<w><t>af-faire</t></w>
<phrase><t>af-faire d’hon-neur</t></phrase>
<w><t>af-fairs</t></w>
-<w><t>af-fect</t></w>
+<w><t>af-fect</t><verb><regular-root/></verb></w>
<w><t>af-fec-ta-tion</t></w>
<w><t>af-fect-ed</t></w>
<w><t>af-fect-ed-ly</t></w>
@@ -6525,7 +6526,7 @@
<phrase><t>Anne of Aus-tri-a</t></phrase>
<phrase><t>Anne of Bo-he-mi-a</t></phrase>
<w><t>An-nette</t></w>
-<w><t>an-nex</t></w>
+<w><t>an-nex</t><verb><regular-root/></verb></w>
<w><t>an-nex-a-ble</t></w>
<w><t>an-nex-a-tion</t></w>
<w><t>an-nex-a-tion-al</t></w>
@@ -8545,7 +8546,7 @@
<w><t>ap-poin-tee</t></w>
<w><t>ap-point-er</t></w>
<w><t>ap-poin-tive</t></w>
-<w><t>ap-point-ment</t></w>
+<w><t>ap-point-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ap-poin-tor</t></w>
<w><t>Ap-po-mat-tox</t></w>
<w><t>ap-por-tion</t><verb><regular-root/></verb></w>
@@ -9380,7 +9381,7 @@
<w><t>A-ris-ti-des</t></w>
<w><t>Ar-i-stil-lus</t></w>
<w><t>Ar-is-tip-pus</t></w>
-<w><t>ar-is-toc-ra-cy</t></w>
+<w><t>ar-is-toc-ra-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-ris-to-crat</t></w>
<w><t>a-ris-to-crat-ic</t></w>
<w><t>a-ris-to-crat-i-cal</t></w>
@@ -9693,7 +9694,7 @@
<w><t>ar-ryth-mi-cal-ly</t></w>
<w><t>arse</t></w>
<phrase><t>arse lick-er</t></phrase>
-<w><t>ar-se-nal</t></w>
+<w><t>ar-se-nal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ar-se-nate</t></w>
<w><t>ar-se-nic</t></w>
<phrase><t>ar-se-nic ac-id</t></phrase>
@@ -10279,7 +10280,7 @@
<phrase><t>as-sem-bly line</t></phrase>
<w><t>as-sem-bly-man</t></w>
<w><t>As-sen</t></w>
-<w><t>as-sent</t></w>
+<w><t>as-sent</t><noun><singular/></noun><verb><regular-root/></verb></w>
<w><t>as-sen-ta-tion</t></w>
<w><t>as-sent-er</t></w>
<w><t>as-sen-ti-ent</t></w>
@@ -11188,7 +11189,7 @@
<w><t>Au-gier</t></w>
<w><t>au-gite</t></w>
<w><t>au-git-ic</t></w>
-<w><t>aug-ment</t></w>
+<w><t>aug-ment</t><verb><regular-root/></verb></w>
<w><t>aug-ment-a-ble</t></w>
<w><t>aug-men-ta-tion</t></w>
<w><t>aug-ment-a-tive</t></w>
@@ -11430,7 +11431,7 @@
<w><t>au-thor-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>au-thor-iz-a-ble</t></w>
<w><t>au-thor-i-za-tion</t></w>
-<w><t>au-thor-ize</t></w>
+<w><t>au-thor-ize</t><verb><regular-root/></verb></w>
<w><t>au-thor-ized</t></w>
<phrase><t>Au-thor-ized Ver-sion</t></phrase>
<w><t>au-thor-iz-er</t></w>
@@ -11977,6 +11978,7 @@
<w><t>ax-o-plasm</t></w>
<w><t>ax-seed</t></w>
<w><t>Ax-um</t></w>
+<w><t>ay</t><noun><pluralizable/></noun></w>
<w><t>A-ya-cu-cho</t></w>
<w><t>a-yah</t></w>
<w><t>a-ya-huas-ca</t></w>
@@ -12698,7 +12700,7 @@
<w><t>Bal-a-kla-va</t></w>
<w><t>bal-a-lai-ka</t></w>
<w><t>Bal-ance</t></w>
-<w><t>bal-ance</t></w>
+<w><t>bal-ance</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>bal-ance-a-ble</t></w>
<w><t>bal-anced</t></w>
<phrase><t>bal-ance of na-ture</t></phrase>
@@ -12891,7 +12893,7 @@
<phrase><t>bal-loon sleeve</t></phrase>
<phrase><t>bal-loon tyre</t></phrase>
<phrase><t>bal-loon vine</t></phrase>
-<w><t>bal-lot</t></w>
+<w><t>bal-lot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bal-lo-tade</t></w>
<phrase><t>bal-lot box</t></phrase>
<w><t>bal-lot-ed</t></w>
@@ -14744,7 +14746,7 @@
<w><t>Beh-an</t></w>
<w><t>Be-han</t></w>
<w><t>Be-har</t></w>
-<w><t>be-have</t></w>
+<w><t>be-have</t><verb><regular-root/></verb></w>
<w><t>be-haved</t></w>
<w><t>be-hav-ior</t></w>
<w><t>be-hav-ior-al</t></w>
@@ -16790,7 +16792,7 @@
<w><t>blab-ber-mouth</t></w>
<w><t>blab-bing</t></w>
<w><t>Bla-cher</t></w>
-<w><t>black</t><adjective><extensible/></adjective></w>
+<w><t>black</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible/></adjective></w>
<w><t>Black</t></w>
<w><t>black-a-cre</t></w>
<w><t>black-a-moor</t></w>
@@ -17177,7 +17179,7 @@
<w><t>blimp</t></w>
<w><t>bli-my</t></w>
<w><t>blin</t></w>
-<w><t>blind</t></w>
+<w><t>blind</t><verb><regular-root/></verb><adjective></adjective></w>
<w><t>blind-age</t></w>
<phrase><t>blind al-ley</t></phrase>
<w><t>blind-cat</t></w>
@@ -17185,7 +17187,7 @@
<w><t>blin-ders</t></w>
<w><t>blind-fish</t></w>
<w><t>blind-fish-es</t></w>
-<w><t>blind-fold</t></w>
+<w><t>blind-fold</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>blind-fold-ed-ly</t></w>
<w><t>blind-fold-ed-ness</t></w>
<w><t>blind-fold-er</t></w>
@@ -18417,7 +18419,7 @@
<w><t>bo-ron-ic</t></w>
<phrase><t>bo-ron ni-tride</t></phrase>
<w><t>bo-ro-sil-i-cate</t></w>
-<w><t>bor-ough</t></w>
+<w><t>bor-ough</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bor-ough=Eng-lish</t></w>
<w><t>bor-ras-ca</t></w>
<w><t>bor-rel-i-a</t></w>
@@ -20514,10 +20516,10 @@
<w><t>buhr</t></w>
<w><t>buhr-stone</t></w>
<w><t>bu-i-bu-i</t></w>
-<w><t>build</t></w>
+<w><t>build</t><verb><regular-root/></verb></w>
<w><t>build-a-ble</t></w>
<w><t>build-ed</t></w>
-<w><t>build-er</t></w>
+<w><t>build-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>build-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>build-ing-less</t></w>
<phrase><t>build-ing line</t></phrase>
@@ -20663,7 +20665,7 @@
<w><t>Bü-low</t></w>
<w><t>bul-rush</t></w>
<w><t>Bult-mann</t></w>
-<w><t>bul-wark</t></w>
+<w><t>bul-wark</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bul-wer</t></w>
<w><t>Bul-wer=Lyt-ton</t></w>
<w><t>bum</t></w>
@@ -22760,7 +22762,7 @@
<w><t>can-tle</t></w>
<w><t>can-tling</t></w>
<w><t>can-to</t></w>
-<w><t>can-ton</t></w>
+<w><t>can-ton</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Can-ton</t></w>
<w><t>can-ton-al</t></w>
<w><t>can-ton-al-ism</t></w>
@@ -22822,7 +22824,7 @@
<w><t>ca-pac-i-tive</t></w>
<w><t>ca-pac-i-tive-ly</t></w>
<w><t>ca-pac-i-tor</t></w>
-<w><t>ca-pac-i-ty</t></w>
+<w><t>ca-pac-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ca-pa-ne-us</t></w>
<w><t>ca-par-i-son</t></w>
<w><t>ca-pa-ta-ces</t></w>
@@ -24791,7 +24793,7 @@
<w><t>cell</t></w>
<w><t>cel-la</t></w>
<w><t>cel-lae</t></w>
-<w><t>cel-lar</t></w>
+<w><t>cel-lar</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cel-lar-age</t></w>
<w><t>cel-lar-er</t></w>
<w><t>cel-lar-et</t></w>
@@ -24924,7 +24926,6 @@
<w><t>cen-ten-i-o-na-lis</t></w>
<w><t>cen-ten-ni-al</t></w>
<w><t>cen-ten-ni-al-ly</t></w>
-<w><t>cen-ter</t></w>
<w><t>Cen-ter</t></w>
<w><t>cen-ter-a-ble</t></w>
<w><t>cen-ter-board</t></w>
@@ -28526,7 +28527,7 @@
<phrase><t>Clau-di-us II</t></phrase>
<w><t>claught</t></w>
<w><t>claus-al</t></w>
-<w><t>clause</t></w>
+<w><t>clause</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Clau-se-witz</t></w>
<w><t>Clau-si-us</t></w>
<w><t>claus-thal-ite</t></w>
@@ -28735,7 +28736,7 @@
<w><t>cler-i-cals</t></w>
<w><t>cler-i-hew</t></w>
<w><t>cler-i-sy</t></w>
-<w><t>clerk</t></w>
+<w><t>clerk</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>clerk-ess</t></w>
<w><t>clerk-ish</t></w>
<w><t>clerk-li-er</t></w>
@@ -29812,7 +29813,7 @@
<w><t>coin</t></w>
<w><t>coin-a-ble</t></w>
<w><t>coin-age</t></w>
-<w><t>co-in-cide</t></w>
+<w><t>co-in-cide</t><verb><regular-root/></verb></w>
<w><t>co-in-cid-ed</t></w>
<w><t>co-in-ci-dence</t></w>
<w><t>co-in-ci-dent</t></w>
@@ -29994,7 +29995,7 @@
<w><t>col-la-tor</t></w>
<w><t>col-league</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>col-league-ship</t></w>
-<w><t>col-lect</t></w>
+<w><t>col-lect</t><verb><regular-root/></verb></w>
<w><t>col-lect-a-bil-i-ty</t></w>
<w><t>col-lect-a-ble</t></w>
<w><t>col-lec-ta-ne-a</t></w>
@@ -30734,7 +30735,7 @@
<phrase><t>Com-mon-wealth Day</t></phrase>
<w><t>com-mo-ran-cy</t></w>
<w><t>com-mo-rant</t></w>
-<w><t>com-mo-tion</t></w>
+<w><t>com-mo-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>com-mo-tion-al</t></w>
<w><t>com-mo-tive</t></w>
<w><t>com-move</t></w>
@@ -31291,7 +31292,7 @@
<w><t>con-cel-e-bra-tion</t></w>
<w><t>con-cent</t></w>
<w><t>con-cen-ter</t></w>
-<w><t>con-cen-trate</t></w>
+<w><t>con-cen-trate</t><verb><regular-root/></verb></w>
<w><t>con-cen-trat-ed</t></w>
<w><t>con-cen-trat-ing</t></w>
<w><t>con-cen-tra-tion</t></w>
@@ -31325,7 +31326,7 @@
<w><t>con-cep-tu-al-ized</t></w>
<w><t>con-cep-tu-al-iz-ing</t></w>
<w><t>con-cep-tu-al-ly</t></w>
-<w><t>con-cern</t></w>
+<w><t>con-cern</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-cerned</t></w>
<w><t>con-cern-ed-ly</t></w>
<w><t>con-cern-ed-ness</t></w>
@@ -31516,7 +31517,7 @@
<w><t>con-den-ser-y</t></w>
<w><t>con-den-si-ble</t></w>
<w><t>con-dens-ing</t></w>
-<w><t>con-de-scend</t></w>
+<w><t>con-de-scend</t><verb><regular-root/></verb></w>
<w><t>con-de-scend-ence</t></w>
<w><t>con-des-cend-ent</t></w>
<w><t>con-des-cend-er</t></w>
@@ -31639,10 +31640,10 @@
<w><t>Con-fed-er-a-cy</t></w>
<w><t>con-fed-er-al-ist</t></w>
<w><t>Con-fed-er-ate</t></w>
-<w><t>con-fed-er-ate</t></w>
+<w><t>con-fed-er-ate</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>con-fed-er-at-ed</t></w>
<w><t>con-fed-er-at-ing</t></w>
-<w><t>con-fed-er-a-tion</t></w>
+<w><t>con-fed-er-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Con-fed-er-a-tion</t></w>
<w><t>con-fed-er-a-tion-ism</t></w>
<w><t>con-fed-er-a-tion-ist</t></w>
@@ -31777,7 +31778,7 @@
<w><t>con-fra-ter-ni-ty</t></w>
<w><t>con-frere</t></w>
<w><t>con-fric-a-men-tum</t></w>
-<w><t>con-front</t></w>
+<w><t>con-front</t><verb><regular-root/></verb></w>
<w><t>con-fron-ta-tion</t></w>
<w><t>con-front-er</t></w>
<w><t>Con-fu-cian</t></w>
@@ -31794,7 +31795,7 @@
<w><t>con-fus-ed-ness</t></w>
<w><t>con-fus-ing</t></w>
<w><t>con-fus-ing-ly</t></w>
-<w><t>con-fu-sion</t></w>
+<w><t>con-fu-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-fu-sion-al</t></w>
<w><t>con-fut-a-ble</t></w>
<w><t>con-fu-ta-tion</t></w>
@@ -32382,7 +32383,7 @@
<w><t>Con-sue-lo</t></w>
<w><t>con-sue-tude</t></w>
<w><t>con-sue-tu-di-nar-y</t></w>
-<w><t>con-sul</t></w>
+<w><t>con-sul</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-su-lar</t></w>
<phrase><t>con-su-lar a-gent</t></phrase>
<w><t>con-su-late</t></w>
@@ -32733,8 +32734,7 @@
<w><t>con-tre=par-tie</t></w>
<w><t>con-tre-temps</t></w>
<w><t>con-trib-ut-a-ble</t></w>
-<w><t>con-trib-ute</t></w>
-<w><t>con-tri-bute</t></w>
+<w><t>con-trib-ute</t><verb><regular-root/></verb></w>
<w><t>con-tri-but-ed</t></w>
<w><t>con-tri-but-ing</t></w>
<w><t>con-tri-bu-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -32783,8 +32783,8 @@
<w><t>con-tro-ver-sial-ism</t></w>
<w><t>con-tro-ver-sial-ist</t></w>
<w><t>con-tro-ver-sial-ly</t></w>
-<w><t>con-tro-ver-sy</t></w>
-<w><t>con-tro-vert</t></w>
+<w><t>con-tro-ver-sy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>con-tro-vert</t><verb><regular-root/></verb></w>
<w><t>con-tro-vert-er</t></w>
<w><t>con-tro-vert-i-ble</t></w>
<w><t>con-tro-vert-i-bly</t></w>
@@ -33649,7 +33649,7 @@
<w><t>cor-po-rate</t></w>
<w><t>cor-po-rate-ly</t></w>
<w><t>cor-po-rate-ness</t></w>
-<w><t>cor-po-ra-tion</t></w>
+<w><t>cor-po-ra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cor-po-ra-tion-al</t></w>
<w><t>cor-po-rat-ism</t></w>
<w><t>cor-po-rat-ist</t></w>
@@ -34348,7 +34348,7 @@
<w><t>coun-ter-shad-ing</t></w>
<w><t>coun-ter-shaft</t></w>
<w><t>coun-ter-shaft-ing</t></w>
-<w><t>coun-ter-sign</t></w>
+<w><t>coun-ter-sign</t><verb><regular-root/></verb></w>
<w><t>coun-ter-sig-na-ture</t></w>
<w><t>coun-ter-sink</t></w>
<w><t>coun-ter-sink-ing</t></w>
@@ -35012,7 +35012,7 @@
<w><t>cream-wove</t></w>
<w><t>cream-y</t></w>
<w><t>cre-ance</t></w>
-<w><t>crease</t></w>
+<w><t>crease</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>crease-less</t></w>
<w><t>crease=re-sis-tant</t></w>
<w><t>creas-i-er</t></w>
@@ -36469,7 +36469,7 @@
<w><t>cur-dle</t></w>
<w><t>cur-dler</t></w>
<w><t>curd-y</t></w>
-<w><t>cure</t></w>
+<w><t>cure</t><verb><regular-root/></verb></w>
<w><t>cu-ré</t></w>
<w><t>cure-less</t></w>
<w><t>cure-less-ly</t></w>
@@ -37956,7 +37956,7 @@
<w><t>daze</t></w>
<w><t>daz-ed-ly</t></w>
<w><t>daz-ing</t></w>
-<w><t>daz-zle</t></w>
+<w><t>daz-zle</t><verb><regular-root/></verb></w>
<w><t>daz-zler</t></w>
<w><t>daz-zling-ly</t></w>
<w><t>DBE</t></w>
@@ -38197,7 +38197,7 @@
<w><t>debt</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>debt-less</t></w>
<phrase><t>debt of hon-or</t></phrase>
-<w><t>debt-or</t></w>
+<w><t>debt-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-bug</t></w>
<w><t>de-bugged</t></w>
<w><t>de-bug-ging</t></w>
@@ -38646,7 +38646,7 @@
<w><t>de-crease</t></w>
<w><t>de-creas-ing</t></w>
<w><t>de-creas-ing-ly</t></w>
-<w><t>de-cree</t></w>
+<w><t>de-cree</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>de-cree ab-so-lute</t></phrase>
<w><t>de-creed</t></w>
<w><t>de-cree-ing</t></w>
@@ -38719,7 +38719,7 @@
<w><t>de-do-lo-mit-ize</t></w>
<w><t>de-do-lo-mit-ized</t></w>
<w><t>de-do-lo-mit-iz-ing</t></w>
-<w><t>de-duce</t></w>
+<w><t>de-duce</t><verb><regular-root/></verb></w>
<w><t>de-duced</t></w>
<w><t>de-duc-i-bil-i-ty</t></w>
<w><t>de-duc-i-ble</t></w>
@@ -38954,7 +38954,7 @@
<phrase><t>de-fla-tion-ar-y gap</t></phrase>
<w><t>de-fla-tion-ist</t></w>
<w><t>de-fla-tor</t></w>
-<w><t>de-flect</t></w>
+<w><t>de-flect</t><verb><regular-root/></verb></w>
<w><t>de-flect-a-ble</t></w>
<w><t>de-flect-ed</t></w>
<w><t>de-flec-tion</t></w>
@@ -39003,7 +39003,7 @@
<w><t>de-fraud</t></w>
<w><t>de-frau-da-tion</t></w>
<w><t>de-fraud-er</t></w>
-<w><t>de-fray</t></w>
+<w><t>de-fray</t><verb><regular-root/></verb></w>
<w><t>de-fray-a-ble</t></w>
<w><t>de-fray-al</t></w>
<w><t>de-fray-er</t></w>
@@ -39275,7 +39275,7 @@
<w><t>del-e-gate</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>del-e-gat-ed</t></w>
<w><t>del-e-gat-ing</t></w>
-<w><t>del-e-ga-tion</t></w>
+<w><t>del-e-ga-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>del-e-git</t></w>
<phrase><t>de Les-seps</t></phrase>
<w><t>de-lete</t></w>
@@ -39467,12 +39467,12 @@
<w><t>de-mag-net-ized</t></w>
<w><t>de-mag-net-iz-er</t></w>
<w><t>de-mag-net-iz-ing</t></w>
-<w><t>dem-a-gog</t></w>
+<w><t>dem-a-gog</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dem-a-gog-ic</t></w>
<w><t>dem-a-gog-i-cal</t></w>
<w><t>dem-a-gog-i-cal-ly</t></w>
<w><t>dem-a-gog-ism</t></w>
-<w><t>dem-a-gogue</t></w>
+<w><t>dem-a-gogue</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dem-a-gogu-er-y</t></w>
<w><t>dem-a-gogu-ism</t></w>
<w><t>dem-a-gog-y</t></w>
@@ -39608,7 +39608,7 @@
<w><t>de-mo-bi-lized</t></w>
<w><t>de-mo-bi-liz-ing</t></w>
<w><t>De-moc-o-on</t></w>
-<w><t>de-moc-ra-cy</t></w>
+<w><t>de-moc-ra-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dem-o-crat</t></w>
<w><t>Dem-o-crat</t></w>
<w><t>dem-o-crat-ic</t></w>
@@ -40065,7 +40065,7 @@
<w><t>de-pend-a-bly</t></w>
<w><t>de-pend-ance</t></w>
<w><t>de-pend-an-cy</t></w>
-<w><t>de-pend-ant</t></w>
+<w><t>de-pend-ant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-pend-ant-ly</t></w>
<w><t>de-pend-ence</t></w>
<w><t>de-pend-en-cy</t></w>
@@ -40168,7 +40168,7 @@
<phrase><t>de-pos-it ac-count</t></phrase>
<w><t>de-pos-i-tar-ies</t></w>
<w><t>de-pos-i-tar-y</t></w>
-<w><t>dep-o-si-tion</t></w>
+<w><t>dep-o-si-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Dep-o-si-tion</t></w>
<w><t>dep-o-si-tion-al</t></w>
<w><t>de-pos-i-tor</t></w>
@@ -40321,7 +40321,7 @@
<w><t>de-riv-a-tive</t></w>
<w><t>de-riv-a-tive-ly</t></w>
<w><t>de-riv-a-tive-ness</t></w>
-<w><t>de-rive</t></w>
+<w><t>de-rive</t><verb><regular-root/></verb></w>
<w><t>de-rived</t></w>
<phrase><t>de-rived u-nit</t></phrase>
<w><t>de-riv-er</t></w>
@@ -40454,7 +40454,7 @@
<w><t>de-scrib-er</t></w>
<w><t>de-scried</t></w>
<w><t>de-scri-er</t></w>
-<w><t>de-scrip-tion</t></w>
+<w><t>de-scrip-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-scrip-tive</t></w>
<phrase><t>de-scrip-tive ge-om-e-try</t></phrase>
<phrase><t>de-scrip-tive lin-guis-tics</t></phrase>
@@ -40659,7 +40659,7 @@
<w><t>de-spond-ent-ly</t></w>
<w><t>de-spond-er</t></w>
<w><t>de-spond-ing-ly</t></w>
-<w><t>des-pot</t></w>
+<w><t>des-pot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>des-pot-ic</t></w>
<w><t>des-pot-i-cal</t></w>
<w><t>des-pot-i-cal-ly</t></w>
@@ -40765,7 +40765,7 @@
<w><t>de-tach-ed-ness</t></w>
<w><t>de-tach-er</t></w>
<w><t>de-tach-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>de-tail</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>de-tail</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>de-tail draw-ing</t></phrase>
<w><t>de-tailed</t></w>
<w><t>de-tailed-ly</t></w>
@@ -40811,7 +40811,7 @@
<w><t>de-ter-mi-nate-ly</t></w>
<w><t>de-ter-mi-nate-ness</t></w>
<w><t>de-ter-mi-nat-ing</t></w>
-<w><t>de-ter-mi-na-tion</t></w>
+<w><t>de-ter-mi-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-ter-mi-na-tive</t></w>
<w><t>de-ter-mi-na-tive-ly</t></w>
<w><t>de-ter-mi-na-tive-ness</t></w>
@@ -41067,7 +41067,7 @@
<w><t>de-vol-a-til-iz-ing</t></w>
<w><t>dev-o-lu-tion</t></w>
<w><t>de-vo-lu-tion</t></w>
-<w><t>de-volve</t></w>
+<w><t>de-volve</t><verb><regular-root/></verb></w>
<w><t>de-volved</t></w>
<w><t>de-volve-ment</t></w>
<w><t>de-volv-ing</t></w>
@@ -41627,7 +41627,7 @@
<w><t>dic-tat-ing-ly</t></w>
<w><t>dic-ta-tion</t></w>
<w><t>dic-ta-tion-al</t></w>
-<w><t>dic-ta-tor</t></w>
+<w><t>dic-ta-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dic-ta-tor-i-al</t></w>
<w><t>dic-ta-to-ri-al</t></w>
<w><t>dic-ta-to-ri-al-ly</t></w>
@@ -43098,7 +43098,7 @@
<w><t>dis-mast-ment</t></w>
<w><t>dis-may</t><noun/><verb><regular-root/></verb></w>
<w><t>dis-mayed-ness</t></w>
-<w><t>dis-mem-ber</t></w>
+<w><t>dis-mem-ber</t><verb><regular-root/></verb></w>
<w><t>dis-mem-ber-er</t></w>
<w><t>dis-mem-berment</t></w>
<w><t>dis-miss</t><verb><regular-root/></verb></w>
@@ -43325,7 +43325,7 @@
<w><t>dis-qual-i-fi-a-ble</t></w>
<w><t>dis-qual-i-fi-ca-tion</t></w>
<w><t>dis-qual-i-fied</t></w>
-<w><t>dis-qual-i-fy</t></w>
+<w><t>dis-qual-i-fy</t><verb><regular-root/></verb></w>
<w><t>dis-qual-i-fy-ing</t></w>
<w><t>dis-qui-et</t></w>
<w><t>dis-qui-et-ed-ly</t></w>
@@ -43340,7 +43340,7 @@
<w><t>dis-rate</t></w>
<w><t>dis-rat-ed</t></w>
<w><t>dis-rat-ing</t></w>
-<w><t>dis-re-gard</t></w>
+<w><t>dis-re-gard</t><verb><regular-root/></verb></w>
<w><t>dis-re-gard-er</t></w>
<w><t>dis-re-gard-ful</t></w>
<w><t>dis-re-gard-ful-ly</t></w>
@@ -43412,7 +43412,7 @@
<w><t>dis-sem-i-na-tor</t></w>
<w><t>dis-sem-i-nule</t></w>
<w><t>dis-sen-sion</t><noun><pluralizable/></noun></w>
-<w><t>dis-sent</t></w>
+<w><t>dis-sent</t><noun><singular/></noun><verb><regular-root/></verb></w>
<w><t>dis-sent-er</t></w>
<w><t>Dis-sent-er</t></w>
<w><t>dis-sen-tience</t></w>
@@ -45318,7 +45318,7 @@
<w><t>dray-ing</t></w>
<w><t>dray-man</t></w>
<w><t>Dray-ton</t></w>
-<w><t>dread</t></w>
+<w><t>dread</t><noun><singular/></noun><verb><regular-root/></verb></w>
<w><t>dread-a-ble</t></w>
<w><t>dread-ful</t></w>
<w><t>dread-ful-ly</t></w>
@@ -46575,7 +46575,7 @@
<w><t>ear-wig-ging</t></w>
<w><t>ear-wig-gy</t></w>
<w><t>ear-wit-ness</t></w>
-<w><t>ease</t></w>
+<w><t>ease</t><verb><regular-root/></verb></w>
<w><t>ease-ful</t></w>
<w><t>ease-ful-ly</t></w>
<w><t>ease-ful-ness</t></w>
@@ -47642,7 +47642,7 @@
<w><t>elec</t></w>
<w><t>el-e-cam-pane</t></w>
<w><t>e-lect</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
-<w><t>e-lec-tion</t></w>
+<w><t>e-lec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-lec-tion-eer</t></w>
<w><t>e-lec-tion-eer-er</t></w>
<w><t>e-lec-tion-eer-ing</t><noun/></w>
@@ -48118,7 +48118,7 @@
<w><t>el-ling</t></w>
<w><t>El-ling-ton</t></w>
<w><t>El-lin-wood</t></w>
-<w><t>El-li-ot</t></w>
+<w><t>El-li-ot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>El-li-ott</t></w>
<w><t>el-lipse</t></w>
<w><t>el-lip-sis</t></w>
@@ -48328,7 +48328,7 @@
<w><t>em-bar-ka-tion</t></w>
<w><t>em-bark-ment</t></w>
<phrase><t>em-bar-ras de rich-esses</t></phrase>
-<w><t>em-bar-rass</t></w>
+<w><t>em-bar-rass</t><verb><regular-root/></verb></w>
<w><t>em-bar-rassed-ly</t></w>
<w><t>em-bar-rass-ing-ly</t></w>
<w><t>em-bar-rass-ment</t></w>
@@ -48559,7 +48559,7 @@
<w><t>Em-i-scan</t></w>
<w><t>em-is-sar-ies</t></w>
<w><t>em-is-sar-y</t></w>
-<w><t>e-mis-sion</t></w>
+<w><t>e-mis-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>e-mis-sion spec-trum</t></phrase>
<w><t>e-mis-sive</t></w>
<w><t>em-is-siv-i-ty</t></w>
@@ -48603,7 +48603,7 @@
<w><t>e-mol-lience</t></w>
<w><t>e-mol-lient</t></w>
<w><t>em-ol-li-tion</t></w>
-<w><t>e-mol-u-ment</t></w>
+<w><t>e-mol-u-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Em-o-ry</t></w>
<w><t>e-mote</t></w>
<w><t>e-mot-ed</t></w>
@@ -48655,7 +48655,7 @@
<w><t>em-pen-nage</t></w>
<w><t>em-pen-nag-es</t></w>
<w><t>em-per-ies</t></w>
-<w><t>em-per-or</t></w>
+<w><t>em-per-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>em-per-or pen-guin</t></phrase>
<w><t>em-per-or-ship</t></w>
<w><t>em-per-y</t></w>
@@ -48672,7 +48672,7 @@
<w><t>em-phat-i-cal-ness</t></w>
<w><t>em-phy-se-ma</t></w>
<w><t>em-phy-sem-a-tous</t></w>
-<w><t>em-pire</t></w>
+<w><t>em-pire</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Em-pire</t></w>
<w><t>em-pire=build-er</t></w>
<phrase><t>Em-pire Day</t></phrase>
@@ -48698,8 +48698,8 @@
<w><t>em-ploy-a-ble</t></w>
<w><t>em-ploy-e</t></w>
<w><t>em-ploy-é</t></w>
-<w><t>em-ploy-ee</t></w>
-<w><t>em-ploy-er</t></w>
+<w><t>em-ploy-ee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>em-ploy-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>em-ploy-ment</t></w>
<phrase><t>em-ploy-ment a-gen-cy</t></phrase>
<phrase><t>em-ploy-ment ex-change</t></phrase>
@@ -48712,7 +48712,7 @@
<w><t>Em-po-ri-um</t></w>
<w><t>em-po-ri-ums</t></w>
<w><t>em-pov-er-ish</t></w>
-<w><t>em-pow-er</t></w>
+<w><t>em-pow-er</t><verb><regular-root/></verb></w>
<w><t>em-pow-er-ment</t></w>
<w><t>em-press</t></w>
<w><t>em-presse-ment</t></w>
@@ -48953,7 +48953,7 @@
<w><t>en-cour-ag-ing-ly</t></w>
<w><t>en-crim-son</t></w>
<w><t>en-cri-nite</t></w>
-<w><t>en-croach</t></w>
+<w><t>en-croach</t><verb><regular-root/></verb></w>
<w><t>en-croach-er</t></w>
<w><t>en-croach-ment</t><noun><pluralizable/></noun></w>
<w><t>en-crust</t></w>
@@ -48999,7 +48999,7 @@
<w><t>en-da-moe-ba</t></w>
<w><t>en-da-moe-bic</t></w>
<w><t>end-an-ge-i-tis</t></w>
-<w><t>en-dan-ger</t></w>
+<w><t>en-dan-ger</t><verb><regular-root/></verb></w>
<w><t>en-dan-ger-ment</t></w>
<w><t>end-an-gi-tis</t></w>
<w><t>end-a-or-ti-tis</t></w>
@@ -51137,7 +51137,7 @@
<w><t>es-tan-ci-a</t></w>
<w><t>es-tan-cie-ro</t></w>
<w><t>es-tan-cie-ros</t></w>
-<w><t>es-tate</t></w>
+<w><t>es-tate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>es-tate a-gent</t></phrase>
<phrase><t>es-tate car</t></phrase>
<w><t>es-tat-ed</t></w>
@@ -51923,7 +51923,7 @@
<w><t>e-vap-o-tran-spi-ra-tion</t></w>
<w><t>Ev-a-ris-tus</t></w>
<w><t>Ev-arts</t></w>
-<w><t>e-va-sion</t></w>
+<w><t>e-va-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-va-sion-al</t></w>
<w><t>e-va-sive</t></w>
<w><t>e-va-sive-ly</t></w>
@@ -51970,7 +51970,7 @@
<w><t>e-ven-tu-al</t></w>
<w><t>e-ven-tu-al-i-ty</t></w>
<w><t>e-ven-tu-al-ly</t></w>
-<w><t>e-ven-tu-ate</t></w>
+<w><t>e-ven-tu-ate</t><verb><regular-root/></verb></w>
<w><t>e-ven-tu-at-ed</t></w>
<w><t>e-ven-tu-at-ing</t></w>
<w><t>e-ven-tu-a-tion</t></w>
@@ -52250,7 +52250,7 @@
<w><t>ex-cip-u-lum</t></w>
<w><t>ex-cir-cle</t></w>
<w><t>ex-cis-a-ble</t></w>
-<w><t>ex-cise</t></w>
+<w><t>ex-cise</t><verb><regular-root/></verb></w>
<w><t>ex-cised</t></w>
<w><t>ex-cise-man</t></w>
<w><t>ex-cis-ing</t></w>
@@ -52452,7 +52452,7 @@
<w><t>ex-em-pla-ry</t></w>
<phrase><t>ex-em-pla-ry dam-ag-es</t></phrase>
<w><t>ex-em-pli-fi-a-ble</t></w>
-<w><t>ex-em-pli-fi-ca-tion</t></w>
+<w><t>ex-em-pli-fi-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-em-pli-fi-ca-tive</t></w>
<w><t>ex-em-pli-fied</t></w>
<w><t>ex-em-pli-fi-er</t></w>
@@ -52460,7 +52460,7 @@
<w><t>ex-em-pli-fy-ing</t></w>
<phrase><t>ex-em-pli gra-ti-a</t></phrase>
<w><t>ex-em-plum</t></w>
-<w><t>ex-empt</t></w>
+<w><t>ex-empt</t><verb><regular-root/></verb><adjective><extensible value="false"/></adjective></w>
<w><t>ex-empt-i-ble</t></w>
<w><t>ex-emp-tion</t></w>
<w><t>ex-emp-tive</t></w>
@@ -52571,7 +52571,7 @@
<w><t>ex-im-i-ous-ly</t></w>
<w><t>ex-ine</t></w>
<w><t>ex-ist</t><verb><regular-root/></verb></w>
-<w><t>ex-ist-ence</t></w>
+<w><t>ex-ist-ence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-ist-ent</t></w>
<w><t>ex-is-ten-tial</t></w>
<w><t>ex-is-ten-tial-ism</t></w>
@@ -52776,7 +52776,7 @@
<w><t>ex-pend-a-bil-i-ty</t></w>
<w><t>ex-pend-a-ble</t></w>
<w><t>ex-pend-er</t></w>
-<w><t>ex-pend-i-ture</t></w>
+<w><t>ex-pend-i-ture</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>ex-pense</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ex-pense ac-count</t></phrase>
<w><t>ex-pense-less</t></w>
@@ -53145,7 +53145,7 @@
<w><t>ex-tinc-tion</t></w>
<w><t>ex-tinc-tive</t></w>
<w><t>ex-tine</t></w>
-<w><t>ex-tin-guish</t></w>
+<w><t>ex-tin-guish</t><verb><regular-root/></verb></w>
<w><t>ex-tin-guish-a-ble</t></w>
<w><t>ex-tin-guish-ant</t></w>
<w><t>ex-tin-guish-er</t></w>
@@ -53166,7 +53166,7 @@
<w><t>ex-tol-ment</t></w>
<w><t>ex-tor-sive</t></w>
<w><t>ex-tor-sive-ly</t></w>
-<w><t>ex-tort</t></w>
+<w><t>ex-tort</t><verb><regular-root/></verb></w>
<w><t>ex-tort-er</t></w>
<w><t>ex-tor-tion</t></w>
<w><t>ex-tor-tion-ar-y</t></w>
@@ -53270,7 +53270,7 @@
<w><t>ex-tra-ver-tive-ly</t></w>
<w><t>Ex-tre-ma-du-ra</t></w>
<w><t>ex-tre-mal</t></w>
-<w><t>ex-treme</t></w>
+<w><t>ex-treme</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-treme-ly</t></w>
<phrase><t>ex-treme-ly high fre-quen-cy</t></phrase>
<w><t>ex-treme-ness</t></w>
@@ -53641,7 +53641,7 @@
<w><t>fail-ing-ly</t></w>
<w><t>fail-ing-ness</t></w>
<w><t>faille</t></w>
-<w><t>fail-ure</t></w>
+<w><t>fail-ure</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fain</t></w>
<w><t>fai-naigue</t></w>
<w><t>fai-naigued</t></w>
@@ -53651,7 +53651,7 @@
<w><t>fai-ne-ance</t></w>
<w><t>fai-ne-ant</t></w>
<w><t>fai-né-ant</t></w>
-<w><t>faint</t></w>
+<w><t>faint</t><verb><regular-root/></verb><adjective><extensible/></adjective></w>
<w><t>faint-er</t></w>
<w><t>faint-heart</t></w>
<w><t>faint-heart-ed</t></w>
@@ -54383,7 +54383,7 @@
<w><t>fa-vored</t></w>
<w><t>fa-vored-ly</t></w>
<w><t>fa-vored-ness</t></w>
-<w><t>fa-vor-er</t></w>
+<w><t>fa-vor-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fa-vor-ing-ly</t></w>
<w><t>fa-vor-ite</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective></adjective></w>
<phrase><t>fa-vor-ite son</t></phrase>
@@ -54555,7 +54555,7 @@
<w><t>Fed-er-al-ism</t></w>
<w><t>fed-er-al-ism</t></w>
<w><t>Fed-eralist</t></w>
-<w><t>fed-er-al-ist</t></w>
+<w><t>fed-er-al-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fed-er-al-is-tic</t></w>
<phrase><t>Fed-er-al-ist Par-ty</t></phrase>
<w><t>fed-er-al-i-za-tion</t></w>
@@ -54706,7 +54706,7 @@
<w><t>fe-lo-ni-ous-ly</t></w>
<w><t>fe-lo-ni-ous-ness</t></w>
<w><t>fel-on-ry</t></w>
-<w><t>fel-o-ny</t></w>
+<w><t>fel-o-ny</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fel-sic</t></w>
<w><t>fel-site</t></w>
<w><t>fel-sit-ic</t></w>
@@ -55055,7 +55055,7 @@
<w><t>fet-ish-like</t></w>
<w><t>fet-lock</t></w>
<w><t>fe-tor</t></w>
-<w><t>fet-ter</t></w>
+<w><t>fet-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>fet-ter bone</t></phrase>
<w><t>fet-ter-bush</t></w>
<w><t>fet-ter-er</t></w>
@@ -55618,7 +55618,7 @@
<w><t>find-ing</t></w>
<w><t>Find-lay</t></w>
<w><t>finds</t><verb><regular-root value="false"/></verb></w>
-<w><t>fine</t></w>
+<w><t>fine</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fi-ne</t></w>
<w><t>fine-able</t></w>
<w><t>fine-a-ble</t></w>
@@ -57674,7 +57674,7 @@
<w><t>fore-worn</t></w>
<w><t>fore-yard</t></w>
<w><t>For-far</t></w>
-<w><t>for-feit</t></w>
+<w><t>for-feit</t><verb><regular-root/></verb></w>
<w><t>for-feit-a-ble</t></w>
<w><t>for-feit-er</t></w>
<w><t>for-fei-ture</t></w>
@@ -57683,17 +57683,18 @@
<w><t>for-gat</t></w>
<w><t>for-gath-er</t></w>
<w><t>for-gave</t></w>
-<w><t>forge</t></w>
+<w><t>forge</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>forge-a-ble</t></w>
<w><t>forg-er</t></w>
<w><t>for-ger-ies</t></w>
<w><t>for-ger-y</t></w>
-<w><t>for-get</t></w>
+<w><t>for-get</t><verb><regular-root value="false"/></verb></w>
<w><t>for-get-ful</t></w>
<w><t>for-get-ful-ly</t></w>
<w><t>for-get-ful-ness</t></w>
<w><t>for-ge-tive</t></w>
<w><t>for-get=me=not</t></w>
+<w><t>for-gets</t><verb><lemma>forget</lemma><vf><person-3rd/><singular/></vf></verb></w>
<w><t>for-get-ta-ble</t></w>
<w><t>for-get-ter</t></w>
<w><t>for-get-ter-y</t></w>
@@ -57937,7 +57938,7 @@
<w><t>for-tu-nate</t></w>
<w><t>for-tu-nate-ly</t></w>
<w><t>for-tu-nate-ness</t></w>
-<w><t>for-tune</t></w>
+<w><t>for-tune</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>For-tune</t></w>
<w><t>for-tuned</t></w>
<w><t>for-tune=hunt-er</t></w>
@@ -58280,7 +58281,7 @@
<phrase><t>frame aer-i-al</t></phrase>
<w><t>frame-less</t></w>
<phrase><t>frame of ref-er-ence</t></phrase>
-<w><t>fram-er</t></w>
+<w><t>fram-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>frame-work</t></w>
<w><t>fram-ing</t></w>
<w><t>Fram-ing-ham</t></w>
@@ -58295,7 +58296,7 @@
<w><t>Fran-ce-sco</t></w>
<w><t>Franche=Com-t</t></w>
<w><t>Franche=Com-té</t></w>
-<w><t>fran-chise</t></w>
+<w><t>fran-chise</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fran-chised</t></w>
<w><t>fran-chise-ment</t></w>
<w><t>fran-chis-er</t></w>
@@ -58362,7 +58363,7 @@
<w><t>Frank-ie</t></w>
<w><t>frank-in-cense</t></w>
<w><t>Frank-ish</t></w>
-<w><t>Frank-lin</t></w>
+<w><t>Frank-lin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>frank-lin</t></w>
<w><t>frank-lin-ite</t></w>
<w><t>Frank-lin-ton</t></w>
@@ -58519,7 +58520,7 @@
<w><t>free=heart-ed</t></w>
<w><t>free-hold</t></w>
<w><t>Free-hold</t></w>
-<w><t>free-hold-er</t></w>
+<w><t>free-hold-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>free-ing</t></w>
<phrase><t>free la-bour</t></phrase>
<w><t>free-lance</t></w>
@@ -58533,7 +58534,7 @@
<w><t>free-load-ing</t></w>
<w><t>free-ly</t></w>
<w><t>free=ma-chin-ing</t></w>
-<w><t>free-man</t><noun><convertible-to-possessive/></noun></w>
+<w><t>free-man</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>Free-man</t></w>
<w><t>free-mar-tin</t></w>
<w><t>free-ma-son</t></w>
@@ -58541,6 +58542,7 @@
<w><t>free-ma-son-ic</t></w>
<w><t>Free-ma-son-ry</t></w>
<w><t>free-ma-son-ry</t></w>
+<w><t>free-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>free-ness</t></w>
<w><t>Free-port</t></w>
<w><t>Freer</t></w>
@@ -58990,7 +58992,7 @@
<w><t>Fronte-nac</t></w>
<w><t>Fron-te-nac</t></w>
<w><t>fron-tes</t></w>
-<w><t>fron-tier</t></w>
+<w><t>fron-tier</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fron-tier-less</t></w>
<w><t>fron-tier-like</t></w>
<w><t>fron-tiers-man</t></w>
@@ -61206,7 +61208,7 @@
<w><t>gen-tle-folk</t></w>
<w><t>gen-tle-folks</t></w>
<w><t>gen-tle-hood</t></w>
-<w><t>gen-tle-man</t></w>
+<w><t>gen-tle-man</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>gen-tle-man=at=arms</t></w>
<w><t>gen-tle-man=com-mon-er</t></w>
<w><t>gen-tle-man=farm-er</t></w>
@@ -63532,7 +63534,7 @@
<w><t>gov-ern-men-tal-ly</t></w>
<w><t>gov-ern-ment=in=ex-ile</t></w>
<phrase><t>gov-ern-ment is-sue</t></phrase>
-<w><t>gov-er-nor</t></w>
+<w><t>gov-er-nor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>gov-er-nor gen-er-al</t></phrase>
<w><t>gov-er-nor=gen-er-al</t></w>
<w><t>gov-er-nor=gen-er-al-ship</t></w>
@@ -64452,7 +64454,7 @@
<w><t>griege</t></w>
<w><t>Grier-son</t></w>
<w><t>grie-shoch</t></w>
-<w><t>griev-ance</t></w>
+<w><t>griev-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>grieve</t><verb><regular-root/></verb></w>
<w><t>griev-ed-ly</t></w>
<w><t>griev-er</t></w>
@@ -65706,7 +65708,7 @@
<w><t>ha-bit-u-al</t></w>
<w><t>ha-bit-u-al-ly</t></w>
<w><t>ha-bit-u-al-ness</t></w>
-<w><t>ha-bit-u-ate</t></w>
+<w><t>ha-bit-u-ate</t><verb><regular-root/></verb></w>
<w><t>ha-bit-u-at-ed</t></w>
<w><t>ha-bit-u-at-ing</t></w>
<w><t>ha-bit-u-a-tion</t></w>
@@ -67836,7 +67838,7 @@
<w><t>heart-en-ing-ly</t></w>
<phrase><t>heart fail-ure</t></phrase>
<w><t>heart-felt</t></w>
-<w><t>hearth</t></w>
+<w><t>hearth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hearth-less</t></w>
<w><t>hearth-side</t></w>
<w><t>hearth-stead</t></w>
@@ -70355,7 +70357,7 @@
<w><t>hol-laed</t></w>
<w><t>hol-la-ing</t></w>
<w><t>hol-land</t></w>
-<w><t>Hol-land</t></w>
+<w><t>Hol-land</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>hol-lan-daise sauce</t></phrase>
<w><t>Hol-lan-dale</t></w>
<w><t>Hol-land-er</t></w>
@@ -71756,7 +71758,7 @@
<w><t>hum-ding-er</t></w>
<w><t>hum-drum</t></w>
<w><t>hum-drum-ness</t></w>
-<w><t>Hume</t></w>
+<w><t>Hume</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hu-mec-tant</t></w>
<w><t>hu-mer-al</t></w>
<phrase><t>hu-mer-al veil</t></phrase>
@@ -74221,7 +74223,7 @@
<w><t>im-bit-ter-er</t></w>
<w><t>im-bit-ter-ment</t></w>
<w><t>im-bod-i-ment</t></w>
-<w><t>im-bod-y</t></w>
+<w><t>im-bod-y</t><verb><regular-root/></verb></w>
<w><t>im-bold-en</t></w>
<w><t>im-bos-om</t></w>
<w><t>im-bow-er</t></w>
@@ -74563,7 +74565,7 @@
<w><t>im-pa-vid-i-ty</t></w>
<w><t>im-pav-id-ly</t></w>
<w><t>im-pawn</t></w>
-<w><t>im-peach</t></w>
+<w><t>im-peach</t><verb><regular-root/></verb></w>
<w><t>im-peach-a-bil-i-ty</t></w>
<w><t>im-peach-a-ble</t></w>
<w><t>im-peach-er</t></w>
@@ -74948,7 +74950,7 @@
<w><t>im-print</t></w>
<w><t>im-print-er</t></w>
<w><t>im-print-ing</t></w>
-<w><t>im-pris-on</t></w>
+<w><t>im-pris-on</t><verb><regular-root/></verb></w>
<w><t>im-pris-on-a-ble</t></w>
<w><t>im-pris-on-er</t></w>
<w><t>im-pris-on-ment</t></w>
@@ -75542,7 +75544,7 @@
<w><t>in-con-tro-vert-i-ble</t></w>
<w><t>in-con-tro-vert-i-ble-ness</t></w>
<w><t>in-con-tro-vert-i-bly</t></w>
-<w><t>in-con-ven-ience</t></w>
+<w><t>in-con-ven-ience</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-con-ven-ienced</t></w>
<w><t>in-con-ven-ienc-ing</t></w>
<w><t>in-con-ven-ien-cy</t></w>
@@ -75598,7 +75600,7 @@
<w><t>in-cras-sa-tion</t></w>
<w><t>in-cras-sa-tive</t></w>
<w><t>in-creas-a-ble</t></w>
-<w><t>in-crease</t></w>
+<w><t>in-crease</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>in-creased</t></w>
<w><t>in-creas-ed-ly</t></w>
<w><t>in-creas-er</t></w>
@@ -75912,7 +75914,7 @@
<w><t>in-di-ci-as</t></w>
<w><t>in-di-ci-um</t></w>
<w><t>in-dic-o-lite</t></w>
-<w><t>in-dict</t></w>
+<w><t>in-dict</t><verb><regular-root/></verb></w>
<w><t>in-dict-a-ble</t></w>
<w><t>in-dict-a-bly</t></w>
<w><t>in-dict-er</t></w>
@@ -77239,7 +77241,7 @@
<w><t>in-sep-a-ra-ble</t></w>
<w><t>in-sep-a-ra-ble-ness</t></w>
<w><t>in-sep-a-ra-bly</t></w>
-<w><t>in-sert</t></w>
+<w><t>in-sert</t><verb><regular-root/></verb></w>
<w><t>in-sert-a-ble</t></w>
<w><t>in-sert-ed</t></w>
<w><t>in-sert-er</t></w>
@@ -77376,7 +77378,7 @@
<w><t>in-spi-ra-tion-al-ly</t></w>
<w><t>in-spir-a-tive</t></w>
<w><t>in-spir-a-to-ry</t></w>
-<w><t>in-spire</t></w>
+<w><t>in-spire</t><verb><regular-root/></verb></w>
<w><t>in-spired</t></w>
<w><t>in-spir-ed-ly</t></w>
<w><t>in-spir-er</t></w>
@@ -77404,7 +77406,7 @@
<w><t>in-stal-ling</t></w>
<w><t>in-stall-ment</t></w>
<phrase><t>in-stall-ment plan</t></phrase>
-<w><t>in-stal-ment</t></w>
+<w><t>in-stal-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-stance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-stanced</t></w>
<w><t>in-stanc-ing</t></w>
@@ -77571,7 +77573,7 @@
<w><t>in-sur-mount-a-ble</t></w>
<w><t>in-sur-mount-a-ble-ness</t></w>
<w><t>in-sur-mount-a-bly</t></w>
-<w><t>in-sur-rec-tion</t></w>
+<w><t>in-sur-rec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-sur-rec-tion-al</t></w>
<w><t>in-sur-rec-tion-al-ly</t></w>
<w><t>in-sur-rec-tion-ar-ies</t></w>
@@ -77703,7 +77705,7 @@
<phrase><t>in-ten-sive care</t></phrase>
<w><t>in-ten-sive-ly</t></w>
<w><t>in-ten-sive-ness</t></w>
-<w><t>in-tent</t></w>
+<w><t>in-tent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-ten-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-ten-tion-al</t></w>
<w><t>in-ten-tion-al-i-ty</t></w>
@@ -79252,7 +79254,7 @@
<w><t>in-var-i-a-bly</t></w>
<w><t>in-var-i-ant</t></w>
<w><t>in-var-i-ant-ly</t></w>
-<w><t>in-va-sion</t></w>
+<w><t>in-va-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-va-sive</t></w>
<w><t>in-vect-ed</t></w>
<w><t>in-vec-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -81259,7 +81261,7 @@
<w><t>jer-ry-can</t></w>
<phrase><t>jer-ry can</t></phrase>
<w><t>jer-sey</t></w>
-<w><t>Jer-sey</t></w>
+<w><t>Jer-sey</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Jer-sey-an</t></w>
<phrase><t>Jer-sey Cit-y</t></phrase>
<w><t>jer-seyed</t></w>
@@ -82077,7 +82079,7 @@
<w><t>junc</t></w>
<w><t>jun-ca-ceous</t></w>
<w><t>jun-co</t></w>
-<w><t>junc-tion</t></w>
+<w><t>junc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>junc-tion-al</t></w>
<phrase><t>junc-tion box</t></phrase>
<phrase><t>junc-tion tran-sis-tor</t></phrase>
@@ -82171,7 +82173,7 @@
<w><t>ju-ror</t></w>
<w><t>Ju-ru</t></w>
<w><t>Ju-ru-á</t></w>
-<w><t>ju-ry</t></w>
+<w><t>ju-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ju-ry box</t></phrase>
<w><t>ju-ry-less</t></w>
<w><t>ju-ry-man</t></w>
@@ -83763,7 +83765,7 @@
<w><t>kit-am-bil-la</t></w>
<w><t>Ki-ta-sa-to</t></w>
<w><t>kit-bag</t></w>
-<w><t>kitch-en</t></w>
+<w><t>kitch-en</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>kitch-en-er</t></w>
<w><t>Kitch-e-ner</t></w>
<w><t>kitch-en-et</t></w>
@@ -85365,7 +85367,7 @@
<w><t>land-grave</t></w>
<w><t>land-gra-vi-ate</t></w>
<w><t>land-gra-vine</t></w>
-<w><t>land-hold-er</t></w>
+<w><t>land-hold-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>land=hold-er</t></w>
<w><t>land-hold-ing</t></w>
<w><t>land-ing</t></w>
@@ -86099,7 +86101,7 @@
<w><t>lav-er-ock</t></w>
<w><t>lav-ing</t></w>
<w><t>La-vin-i-a</t></w>
-<w><t>lav-ish</t></w>
+<w><t>lav-ish</t><verb><regular-root/></verb><adjective/></w>
<w><t>lav-ish-er</t></w>
<w><t>lav-ish-ly</t></w>
<w><t>lav-ish-ment</t></w>
@@ -86809,7 +86811,7 @@
<w><t>le-nes</t></w>
<w><t>L’En-fant</t></w>
<w><t>Leng-len</t></w>
-<w><t>length</t></w>
+<w><t>length</t><noun><pluralizable/></noun></w>
<w><t>length-en</t><verb><regular-root/></verb></w>
<w><t>length-en-er</t></w>
<w><t>length-i-er</t></w>
@@ -87018,7 +87020,7 @@
<w><t>less</t></w>
<w><t>les-see</t></w>
<w><t>les-see-ship</t></w>
-<w><t>less-en</t></w>
+<w><t>less-en</t><verb><regular-root/></verb></w>
<w><t>Les-seps</t></w>
<w><t>less-er</t></w>
<phrase><t>Less-er An-til-les</t></phrase>
@@ -88297,7 +88299,7 @@
<phrase><t>liq-uid par-af-fin</t></phrase>
<w><t>liq-ui-dus</t></w>
<w><t>liq-ui-fy</t></w>
-<w><t>liq-uor</t></w>
+<w><t>liq-uor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>liq-uo-rice</t></w>
<w><t>liq-uor-ish</t></w>
<phrase><t>liq-uor up</t></phrase>
@@ -88692,7 +88694,7 @@
<w><t>loan-blend</t></w>
<phrase><t>loan col-lec-tion</t></phrase>
<w><t>Lo-an-da</t></w>
-<w><t>loan-er</t></w>
+<w><t>loan-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>loan-in</t></w>
<w><t>loan-ing</t></w>
<w><t>loan-shift</t></w>
@@ -88804,7 +88806,7 @@
<w><t>loch-us</t></w>
<w><t>lo-ci</t></w>
<w><t>Lock</t></w>
-<w><t>lock</t></w>
+<w><t>lock</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>lock-a-ble</t></w>
<w><t>lock-age</t></w>
<w><t>lock-box</t></w>
@@ -89039,7 +89041,7 @@
<w><t>Lo-la</t></w>
<w><t>Lo-le-ta</t></w>
<w><t>Lo-li-ta</t></w>
-<w><t>loll</t></w>
+<w><t>loll</t><verb><regular-root/></verb></w>
<w><t>Lol-land</t></w>
<w><t>lol-la-pa-loo-sa</t></w>
<w><t>lol-la-pa-loo-za</t></w>
@@ -90864,7 +90866,7 @@
<w><t>mag-is-tral</t></w>
<w><t>mag-is-tral-i-ty</t></w>
<w><t>mag-is-tral-ly</t></w>
-<w><t>mag-is-trate</t></w>
+<w><t>mag-is-trate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>mag-is-trates’ court</t></phrase>
<w><t>mag-is-trate-ship</t></w>
<w><t>mag-is-trat-i-cal-ly</t></w>
@@ -91217,7 +91219,7 @@
<w><t>make-less</t></w>
<phrase><t>make o-ver</t></phrase>
<w><t>Mak-er</t></w>
-<w><t>mak-er</t></w>
+<w><t>mak-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>make=read-y</t></w>
<w><t>makes</t><verb/></w>
<w><t>make-shift</t></w>
@@ -93345,7 +93347,7 @@
<w><t>meal-y=mouthed</t></w>
<w><t>meal-y-mouth-ed-ly</t></w>
<w><t>meal-y=mouth-ed-ness</t></w>
-<w><t>mean</t></w>
+<w><t>mean</t><adjective><extensible/></adjective></w>
<w><t>me-an-der</t><verb><regular-root/></verb></w>
<w><t>Me-an-der</t></w>
<w><t>me-an-der-er</t></w>
@@ -93996,7 +93998,7 @@
<w><t>Mem-phit-ic</t></w>
<w><t>Mem-phre-ma-gog</t></w>
<w><t>mem-sa-hib</t></w>
-<w><t>men</t></w>
+<w><t>men</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>Me-na</t></w>
<w><t>men-ace</t></w>
<w><t>men-aced</t></w>
@@ -94023,7 +94025,7 @@
<w><t>Men-ci-us</t></w>
<w><t>Menck-en</t></w>
<w><t>Menc-ke-ni-an</t></w>
-<w><t>mend</t></w>
+<w><t>mend</t><verb><regular-root/></verb></w>
<w><t>Men-d</t></w>
<w><t>mend-a-ble</t></w>
<w><t>men-da-cious-ly</t></w>
@@ -94529,7 +94531,7 @@
<w><t>Mes-sa-pic</t></w>
<w><t>Mes-sei-gneurs</t></w>
<w><t>Mes-se-ne</t></w>
-<w><t>mes-sen-ger</t></w>
+<w><t>mes-sen-ger</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>mes-sen-ger RNA</t></phrase>
<w><t>Mes-se-ni-a</t></w>
<w><t>Mes-ser-schmitt</t></w>
@@ -97042,7 +97044,7 @@
<w><t>mod-i-fi-a-ble</t></w>
<w><t>mod-i-fi-a-ble-ness</t></w>
<w><t>mod-i-fi-cand</t></w>
-<w><t>mod-i-fi-ca-tion</t></w>
+<w><t>mod-i-fi-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mod-i-fied</t></w>
<w><t>mod-i-fi-er</t></w>
<w><t>mod-i-fy</t></w>
@@ -97987,6 +97989,7 @@
<w><t>mor-al-ized</t></w>
<w><t>mor-al-iz-ing</t></w>
<w><t>mor-al-less</t></w>
+<w><t>mor-al-ly</t><adverb/></w>
<phrase><t>mor-al phil-os-o-phy</t></phrase>
<phrase><t>Mor-al Re-ar-ma-ment</t></phrase>
<phrase><t>mor-al the-ol-o-gy</t></phrase>
@@ -98730,7 +98733,7 @@
<w><t>Mul-ci-ber</t></w>
<w><t>mulct</t></w>
<w><t>Mul-doon</t></w>
-<w><t>mule</t></w>
+<w><t>mule</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>Mule-shoe</t></w>
<phrase><t>mule skin-ner</t></phrase>
<w><t>mu-le-ta</t></w>
@@ -100422,7 +100425,7 @@
<w><t>ne-ces-si-tous-ly</t></w>
<w><t>ne-ces-si-tous-ness</t></w>
<w><t>ne-ces-si-tude</t></w>
-<w><t>ne-ces-si-ty</t></w>
+<w><t>ne-ces-si-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ne-cho</t></w>
<w><t>neck</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Neck-ar</t></w>
@@ -100551,7 +100554,7 @@
<w><t>ne-ga-tion</t></w>
<w><t>ne-ga-tion-al</t></w>
<w><t>ne-ga-tion-ist</t></w>
-<w><t>neg-a-tive</t></w>
+<w><t>neg-a-tive</t><verb><regular-root/></verb><adjective></adjective></w>
<w><t>neg-a-tived</t></w>
<phrase><t>neg-a-tive feed-back</t></phrase>
<phrase><t>neg-a-tive hal-lu-ci-na-tion</t></phrase>
@@ -101868,7 +101871,7 @@
<w><t>no-bil-i-a-ry</t></w>
<phrase><t>no-bil-i-a-ry par-ti-cle</t></phrase>
<w><t>no-bil-i-ty</t></w>
-<w><t>no-ble</t><adjective><extensible/></adjective></w>
+<w><t>no-ble</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible/></adjective></w>
<phrase><t>no-ble art</t></phrase>
<w><t>no-ble-man</t><noun><singular/></noun></w>
<w><t>no-ble-man-ly</t></w>
@@ -102011,7 +102014,7 @@
<w><t>nom-i-nal-iz-ing</t></w>
<phrase><t>nom-i-nal val-ue</t></phrase>
<phrase><t>nom-i-nal wag-es</t></phrase>
-<w><t>nom-i-nate</t></w>
+<w><t>nom-i-nate</t><verb><regular-root/></verb></w>
<w><t>nom-i-nat-ed</t></w>
<w><t>nom-i-nat-ing</t></w>
<w><t>nom-i-na-tion</t></w>
@@ -108372,7 +108375,7 @@
<phrase><t>ob-ser-va-tion post</t></phrase>
<w><t>ob-serv-a-to-ry</t></w>
<w><t>ob-ser-va-to-ry</t></w>
-<w><t>ob-serve</t></w>
+<w><t>ob-serve</t><verb><regular-root/></verb></w>
<w><t>ob-served</t></w>
<w><t>ob-serv-ed-ly</t></w>
<w><t>ob-serv-er</t></w>
@@ -108531,7 +108534,7 @@
<w><t>oc-cult-ness</t></w>
<w><t>oc-cu-pan-cy</t></w>
<w><t>oc-cu-pant</t></w>
-<w><t>oc-cu-pa-tion</t></w>
+<w><t>oc-cu-pa-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>oc-cu-pa-tion-al</t></w>
<w><t>oc-cu-pa-tion-al-ly</t></w>
<phrase><t>oc-cu-pa-tion-al psy-chol-o-gy</t></phrase>
@@ -108867,7 +108870,7 @@
<w><t>of-fend-a-ble</t></w>
<w><t>of-fend-ed-ly</t></w>
<w><t>of-fend-ed-ness</t></w>
-<w><t>of-fend-er</t></w>
+<w><t>of-fend-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>of-fend-i-ble</t></w>
<w><t>of-fense</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>of-fense-less</t></w>
@@ -109827,7 +109830,7 @@
<w><t>op-po-si-tion-al</t></w>
<w><t>op-po-si-tion-ar-y</t></w>
<w><t>op-po-si-tion-less</t></w>
-<w><t>op-press</t></w>
+<w><t>op-press</t><verb><regular-root/></verb></w>
<w><t>op-pres-si-ble</t></w>
<w><t>op-pres-sion</t></w>
<w><t>op-pres-sive</t></w>
@@ -110219,7 +110222,7 @@
<w><t>o-rig-i-nal-i-ty</t></w>
<w><t>o-rig-i-nal-ly</t></w>
<phrase><t>o-rig-i-nal sin</t></phrase>
-<w><t>o-rig-i-nate</t></w>
+<w><t>o-rig-i-nate</t><verb><regular-root/></verb></w>
<w><t>o-rig-i-nat-ed</t></w>
<w><t>o-rig-i-nat-ing</t></w>
<w><t>o-rig-i-na-tion</t></w>
@@ -111214,7 +111217,7 @@
<w><t>out-last</t></w>
<w><t>out-laugh</t></w>
<w><t>out-launch</t></w>
-<w><t>out-law</t></w>
+<w><t>out-law</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>out-law-ry</t></w>
<w><t>out-lay</t></w>
<w><t>out-lay-ing</t></w>
@@ -112895,6 +112898,7 @@
<w><t>o-ver-lay</t></w>
<w><t>o-ver-lay-ing</t></w>
<w><t>o-ver-leaf</t></w>
+<w><t>o-ver-leap</t><verb><regular-root/></verb></w>
<w><t>o-ver-learn</t></w>
<w><t>o-ver-learn-ed</t></w>
<w><t>o-ver-leg-is-late</t></w>
@@ -115222,7 +115226,7 @@
<w><t>pa-rag-o-nit-ic</t></w>
<w><t>par-a-gon-less</t></w>
<w><t>Par-a-gould</t></w>
-<w><t>par-a-graph</t></w>
+<w><t>par-a-graph</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>par-a-graph-er</t></w>
<w><t>par-a-graph-i-a</t></w>
<w><t>par-a-graph-ic</t></w>
@@ -115595,7 +115599,7 @@
<w><t>par-ley-er</t></w>
<w><t>par-ley-ing</t></w>
<w><t>parl-ey-voo</t></w>
-<w><t>par-lia-ment</t></w>
+<w><t>par-lia-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Par-lia-ment</t></w>
<w><t>par-lia-men-tar-i-an</t></w>
<w><t>Par-lia-men-tar-i-an</t></w>
@@ -116005,7 +116009,8 @@
<w><t>pas-si-vate</t></w>
<w><t>pas-si-vat-ed</t></w>
<w><t>pas-si-vat-ing</t></w>
-<w><t>pas-sive</t></w>
+<w><t>pas-sive</t><adjective><extensible value="false"/></adjective></w>
+<w><t>pas-sive-ly</t><adverb/></w>
<phrase><t>pas-sive o-be-di-ence</t></phrase>
<phrase><t>pas-sive re-sist-ance</t></phrase>
<w><t>pas-siv-ism</t></w>
@@ -116240,7 +116245,7 @@
<w><t>Pa-trice</t></w>
<w><t>pa-tri-ces</t></w>
<w><t>Pa-tri-cia</t></w>
-<w><t>pa-tri-cian</t></w>
+<w><t>pa-tri-cian</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pa-tri-cian-hood</t></w>
<w><t>pa-tri-cian-ism</t></w>
<w><t>pa-tri-cian-ly</t></w>
@@ -116261,7 +116266,7 @@
<w><t>pat-ri-mo-ni-al</t></w>
<w><t>pat-ri-mo-ni-al-ly</t></w>
<w><t>pat-ri-mo-ny</t></w>
-<w><t>pa-tri-ot</t></w>
+<w><t>pa-tri-ot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pa-tri-ot-ic</t></w>
<w><t>pa-tri-ot-i-cal-ly</t></w>
<w><t>pat-ri-ot-ism</t></w>
@@ -116285,7 +116290,7 @@
<w><t>pat-ro-log-ic</t></w>
<w><t>pa-trol-o-gy</t></w>
<phrase><t>pa-trol wag-on</t></phrase>
-<w><t>pa-tron</t></w>
+<w><t>pa-tron</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pat-ron-age</t></w>
<w><t>pa-tron-al</t></w>
<w><t>pa-tron-dom</t></w>
@@ -116402,7 +116407,7 @@
<w><t>pav-an</t></w>
<w><t>pa-van</t></w>
<w><t>pa-vane</t></w>
-<w><t>pave</t></w>
+<w><t>pave</t><verb><regular-root/></verb></w>
<w><t>pa-vé</t></w>
<w><t>pave-ment</t></w>
<w><t>pav-er</t></w>
@@ -116461,7 +116466,7 @@
<w><t>pay-ing</t></w>
<phrase><t>pay-ing guest</t></phrase>
<w><t>pay-load</t></w>
-<w><t>pay-mas-ter</t></w>
+<w><t>pay-mas-ter</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pay-mas-ter-ship</t></w>
<w><t>pay-ment</t></w>
<w><t>Payne</t></w>
@@ -117174,7 +117179,7 @@
<w><t>pen-sile</t></w>
<w><t>pen-sile-ness</t></w>
<w><t>pen-sil-i-ty</t></w>
-<w><t>pen-sion</t></w>
+<w><t>pen-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pen-sion-a-ble</t></w>
<w><t>pen-sion-a-bly</t></w>
<w><t>pen-sion-ar-ies</t></w>
@@ -118054,7 +118059,7 @@
<w><t>per-si-flage</t></w>
<w><t>per-sim-mon</t></w>
<w><t>Per-sis</t></w>
-<w><t>per-sist</t></w>
+<w><t>per-sist</t><verb><regular-root/></verb></w>
<w><t>per-sis-tence</t></w>
<w><t>per-sist-en-cy</t></w>
<w><t>per-sis-ten-cy</t></w>
@@ -118071,7 +118076,7 @@
<w><t>per-son-a-ble-ness</t></w>
<w><t>per-son-a-bly</t></w>
<w><t>Per-so-nae</t></w>
-<w><t>per-son-age</t></w>
+<w><t>per-son-age</t><noun><pluralizable/></noun></w>
<phrase><t>per-so-na gra-ta</t></phrase>
<w><t>per-son-al</t></w>
<phrase><t>per-son-al col-umn</t></phrase>
@@ -118201,7 +118206,7 @@
<phrase><t>Pe-ru-vi-an bark</t></phrase>
<w><t>Pe-ruz-zi</t></w>
<w><t>perv</t></w>
-<w><t>per-vade</t></w>
+<w><t>per-vade</t><verb><regular-root/></verb></w>
<w><t>per-vad-ed</t></w>
<w><t>per-vad-ing</t></w>
<w><t>per-vad-ing-ness</t></w>
@@ -118250,7 +118255,7 @@
<w><t>pes-si-mism</t></w>
<w><t>pes-si-mist</t></w>
<w><t>Pest</t></w>
-<w><t>pest</t></w>
+<w><t>pest</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Pes-ta-loz-zi</t></w>
<w><t>Pes-ta-loz-zi-an-ism</t></w>
<w><t>pes-ter</t></w>
@@ -119963,7 +119968,7 @@
<w><t>pinch-er</t></w>
<w><t>Pin-chot</t></w>
<w><t>pinch-pen-ny</t></w>
-<w><t>Pinck-ney</t></w>
+<w><t>Pinck-ney</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Pinck-ney-ville</t></w>
<w><t>pin-cush-ion</t></w>
<w><t>pin-cush-ion=flow-er</t></w>
@@ -120167,7 +120172,7 @@
<w><t>pi-quet</t></w>
<w><t>piqu-ing</t></w>
<w><t>pi-ra-</t></w>
-<w><t>pi-ra-cy</t></w>
+<w><t>pi-ra-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Pi-rae-us</t></w>
<w><t>pi-ra-gua</t></w>
<w><t>Pi-ran-del-lo</t></w>
@@ -120518,7 +120523,7 @@
<w><t>plan-er</t></w>
<phrase><t>plane sail-ing</t></phrase>
<phrase><t>plane sur-vey-ing</t></phrase>
-<w><t>plan-et</t></w>
+<w><t>plan-et</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>plane ta-ble</t></phrase>
<w><t>plan-e-tar-i-a</t></w>
<w><t>plan-e-tar-i-um</t></w>
@@ -120850,7 +120855,7 @@
<w><t>pleas-ur-a-ble</t></w>
<w><t>pleas-ur-a-ble-ness</t></w>
<w><t>pleas-ur-a-bly</t></w>
-<w><t>pleas-ure</t></w>
+<w><t>pleas-ure</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pleas-ured</t></w>
<w><t>pleas-ure-ful</t></w>
<w><t>pleas-ure-less</t></w>
@@ -120862,7 +120867,7 @@
<w><t>pleb</t></w>
<w><t>pleb-by</t></w>
<w><t>plebe</t></w>
-<w><t>ple-be-ian</t></w>
+<w><t>ple-be-ian</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ple-be-ian-ise</t></w>
<w><t>ple-be-ian-ised</t></w>
<w><t>ple-be-ian-is-ing</t></w>
@@ -121259,7 +121264,7 @@
<w><t>po-chette</t></w>
<w><t>pock</t></w>
<w><t>pocked</t></w>
-<w><t>pock-et</t></w>
+<w><t>pock-et</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>pock-et bat-tle-ship</t></phrase>
<phrase><t>pock-et bil-liards</t></phrase>
<w><t>pock-et-book</t></w>
@@ -121447,7 +121452,7 @@
<w><t>poised</t></w>
<w><t>pois-er</t></w>
<w><t>pois-ing</t></w>
-<w><t>poi-son</t></w>
+<w><t>poi-son</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>poi-son dog-wood</t></phrase>
<w><t>poi-son-er</t></w>
<phrase><t>poi-son gas</t></phrase>
@@ -122435,7 +122440,7 @@
<w><t>pos-ses-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pos-ses-sive</t></w>
<w><t>pos-ses-sive-ness</t></w>
-<w><t>pos-ses-sor</t></w>
+<w><t>pos-ses-sor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pos-ses-so-ri-ness</t></w>
<w><t>pos-ses-sor-ship</t></w>
<w><t>pos-ses-so-ry</t></w>
@@ -123203,6 +123208,7 @@
<w><t>praam</t></w>
<w><t>pra-cha-rak</t></w>
<w><t>prac-tic</t></w>
+<w><t>prac-ti-ca-bil-i-ty</t><noun/></w>
<w><t>prac-ti-ca-ble</t></w>
<w><t>prac-ti-cal</t></w>
<w><t>prac-ti-cal-i-ty</t></w>
@@ -123726,7 +123732,7 @@
<w><t>pre-cau-tion-al</t></w>
<w><t>pre-cau-tion-ar-y</t></w>
<w><t>pre-cau-tious</t></w>
-<w><t>pre-cede</t></w>
+<w><t>pre-cede</t><verb><regular-root/></verb></w>
<w><t>pre-ced-ed</t></w>
<w><t>prec-e-dence</t></w>
<w><t>prec-e-den-cy</t></w>
@@ -127244,7 +127250,7 @@
<w><t>pro-bar-gain-ing</t></w>
<w><t>pro-base-ball</t></w>
<w><t>pro-bas-ket-ball</t></w>
-<w><t>pro-bate</t></w>
+<w><t>pro-bate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-bat-ed</t></w>
<w><t>pro-bat-ing</t></w>
<w><t>pro-ba-tion</t></w>
@@ -127317,7 +127323,7 @@
<w><t>pro-cen-sure</t></w>
<w><t>pro-cen-tral-i-za-tion</t></w>
<w><t>pro-ce-phal-ic</t></w>
-<w><t>proc-ess</t></w>
+<w><t>proc-ess</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>pro-cess</t></w>
<w><t>pro-ces-sion</t></w>
<w><t>pro-ces-sion-al</t></w>
@@ -127540,7 +127546,7 @@
<w><t>pro-fess</t><verb><regular-root/></verb></w>
<w><t>pro-fessed</t><adjective><extensible value="false"/></adjective></w>
<w><t>pro-fess-ed-ly</t><adverb/></w>
-<w><t>pro-fes-sion</t></w>
+<w><t>pro-fes-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pro-fes-sion-al</t></w>
<w><t>pro-fes-sion-al-ise</t></w>
<w><t>pro-fes-sion-al-ised</t></w>
@@ -128117,7 +128123,7 @@
<w><t>pro-pos-a-ble</t></w>
<w><t>pro-pos-al</t></w>
<w><t>...
[truncated message content] |
|
From: <vic...@us...> - 2023-08-28 11:22:01
|
Revision: 13188
http://sourceforge.net/p/foray/code/13188
Author: victormote
Date: 2023-08-28 11:21:58 +0000 (Mon, 28 Aug 2023)
Log Message:
-----------
Minor cleanup related to writing system.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
trunk/foray/foray-common/src/test/java/org/foray/common/i18n/WritingSystem4aTests.java
trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/prop/PdCountry.java
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java 2023-08-28 11:20:18 UTC (rev 13187)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java 2023-08-28 11:21:58 UTC (rev 13188)
@@ -42,7 +42,7 @@
public final class Country4a implements Country {
/** A country code suitable for cases where one is not known. */
- public static final Country4a UNDETERMINED;
+ public static final Country4a NOT_SPECIFIED;
/** The United States of America. */
public static final Country4a USA;
@@ -98,9 +98,9 @@
/* Checkstyle: Allow Magic Numbers that are hard-coded data. */
static {
- /* For "UNDETERMINED," the 2-character, 3-character, and numeric values were assigned by FOray for its own
+ /* For "NOT SPECIFIED," the 2-character, 3-character, and numeric values were assigned by FOray for its own
* convenience, and are not sanctioned by any standards-setting body. */
- Country4a.register(new Country4a("UNDETERMINED", "ZZ", "ZZZ", Short.MAX_VALUE));
+ Country4a.register(new Country4a("NOT SPECIFIED", "ZZ", "ZZZ", Short.MAX_VALUE));
Country4a.register(new Country4a("AALAND ISLANDS", "AX", "ALA", (short) 248));
Country4a.register(new Country4a("AFGHANISTAN", "AF", "AFG", (short) 4));
@@ -345,7 +345,7 @@
Country4a.register(new Country4a("ZAMBIA", "ZM", "ZMB", (short) 894));
Country4a.register(new Country4a("ZIMBABWE", "ZW", "ZWE", (short) 716));
- UNDETERMINED = Country4a.findFrom3Char("ZZZ");
+ NOT_SPECIFIED = Country4a.findFrom3Char("ZZZ");
USA = Country4a.findFrom3Char("USA");
FINLAND = Country4a.findFrom3Char("FIN");
HUNGARY = Country4a.findFrom3Char("HUN");
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-28 11:20:18 UTC (rev 13187)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-28 11:21:58 UTC (rev 13188)
@@ -100,7 +100,7 @@
private int hash;
/**
- * Private constructor. Use {@link #find(Language, Country, Script)} to obtain an instance of this class.
+ * Private constructor. Use {@link #find(Language, Script, Country)} to obtain an instance of this class.
* @param language The language.
* @param country The country.
* @param script The script.
@@ -129,7 +129,7 @@
throw new NullPointerException("Language cannot be null.");
}
- final Country4a countryToUse = country == null ? Country4a.UNDETERMINED : country;
+ final Country4a countryToUse = country == null ? Country4a.NOT_SPECIFIED : country;
Script4a scriptToUse = script == null ? language.getDefaultScript(countryToUse) : script;
scriptToUse = scriptToUse == null ? Script4a.UNDETERMINED : scriptToUse;
@@ -396,26 +396,10 @@
}
}
- /* Country must either match or both be null. */
- final Country thisCountry = this.country;
- final Country otherCountry = other.getCountry();
- if (thisCountry == null) {
- if (otherCountry == null) {
- /* Both are null. Continue. */
- } else {
- return false;
- }
- } else {
- if (otherCountry == null) {
- return false;
- } else {
- if (! thisCountry.equals(otherCountry)) {
- return false;
- }
- }
- }
-
- return true;
+ /* Treat a null country as "not specified", then make sure they match. */
+ final Country thisCountry = this.country == null ? Country4a.NOT_SPECIFIED : this.country;
+ final Country otherCountry = other.getCountry() == null ? Country4a.NOT_SPECIFIED : other.getCountry();
+ return thisCountry.equals(otherCountry);
}
}
Modified: trunk/foray/foray-common/src/test/java/org/foray/common/i18n/WritingSystem4aTests.java
===================================================================
--- trunk/foray/foray-common/src/test/java/org/foray/common/i18n/WritingSystem4aTests.java 2023-08-28 11:20:18 UTC (rev 13187)
+++ trunk/foray/foray-common/src/test/java/org/foray/common/i18n/WritingSystem4aTests.java 2023-08-28 11:21:58 UTC (rev 13188)
@@ -86,7 +86,7 @@
/* Exclude country. */
out = WritingSystem4a.find("eng-Latn");
- parsingAssertions(out, Language4a.ENGLISH, Script4a.LATIN, Country4a.UNDETERMINED);
+ parsingAssertions(out, Language4a.ENGLISH, Script4a.LATIN, Country4a.NOT_SPECIFIED);
/* Exclude script. */
out = WritingSystem4a.find("eng-USA");
Modified: trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/prop/PdCountry.java
===================================================================
--- trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/prop/PdCountry.java 2023-08-28 11:20:18 UTC (rev 13187)
+++ trunk/foray/foray-fotree/src/main/java/org/foray/fotree/fo/prop/PdCountry.java 2023-08-28 11:21:58 UTC (rev 13188)
@@ -85,7 +85,7 @@
}
final DtCountry dtCountry = DtCountry.makeCountryDT(value);
if (dtCountry != null
- && dtCountry.getValue() != Country4a.UNDETERMINED) {
+ && dtCountry.getValue() != Country4a.NOT_SPECIFIED) {
return dtCountry;
}
throw unexpectedValue(value);
@@ -105,7 +105,7 @@
return getValueNoInstance(context, fobj);
}
case NONE: {
- return Country4a.UNDETERMINED;
+ return Country4a.NOT_SPECIFIED;
}
default: {
break;
@@ -130,7 +130,7 @@
if (parent != null) {
return parent.getPropertyList().getCountry(parent, context);
}
- return Country4a.UNDETERMINED;
+ return Country4a.NOT_SPECIFIED;
}
@Override
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-28 11:20:18 UTC (rev 13187)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-28 11:21:58 UTC (rev 13188)
@@ -320,11 +320,22 @@
<explicit-tokens reference="eng-Latn-explicit-tokens"/>
<match-rules reference="eng-Latn-match-rules"/>
<derivative-rules reference="eng-Latn-derivative-patterns"/>
- <dictionary reference="org.foray.eng.Latn.ZZZ"/>
+ <dictionary reference="org.foray.eng.Latn.USA"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="eng-Latn-derivatives"/>
</orthography>
+ <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="GBR">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <explicit-tokens reference="eng-Latn-explicit-tokens"/>
+ <match-rules reference="eng-Latn-match-rules"/>
+ <derivative-rules reference="eng-Latn-derivative-patterns"/>
+ <dictionary reference="org.foray.eng.Latn.GBR"/>
+ <hyphenation-patterns reference="hyph-patterns-eng"/>
+ <derivative-factories reference="eng-Latn-derivatives"/>
+ </orthography>
+
<orthography language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ">
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-28 11:20:20
|
Revision: 13187
http://sourceforge.net/p/foray/code/13187
Author: victormote
Date: 2023-08-28 11:20:18 +0000 (Mon, 28 Aug 2023)
Log Message:
-----------
Don't use a dictionary that doesn't match the writing system of this orthography.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-08-28 10:03:19 UTC (rev 13186)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-08-28 11:20:18 UTC (rev 13187)
@@ -290,10 +290,14 @@
Dictionary adhocDictionary = adhocDictionaries.get(index);
/* Check the referenced dictionary and each of its ancestor dictionaries. */
while (adhocDictionary != null) {
- if (adhocDictionary.getWord(wordChars, 0) == null) {
+ if (adhocDictionary.getWritingSystem().satisfies(this.writingSystem)) {
+ if (adhocDictionary.getWord(wordChars, 0) == null) {
+ adhocDictionary = adhocDictionary.getParentDictionary(this.server);
+ } else {
+ return true;
+ }
+ } else {
adhocDictionary = adhocDictionary.getParentDictionary(this.server);
- } else {
- return true;
}
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-28 10:03:21
|
Revision: 13186
http://sourceforge.net/p/foray/code/13186
Author: victormote
Date: 2023-08-28 10:03:19 +0000 (Mon, 28 Aug 2023)
Log Message:
-----------
Conform to aXSL change: DTD change to tie a WritingSystem directly to an orthography.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-28 09:28:40 UTC (rev 13185)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-28 10:03:19 UTC (rev 13186)
@@ -314,7 +314,7 @@
</unparsed-hyphenation-patterns>
</hyphenation-patterns-resource>
- <configuration>
+ <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA">
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
<explicit-tokens reference="eng-Latn-explicit-tokens"/>
@@ -323,43 +323,38 @@
<dictionary reference="org.foray.eng.Latn.ZZZ"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="eng-Latn-derivatives"/>
- <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
- <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA"/>
- <orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
- <orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="ZZZ"/>
- </configuration>
+ </orthography>
- <configuration>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
<match-rules reference="lat-Latn-match-rules"/>
<dictionary reference="org.foray.lat.Latn.ZZZ"/>
- <orthography language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
- </configuration>
+ </orthography>
- <configuration>
+ <orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ZZZ">
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
<dictionary reference="org.foray.ita.Latn.ZZZ"/>
- <orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
- </configuration>
+ </orthography>
- <configuration>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
<derivative-rules reference="fre-Latn-derivative-patterns"/>
<dictionary reference="org.foray.fre.Latn.ZZZ"/>
- <orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
- </configuration>
+ </orthography>
- <configuration>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
<dictionary reference="org.foray.grc.Latn.ZZZ"/>
- <orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
- </configuration>
+ </orthography>
- <configuration>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <orthography language-iso-3char="heb" script-iso-4char="Latn" country-iso-3char="ZZZ">
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="heb" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
<dictionary reference="org.foray.heb.Latn.ZZZ"/>
- <orthography language-iso-3char="heb" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
- </configuration>
+ </orthography>
</axsl-orthography-config>
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-08-28 09:28:40 UTC (rev 13185)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2023-08-28 10:03:19 UTC (rev 13186)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.wrapper.CapitalizedWord;
import org.foray.orthography.wrapper.ExactWord;
@@ -84,12 +85,17 @@
/** The explicit tokens for this orthography. */
private ExplicitTokens explicitTokens;
+ /** The writing system for this orthography. */
+ private WritingSystem4a writingSystem;
+
/**
* Constructor.
* @param server The parent hyphenation server.
+ * @param writingSystem The writing system for this orthography.
*/
- public Orthography4a(final OrthographyServer4a server) {
+ public Orthography4a(final OrthographyServer4a server, final WritingSystem4a writingSystem) {
this.server = server;
+ this.writingSystem = writingSystem;
}
/**
@@ -525,4 +531,12 @@
return this.explicitTokens.findToken(sequence, start);
}
+ /**
+ * Returns the writing system for this orthography.
+ * @return The writing system for this orthography.
+ */
+ public WritingSystem4a getWritingSystem() {
+ return this.writingSystem;
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-08-28 09:28:40 UTC (rev 13185)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2023-08-28 10:03:19 UTC (rev 13186)
@@ -321,12 +321,10 @@
/* All processing is done at endElement. */
return;
}
- case "configuration": {
- this.currentOrthographyConfig = new Orthography4a(this.hyphenationServer);
- return;
- }
case "orthography": {
- parseElementOrthography(attributes);
+ final WritingSystem4a writingSystem = parseWritingSystem(attributes);
+ this.currentOrthographyConfig = new Orthography4a(this.hyphenationServer, writingSystem);
+ this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig);
return;
}
case "noun": {
@@ -433,10 +431,11 @@
}
/**
- * Parses the "orthography" element.
+ * Parses the writing system from "orthography" attributes.
* @param attributes The raw parsed attributes.
+ * @return The parsed writing system.
*/
- private void parseElementOrthography(final Attributes attributes) {
+ private WritingSystem4a parseWritingSystem(final Attributes attributes) {
final String languageString = attributes.getValue("language-iso-3char");
final String countryString = attributes.getValue("country-iso-3char");
final String scriptString = attributes.getValue("script-iso-4char");
@@ -456,7 +455,7 @@
if (writingSystem == null) {
errorMessage("Unable to find script for: {}_{}_{}", languageString, scriptString, countryString);
}
- this.hyphenationServer.registerOrthography(writingSystem, this.currentOrthographyConfig);
+ return writingSystem;
}
@Override
@@ -603,13 +602,10 @@
case "unparsed-hyphenation-patterns": {
return;
}
- case "configuration": {
+ case "orthography": {
this.currentOrthographyConfig = null;
return;
}
- case "orthography": {
- return;
- }
case "explicit-token-list": {
this.currentExplicitTokens = null;
return;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-28 09:28:41
|
Revision: 13185
http://sourceforge.net/p/foray/code/13185
Author: victormote
Date: 2023-08-28 09:28:40 +0000 (Mon, 28 Aug 2023)
Log Message:
-----------
Conform to aXSL change: Add method indicating whether one WritingSystem can satisfy the requirements of another.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java 2023-08-27 13:13:13 UTC (rev 13184)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Country4a.java 2023-08-28 09:28:40 UTC (rev 13185)
@@ -498,6 +498,9 @@
@Override
public boolean equals(final Object other) {
+ if (other == null) {
+ return false;
+ }
if (! (other instanceof Country)) {
return false;
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java 2023-08-27 13:13:13 UTC (rev 13184)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java 2023-08-28 09:28:40 UTC (rev 13185)
@@ -850,6 +850,9 @@
@Override
public boolean equals(final Object other) {
+ if (other == null) {
+ return false;
+ }
if (! (other instanceof Language)) {
return false;
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java 2023-08-27 13:13:13 UTC (rev 13184)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Script4a.java 2023-08-28 09:28:40 UTC (rev 13185)
@@ -636,6 +636,9 @@
@Override
public boolean equals(final Object other) {
+ if (other == null) {
+ return false;
+ }
if (! (other instanceof Script)) {
return false;
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-27 13:13:13 UTC (rev 13184)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-28 09:28:40 UTC (rev 13185)
@@ -105,10 +105,10 @@
* @param country The country.
* @param script The script.
*/
- private WritingSystem4a(final Language4a language, final Country4a country, final Script4a script) {
+ private WritingSystem4a(final Language4a language, final Script4a script, final Country4a country) {
this.language = language;
+ this.script = script;
this.country = country;
- this.script = script;
/* Cache the hash code for performance. */
this.hash = ObjectUtils.HASH_CODE_SEED;
@@ -125,9 +125,12 @@
* @return The writing system for the given parameters.
*/
public static WritingSystem4a find(final Language4a language, final Script4a script, final Country4a country) {
- final Language4a languageToUse = language == null ? Language4a.UNDETERMINED : language;
+ if (language == null) {
+ throw new NullPointerException("Language cannot be null.");
+ }
+
final Country4a countryToUse = country == null ? Country4a.UNDETERMINED : country;
- Script4a scriptToUse = script == null ? languageToUse.getDefaultScript(countryToUse) : script;
+ Script4a scriptToUse = script == null ? language.getDefaultScript(countryToUse) : script;
scriptToUse = scriptToUse == null ? Script4a.UNDETERMINED : scriptToUse;
Map<Language4a, Map<Country4a, WritingSystem4a>> scriptMap = REGISTRATION_MAP.get(scriptToUse);
@@ -135,14 +138,14 @@
scriptMap = new HashMap<Language4a, Map<Country4a, WritingSystem4a>>();
REGISTRATION_MAP.put(scriptToUse, scriptMap);
}
- Map<Country4a, WritingSystem4a> languageMap = scriptMap.get(languageToUse);
+ Map<Country4a, WritingSystem4a> languageMap = scriptMap.get(language);
if (languageMap == null) {
languageMap = new HashMap<Country4a, WritingSystem4a>();
- scriptMap.put(languageToUse, languageMap);
+ scriptMap.put(language, languageMap);
}
WritingSystem4a writingSystem = languageMap.get(countryToUse);
if (writingSystem == null) {
- writingSystem = new WritingSystem4a(languageToUse, countryToUse, scriptToUse);
+ writingSystem = new WritingSystem4a(language, scriptToUse, countryToUse);
languageMap.put(countryToUse, writingSystem);
}
return writingSystem;
@@ -358,4 +361,61 @@
return null;
}
+ @Override
+ public boolean satisfies(final WritingSystem other) {
+ if (other == null) {
+ return false;
+ }
+
+ /* Language must match. */
+ if (this.language == null
+ || other.getLanguage() == null) {
+ return false;
+ }
+ if (! this.language.equals(other.getLanguage())) {
+ return false;
+ }
+
+ /* Script must either match or both be null. */
+ final Script thisScript = this.script == null ? this.language.getDefaultScript(this.country) : this.script;
+ final Script otherScript = other.getScript() == null ?
+ other.getLanguage().getDefaultScript(other.getCountry()) : other.getScript();
+ if (thisScript == null) {
+ if (otherScript == null) {
+ /* Both are null. Continue. */
+ } else {
+ return false;
+ }
+ } else {
+ if (otherScript == null) {
+ return false;
+ } else {
+ if (! thisScript.equals(otherScript)) {
+ return false;
+ }
+ }
+ }
+
+ /* Country must either match or both be null. */
+ final Country thisCountry = this.country;
+ final Country otherCountry = other.getCountry();
+ if (thisCountry == null) {
+ if (otherCountry == null) {
+ /* Both are null. Continue. */
+ } else {
+ return false;
+ }
+ } else {
+ if (otherCountry == null) {
+ return false;
+ } else {
+ if (! thisCountry.equals(otherCountry)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-27 13:13:15
|
Revision: 13184
http://sourceforge.net/p/foray/code/13184
Author: victormote
Date: 2023-08-27 13:13:13 +0000 (Sun, 27 Aug 2023)
Log Message:
-----------
Clean up variable names and doc. Change "toString" concepts to match the xml:lang attribute.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-27 03:12:55 UTC (rev 13183)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-27 13:13:13 UTC (rev 13184)
@@ -140,12 +140,12 @@
languageMap = new HashMap<Country4a, WritingSystem4a>();
scriptMap.put(languageToUse, languageMap);
}
- WritingSystem4a orthography = languageMap.get(countryToUse);
- if (orthography == null) {
- orthography = new WritingSystem4a(languageToUse, countryToUse, scriptToUse);
- languageMap.put(countryToUse, orthography);
+ WritingSystem4a writingSystem = languageMap.get(countryToUse);
+ if (writingSystem == null) {
+ writingSystem = new WritingSystem4a(languageToUse, countryToUse, scriptToUse);
+ languageMap.put(countryToUse, writingSystem);
}
- return orthography;
+ return writingSystem;
}
/**
@@ -250,56 +250,49 @@
return count;
}
+
/**
- * Indicates whether the language in the orthography contains a valid 3-character alpha code.
- * @param orthography The orthography to be tested.
+ * Returns the xml:lang value for a given writing system.
+ * For example, the value for English (with no script or country specifier) would be "eng".
+ * For English with country specifier United States, the value would be "eng-USA".
+ * If the Latin script were specified, the value would be "eng-Latn-USA".
+ *
+ * @param writingSystem The writing system to be stringified.
* Note that this does not need to be an instance of this class, but can be any instance of {@link WritingSystem}.
- * @return True if and only if the orthography contains a language with a valid 3-character alpha code.
+ * @return The xml:lang value for {@code writingSystem}.
*/
- public static boolean is3CharacterLanguageCodeValid(final WritingSystem orthography) {
- if (orthography == null
- || (orthography.getLanguage().getAlpha3Code() != null
- && orthography.getLanguage().getAlpha3Code().equals(Language4a.UNDETERMINED.getAlpha3Code()))) {
- return false;
+ public static String getXmlLangValue(final WritingSystem writingSystem) {
+ if (writingSystem == null) {
+ return StringUtils.EMPTY_STRING;
}
- return true;
- }
+ final StringBuilder builder = new StringBuilder();
- /**
- * Indicates whether the country in the orthography contains a valid 3-character alpha code.
- * @param orthography The orthography to be tested.
- * Note that this does not need to be an instance of this class, but can be any instance of {@link WritingSystem}.
- * @return True if and only if the orthography contains a country with a valid 3-character alpha code.
- */
- public static boolean is3CharacterCountryCodeValid(final WritingSystem orthography) {
- if (orthography == null
- || (orthography.getCountry().getAlpha3Code() != null
- && orthography.getCountry().getAlpha3Code().equals(Country4a.UNDETERMINED.getAlpha3Code()))) {
- return false;
+ final Language language = writingSystem.getLanguage();
+ if (language != null
+ && language.getAlpha3Code() != null
+ && language.getAlpha3Code().length() > 0) {
+ builder.append(language.getAlpha3Code());
+ } else {
+ /* There must be at least a valid language code. */
+ return StringUtils.EMPTY_STRING;
}
- return true;
- }
- /**
- * Returns the concatenation of the 3-character language code, the underscore character ("_"), and the 3-character
- * country code for a given orthography.
- * For example, for U.S. English, the returned code should be "eng_USA".
- *
- * @param orthography The orthography containing the language and country information.
- * Note that this does not need to be an instance of this class, but can be any instance of {@link WritingSystem}.
- * @return The concatenation of the language and country code for {@code orthography}.
- */
- public static String get3CharacterLanguageCountryCode(final WritingSystem orthography) {
- if (! WritingSystem4a.is3CharacterLanguageCodeValid(orthography)) {
- return null;
+ final Script script = writingSystem.getScript();
+ if (script != null
+ && script.getAlphaCode() != null
+ && script.getAlphaCode().length() > 0) {
+ builder.append("-");
+ builder.append(script.getAlphaCode());
}
- if (! WritingSystem4a.is3CharacterCountryCodeValid(orthography)) {
- return null;
+
+ final Country country = writingSystem.getCountry();
+ if (country != null
+ && country.getAlpha3Code() != null
+ && country.getAlpha3Code().length() > 0) {
+ builder.append("-");
+ builder.append(country.getAlpha3Code());
}
- final StringBuilder builder = new StringBuilder();
- builder.append(orthography.getLanguage().getAlpha3Code());
- builder.append("_");
- builder.append(orthography.getCountry().getAlpha3Code());
+
return builder.toString();
}
@@ -311,10 +304,10 @@
if (! (other instanceof WritingSystem)) {
return false;
}
- final WritingSystem otherOrthography = (WritingSystem) other;
- if (this.language.equals(otherOrthography.getLanguage())
- && this.country.equals(otherOrthography.getCountry())
- && this.script.equals(otherOrthography.getScript())) {
+ final WritingSystem otherWritingSystem = (WritingSystem) other;
+ if (this.language.equals(otherWritingSystem.getLanguage())
+ && this.country.equals(otherWritingSystem.getCountry())
+ && this.script.equals(otherWritingSystem.getScript())) {
return true;
}
return false;
@@ -327,13 +320,7 @@
@Override
public String toString() {
- final StringBuilder builder = new StringBuilder();
- builder.append(this.language.toString());
- builder.append("-");
- builder.append(this.script.toString());
- builder.append("-");
- builder.append(this.country.toString());
- return builder.toString();
+ return getXmlLangValue(this);
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-27 03:12:57
|
Revision: 13183
http://sourceforge.net/p/foray/code/13183
Author: victormote
Date: 2023-08-27 03:12:55 +0000 (Sun, 27 Aug 2023)
Log Message:
-----------
Conform to aXSL change: Add methods supporting remaining components of the xml:lang attribute.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-27 02:09:30 UTC (rev 13182)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2023-08-27 03:12:55 UTC (rev 13183)
@@ -356,4 +356,19 @@
return new ULocale(languageString, countryString);
}
+ @Override
+ public String getVariant() {
+ return null;
+ }
+
+ @Override
+ public String getExtension() {
+ return null;
+ }
+
+ @Override
+ public String getPrivateUse() {
+ return null;
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-27 02:09:35
|
Revision: 13182
http://sourceforge.net/p/foray/code/13182
Author: victormote
Date: 2023-08-27 02:09:30 +0000 (Sun, 27 Aug 2023)
Log Message:
-----------
Conform to aXSL change: Add method to Dictionary to return the WritingSystem which it supports.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SegmentDictionary.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SimpleDictionary.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserText.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SegmentDictionary.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SegmentDictionary.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SegmentDictionary.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -29,7 +29,9 @@
package org.foray.orthography;
import org.foray.common.data.TernaryTreeMap;
+import org.foray.common.i18n.WritingSystem4a;
+import org.axsl.i18n.WritingSystem;
import org.axsl.orthography.OrthographyServer;
import org.axsl.orthography.Word.PartOfSpeech;
import org.axsl.orthography.Word.PosQualifier;
@@ -56,6 +58,9 @@
/** The unique id of the dictionary that this dictionary overrides. */
private String overrides;
+ /** The writing system supported by this dictionary. */
+ private WritingSystem4a writingSystem;
+
/** The data structure containing the dictionary words. */
private Map<CharSequence, SegmentDictionaryWord> wordMap;
@@ -71,11 +76,12 @@
* @param id The unique id for this dictionary.
* @param overrides The unique id of the dictionary that this dictionary overrides, if any.
* @param uniqueWordSegments The array of word segments that can be used by words in this dictionary.
+ * @param writingSystem The writing system supported by this dictionary.
* @param initialCapacity The number of words that are expected to be added to this dictionary.
* This will give the dictionary a clue about how best to balance memory and performance issues.
*/
- public SegmentDictionary(final String id, final String overrides, final StringWordSegment[] uniqueWordSegments,
- final int initialCapacity) {
+ public SegmentDictionary(final String id, final String overrides, final WritingSystem4a writingSystem,
+ final StringWordSegment[] uniqueWordSegments, final int initialCapacity) {
if (uniqueWordSegments.length > Character.MAX_VALUE + 1) {
throw new IllegalArgumentException("Size of segments: " + uniqueWordSegments.length +
" exceeds capacity: " + (Character.MAX_VALUE + 1));
@@ -82,6 +88,7 @@
}
this.id = id;
this.overrides = overrides;
+ this.writingSystem = writingSystem;
Arrays.sort(uniqueWordSegments);
this.wordSegments = uniqueWordSegments;
@@ -141,10 +148,12 @@
* However, it is at least useful for tests.
* @param id The unique id for the new dictionary.
* @param overrides The unique id of the dictionary that this dictionary overrides, if any.
+ * @param writingSystem The writing system supported by this dictionary.
* @param stringWords The list of string words that will comprise the dictionary.
* @return The new dictionary instance.
*/
- public static SegmentDictionary make(final String id, final String overrides, final List<StringWord> stringWords) {
+ public static SegmentDictionary make(final String id, final String overrides, final WritingSystem4a writingSystem,
+ final List<StringWord> stringWords) {
final Set<StringWordSegmentUtf16> segments = new HashSet<StringWordSegmentUtf16>(stringWords.size() * 3);
for (int index = 0; index < stringWords.size(); index ++) {
final StringWord stringWord = stringWords.get(index);
@@ -155,7 +164,8 @@
}
final StringWordSegmentUtf16[] segmentArray = new StringWordSegmentUtf16[segments.size()];
segments.toArray(segmentArray);
- final SegmentDictionary dictionary = new SegmentDictionary(id, overrides, segmentArray, stringWords.size());
+ final SegmentDictionary dictionary = new SegmentDictionary(id, overrides, writingSystem, segmentArray,
+ stringWords.size());
for (int index = 0; index < stringWords.size(); index ++) {
final StringWord stringWord = stringWords.get(index);
@@ -291,4 +301,9 @@
return server.getDictionary(this.overrides);
}
+ @Override
+ public WritingSystem getWritingSystem() {
+ return this.writingSystem;
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SimpleDictionary.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SimpleDictionary.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/SimpleDictionary.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -28,6 +28,9 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
+
+import org.axsl.i18n.WritingSystem;
import org.axsl.orthography.OrthographyServer;
import org.axsl.orthography.Word;
import org.axsl.orthography.Word.PartOfSpeech;
@@ -120,4 +123,9 @@
return null;
}
+ @Override
+ public WritingSystem getWritingSystem() {
+ return WritingSystem4a.USA;
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -78,8 +78,8 @@
/** The id of the dictionary that this dictionary overrides, if any. */
private String overrides;
- /** The orthography for this dictionary. */
- private WritingSystem4a orthography;
+ /** The writing system for this dictionary. */
+ private WritingSystem4a writingSystem;
/** The soft hyphen char for this dictionary. */
private char softHyphenChar;
@@ -162,7 +162,8 @@
segmentSet.toArray(uniqueWordSegments);
Arrays.sort(uniqueWordSegments);
final SegmentDictionary dictionary = new SegmentDictionary(this.currentDictionary.id,
- this.currentDictionary.overrides, uniqueWordSegments, this.wordMap.size());
+ this.currentDictionary.overrides, this.currentDictionary.writingSystem, uniqueWordSegments,
+ this.wordMap.size());
for (Map.Entry<String, StringWord> entry : this.wordMap.entrySet()) {
dictionary.addWord(entry.getKey(), entry.getValue());
@@ -306,8 +307,8 @@
final String country = attributes.getValue(StringUtils.EMPTY_STRING, "country");
final String script = attributes.getValue(StringUtils.EMPTY_STRING, "script");
- this.currentDictionary.orthography = WritingSystem4a.find(language, script, country);
- debugMessage("Begin dictionary word list parsing: " + this.currentDictionary.orthography.toString());
+ this.currentDictionary.writingSystem = WritingSystem4a.find(language, script, country);
+ debugMessage("Begin dictionary word list parsing: " + this.currentDictionary.writingSystem.toString());
final String soft = attributes.getValue(StringUtils.EMPTY_STRING, "soft-hyphen-char");
if (soft.length() != 1) {
throw new SAXException("Attribute soft-hyphen-char must have exactly one char.");
@@ -318,7 +319,7 @@
throw new SAXException("Attribute hard-hyphen-char must have exactly one char.");
}
this.currentDictionary.hardHyphenChar = hard.charAt(0);
- final Locale locale = this.currentDictionary.orthography.toLocale();
+ final Locale locale = this.currentDictionary.writingSystem.toLocale();
if (locale != null) {
this.collator = Collator.getInstance(locale);
this.collator.setDecomposition(Collator.FULL_DECOMPOSITION);
@@ -456,7 +457,7 @@
case "ordinal": break;
case "word-group": break;
case "axsl-dictionary": {
- debugMessage("End parsing for dictionary: " + this.currentDictionary.orthography.toString());
+ debugMessage("End parsing for dictionary: " + this.currentDictionary.writingSystem.toString());
debugMessage("Qty of unique word segments parsed: " + segmentSet.size());
debugMessage("Qty of words parsed: " + wordMap.size());
break;
@@ -529,7 +530,7 @@
} else {
if (this.collator.compare(collatingContent, this.lastWord) < 0) {
warningMessage("Out of sequence (Collator " +
- this.currentDictionary.orthography.toLocale().toString() + "): " + actualContent);
+ this.currentDictionary.writingSystem.toLocale().toString() + "): " + actualContent);
}
this.lastWord = collatingContent;
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserText.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserText.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParserText.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -28,6 +28,7 @@
package org.foray.orthography.util;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.orthography.SegmentDictionary;
import org.foray.orthography.StringWord;
import org.foray.orthography.StringWordSegment;
@@ -81,10 +82,12 @@
* Parses a given InputStream and places the parsed information into the dictionary.
* @param inputStream The input stream to parse.
* @param description Description of {@literal inputStream}, useful for user messages.
+ * @param writingSystem The writing system for the dictionary being created.
* @return The parsed dictionary.
* @throws IOException For IO errors during parsing.
*/
- public SegmentDictionary parse(final InputStream inputStream, final String description) throws IOException {
+ public SegmentDictionary parse(final InputStream inputStream, final String description,
+ final WritingSystem4a writingSystem) throws IOException {
logger.info("Begin dictionary word list parsing: " + description);
final InputStreamReader isReader = new InputStreamReader(inputStream);
@@ -158,7 +161,8 @@
final StringWordSegment[] uniqueWordSegments = new StringWordSegment[segmentSet.size()];
segmentSet.toArray(uniqueWordSegments);
Arrays.sort(uniqueWordSegments);
- final SegmentDictionary dictionary = new SegmentDictionary("created", null, uniqueWordSegments, wordMap.size());
+ final SegmentDictionary dictionary = new SegmentDictionary("created", null, writingSystem, uniqueWordSegments,
+ wordMap.size());
for (Map.Entry<String, StringWord> entry : wordMap.entrySet()) {
dictionary.addWord(entry.getKey(), entry.getValue());
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionarySerializer.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -30,6 +30,7 @@
import org.foray.common.ForayConstants;
import org.foray.common.IoUtil;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.orthography.SegmentDictionary;
import org.apache.commons.cli.CommandLine;
@@ -125,7 +126,10 @@
/* Parse the dictionary. */
try {
final InputStream inputStream = infile.toURI().toURL().openStream();
- dictionary = parser.parse(inputStream, infile.getAbsolutePath());
+ /* TODO: Allow the writing system to be specified upstream. For now users should just manually change it in
+ * the serialized dictionary file. */
+ final WritingSystem4a writingSystem = WritingSystem4a.USA;
+ dictionary = parser.parse(inputStream, infile.getAbsolutePath(), writingSystem);
} catch (final IOException e) {
this.logger.error(e.getMessage());
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.CharSequenceUtils;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -97,7 +98,7 @@
* */
assertEquals(8, segmentsArray.length);
- out = new SegmentDictionary("test", null, segmentsArray, 1000);
+ out = new SegmentDictionary("test", null, WritingSystem4a.USA, segmentsArray, 1000);
out.addWord(WORD_ATTENTION.getActualContent().toString(), WORD_ATTENTION);
out.addWord(WORD_INTENTION.getActualContent().toString(), WORD_INTENTION);
out.addWord(WORD_AMBITION.getActualContent().toString(), WORD_AMBITION);
@@ -128,7 +129,7 @@
final List<StringWord> words = new ArrayList<StringWord>();
words.add(WORD_ATTENTION);
words.add(WORD_HARMONIOUS);
- final SegmentDictionary dictionary = SegmentDictionary.make("test", null, words);
+ final SegmentDictionary dictionary = SegmentDictionary.make("test", null, WritingSystem4a.USA, words);
assertEquals(2, dictionary.getSize());
assertEquals("at-ten-tion", dictionary.getWord("attention", 0).toString());
assertEquals("har-mo-ni-ous", dictionary.getWord("harmonious", 0).toString());
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java 2023-08-26 22:41:22 UTC (rev 13181)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java 2023-08-27 02:09:30 UTC (rev 13182)
@@ -28,6 +28,7 @@
package org.foray.orthography;
+import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.CharSequenceUtils;
import org.axsl.kp.KpNode;
@@ -63,7 +64,7 @@
words.add(SegmentDictionaryTests.WORD_AMBITION);
words.add(SegmentDictionaryTests.WORD_INTREPID);
words.add(SegmentDictionaryTests.WORD_HARMONIOUS);
- dictionary = SegmentDictionary.make("test", null, words);
+ dictionary = SegmentDictionary.make("test", null, WritingSystem4a.USA, words);
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-26 22:41:25
|
Revision: 13181
http://sourceforge.net/p/foray/code/13181
Author: victormote
Date: 2023-08-26 22:41:22 +0000 (Sat, 26 Aug 2023)
Log Message:
-----------
Capture the lemma at the proper time so that its content doesn't bleed into the word text.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-08-26 14:57:23 UTC (rev 13180)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2023-08-26 22:41:22 UTC (rev 13181)
@@ -469,7 +469,12 @@
break;
}
case "vf": break;
- case "lemma": break;
+ case "lemma": {
+ /* Not currently doing anything with this. */
+// final String lemma =
+ getAndClearText();
+ break;
+ }
case "regular-root": break;
case "remote-past": break;
case "past": break;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-26 14:57:25
|
Revision: 13180
http://sourceforge.net/p/foray/code/13180
Author: victormote
Date: 2023-08-26 14:57:23 +0000 (Sat, 26 Aug 2023)
Log Message:
-----------
Turn off text accumulation while parsing text surrogate elements.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-08-26 14:26:45 UTC (rev 13179)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2023-08-26 14:57:23 UTC (rev 13180)
@@ -172,7 +172,8 @@
/** The list of elements whose starting tag should never be straddled by a word. */
private List<String> elementStartList = Arrays.asList(new String[] {"Footnote", "Sidenote"});
- /** Map whose key is elements that are actually tokens for text, and whose value is that text. */
+ /** Map whose keys are elements that are actually tokens for text, and whose values are the text that should be
+ * used for purposes of spell-checking. */
private Map<String, String> textElementMap = new HashMap<String, String>();
{
textElementMap.put("Nbsp", "\u00A0");
@@ -180,7 +181,9 @@
textElementMap.put("Zwsp", StringUtils.EMPTY_STRING);
/* For printing purposes, OmittedWord renders as a series of em-dashes. However, for spell-check purposes, we
* need for it to be a pseudo-word to which rules can be applied. */
- textElementMap.put("OmittedWord", "Aaaaaaaaaa");
+ textElementMap.put("OmittedWord", "omitted");
+ textElementMap.put("PageRef", "999");
+ textElementMap.put("Roman", "III");
}
/** The list of ad-hoc dictionaries, usually parsed from the command-line. */
@@ -273,12 +276,10 @@
if (this.textElementMap.containsKey(localName)) {
final String textElementValue = this.textElementMap.get(localName);
appendText(textElementValue);
+ /* Elements that resolve to text might have elements or text inside of them, so turn off text accumulation
+ * until the matching end tag is reached. */
+ setTextParsingActive(false);
return;
}
- if ("Roman".equals(localName)) {
- /* This is not accurate, but should serve the purposes of a spell-checker. */
- appendText("III");
- }
final WritingSystem4a oldWritingSystem = getCurrentWritingSystem();
@@ -318,6 +319,7 @@
/* Elements that are surrogates for text were handled in startElement, and should be ignored here. */
if (this.textElementMap.containsKey(localName)) {
+ setTextParsingActive(true);
return;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-26 14:26:47
|
Revision: 13179
http://sourceforge.net/p/foray/code/13179
Author: victormote
Date: 2023-08-26 14:26:45 +0000 (Sat, 26 Aug 2023)
Log Message:
-----------
Normal dictionary editing.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-25 23:13:24 UTC (rev 13178)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-26 14:26:45 UTC (rev 13179)
@@ -401,7 +401,7 @@
<w><t>ab-ohm</t></w>
<w><t>a-boi-deau</t></w>
<w><t>a-boi-teau</t></w>
-<w><t>a-bol-ish</t></w>
+<w><t>a-bol-ish</t><verb><regular-root/></verb></w>
<w><t>a-bol-ish-a-ble</t></w>
<w><t>a-bol-ish-er</t></w>
<w><t>a-bol-ish-ment</t></w>
@@ -1389,7 +1389,7 @@
<w><t>ac-quire-ment</t></w>
<w><t>ac-quir-er</t></w>
<w><t>ac-quir-ing</t></w>
-<w><t>ac-qui-si-tion</t></w>
+<w><t>ac-qui-si-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-quis-i-tive</t></w>
<w><t>ac-quis-i-tive-ly</t></w>
<w><t>ac-quis-i-tive-ness</t></w>
@@ -1912,7 +1912,7 @@
<w><t>ad-join</t></w>
<w><t>ad-join-ing</t></w>
<w><t>ad-joint</t></w>
-<w><t>ad-journ</t></w>
+<w><t>ad-journ</t><verb><regular-root/></verb></w>
<w><t>ad-journ-ment</t></w>
<w><t>adjt</t></w>
<w><t>ad-judge</t></w>
@@ -4896,7 +4896,7 @@
<w><t>am-bi-ence</t></w>
<w><t>am-bi-enc-es</t></w>
<w><t>am-bi-ent</t></w>
-<w><t>am-bi-gu-i-ty</t></w>
+<w><t>am-bi-gu-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>am-big-u-ous</t></w>
<w><t>am-big-u-ous-ly</t></w>
<w><t>am-big-u-ous-ness</t></w>
@@ -6383,7 +6383,7 @@
<w><t>an-i-ma</t></w>
<w><t>an-i-mad-ver-sion</t></w>
<w><t>an-i-mad-ver-sion-al</t></w>
-<w><t>an-i-mad-vert</t></w>
+<w><t>an-i-mad-vert</t><verb><regular-root/></verb></w>
<w><t>an-i-mad-vert-er</t></w>
<w><t>an-i-mal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>an-i-mal-cu-lar</t></w>
@@ -8487,7 +8487,7 @@
<w><t>ap-plaud-a-bly</t></w>
<w><t>ap-plaud-er</t></w>
<w><t>ap-plaud-ing-ly</t></w>
-<w><t>ap-plause</t></w>
+<w><t>ap-plause</t><noun><pluralizable/></noun></w>
<w><t>ap-plau-sive</t></w>
<w><t>ap-ple</t></w>
<phrase><t>ap-ple blight</t></phrase>
@@ -8596,7 +8596,7 @@
<w><t>ap-pre-hen-si-bil-i-ty</t></w>
<w><t>ap-pre-hen-si-ble</t></w>
<w><t>ap-pre-hen-si-bly</t></w>
-<w><t>ap-pre-hen-sion</t></w>
+<w><t>ap-pre-hen-sion</t><noun><plural/></noun></w>
<w><t>ap-pre-hen-sive</t></w>
<w><t>ap-pre-hen-sive-ly</t></w>
<w><t>ap-pre-hen-sive-ness</t></w>
@@ -9977,7 +9977,7 @@
<w><t>As-cen-sion-tide</t></w>
<w><t>as-cen-sive</t></w>
<w><t>as-cent</t></w>
-<w><t>as-cer-tain</t></w>
+<w><t>as-cer-tain</t><verb><regular-root/></verb></w>
<w><t>as-cer-tain-a-ble</t></w>
<w><t>as-cer-tain-a-ble-ness</t></w>
<w><t>as-cer-tain-a-bly</t></w>
@@ -11024,7 +11024,7 @@
<w><t>at-tire-ment</t></w>
<w><t>at-tir-ing</t></w>
<w><t>At-tis</t></w>
-<w><t>at-ti-tude</t></w>
+<w><t>at-ti-tude</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>at-ti-tu-di-nal</t></w>
<w><t>at-ti-tu-di-nar-i-an</t></w>
<w><t>at-ti-tu-di-nar-i-an-ism</t></w>
@@ -11983,7 +11983,7 @@
<w><t>Ay-cliffe</t></w>
<w><t>Ay-de-lotte</t></w>
<w><t>Ay-din</t></w>
-<w><t>aye</t></w>
+<w><t>aye</t><noun><pluralizable/></noun></w>
<w><t>aye=aye</t></w>
<w><t>A-ye-sha</t></w>
<w><t>a-yin</t></w>
@@ -14096,7 +14096,7 @@
<w><t>bat-wing</t></w>
<phrase><t>bat-wing sleeve</t></phrase>
<w><t>bat-wom-an</t></w>
-<w><t>bau-ble</t></w>
+<w><t>bau-ble</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bau-bo</t></w>
<w><t>Bau-chi</t></w>
<w><t>Bau-cis</t></w>
@@ -18684,7 +18684,7 @@
<w><t>boun-cy</t></w>
<w><t>bound</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb><adjective/></w>
<w><t>bound-a-ble</t></w>
-<w><t>bound-a-ry</t></w>
+<w><t>bound-a-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>bound-a-ry lay-er</t></phrase>
<phrase><t>bound-a-ry rid-er</t></phrase>
<w><t>bound-ed</t></w>
@@ -18786,7 +18786,7 @@
<w><t>bowd-ler-iz-ing</t></w>
<w><t>bow-drill</t></w>
<w><t>bowed-ness</t></w>
-<w><t>bow-el</t></w>
+<w><t>bow-el</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bow-eled</t></w>
<w><t>bow-el-ing</t></w>
<w><t>Bow-ell</t></w>
@@ -23702,7 +23702,7 @@
<w><t>car-rot-y</t></w>
<w><t>car-rou-sel</t></w>
<w><t>car-rus</t></w>
-<w><t>car-ry</t></w>
+<w><t>car-ry</t><verb><regular-root/></verb></w>
<w><t>Car-ry</t></w>
<w><t>car-ry-a-ble</t></w>
<w><t>car-ry-all</t></w>
@@ -28954,7 +28954,7 @@
<w><t>Clo-ë</t></w>
<w><t>Cloe-li-a</t></w>
<w><t>Cloe-te</t></w>
-<w><t>clog</t></w>
+<w><t>clog</t><verb><regular-root/></verb></w>
<w><t>clog-gi-ly</t></w>
<w><t>clog-gi-ness</t></w>
<w><t>clog-ging</t></w>
@@ -30025,7 +30025,7 @@
<w><t>col-lec-ti-vize</t></w>
<w><t>col-lec-ti-vized</t></w>
<w><t>col-lec-ti-viz-ing</t></w>
-<w><t>col-lec-tor</t></w>
+<w><t>col-lec-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>col-lec-tor-ate</t></w>
<w><t>col-lec-to-rate</t></w>
<w><t>col-lec-tor-ship</t></w>
@@ -30965,7 +30965,7 @@
<w><t>com-pet-i-tive</t></w>
<w><t>com-pet-i-tive-ly</t></w>
<w><t>com-pet-i-tive-ness</t></w>
-<w><t>com-pet-i-tor</t></w>
+<w><t>com-pet-i-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>com-pet-i-tor-ship</t></w>
<w><t>com-pet-i-to-ry</t></w>
<w><t>Com-pi</t></w>
@@ -31742,7 +31742,7 @@
<w><t>con-fla-gra-tive</t></w>
<w><t>con-flate</t></w>
<w><t>con-fla-tion</t></w>
-<w><t>con-flict</t></w>
+<w><t>con-flict</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-flict-ing</t><adjective></adjective></w>
<w><t>con-flict-ing-ly</t></w>
<w><t>con-flic-tion</t></w>
@@ -32018,7 +32018,7 @@
<w><t>con-nect-i-ble</t></w>
<w><t>Con-nect-i-cut</t></w>
<phrase><t>con-nect-ing rod</t></phrase>
-<w><t>con-nec-tion</t></w>
+<w><t>con-nec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-nec-tion-al</t></w>
<w><t>con-nec-tion-ism</t></w>
<w><t>con-nec-tive</t></w>
@@ -32460,7 +32460,7 @@
<w><t>con-tam-i-nate</t></w>
<w><t>con-tam-i-nat-ed</t></w>
<w><t>con-tam-i-nat-ing</t></w>
-<w><t>con-tam-i-na-tion</t></w>
+<w><t>con-tam-i-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-tam-i-na-tive</t></w>
<w><t>con-tam-i-na-tor</t></w>
<w><t>con-tam-i-nous</t></w>
@@ -32841,7 +32841,7 @@
<w><t>con-ven-ti-cle</t></w>
<w><t>con-ven-ti-cler</t></w>
<w><t>con-ven-tic-u-lar</t></w>
-<w><t>con-ven-tion</t></w>
+<w><t>con-ven-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-ven-tion-al</t></w>
<w><t>con-ven-tion-al-ise</t></w>
<w><t>con-ven-tion-al-ised</t></w>
@@ -32990,7 +32990,7 @@
<w><t>con-vuls-i-bil-i-ty</t></w>
<w><t>con-vuls-i-ble</t></w>
<w><t>con-vuls-ing</t></w>
-<w><t>con-vul-sion</t></w>
+<w><t>con-vul-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-vul-sion-ar-ies</t></w>
<w><t>con-vul-sion-ar-y</t></w>
<w><t>con-vul-sive</t></w>
@@ -34228,7 +34228,7 @@
<w><t>count-er</t></w>
<w><t>coun-ter</t></w>
<w><t>coun-ter-ac-cu-sa-tion</t></w>
-<w><t>coun-ter-act</t></w>
+<w><t>coun-ter-act</t><verb><regular-root/></verb></w>
<w><t>coun-ter-act-er</t></w>
<w><t>coun-ter-act-ing-ly</t></w>
<w><t>coun-ter-ac-tion</t></w>
@@ -38818,7 +38818,7 @@
<w><t>de-fea-si-bil-i-ty</t></w>
<w><t>de-fea-si-ble</t></w>
<w><t>de-fea-si-ble-ness</t></w>
-<w><t>de-feat</t></w>
+<w><t>de-feat</t><verb><regular-root/></verb></w>
<w><t>de-feat-er</t></w>
<w><t>de-feat-ism</t></w>
<w><t>de-feat-ist</t></w>
@@ -40764,7 +40764,7 @@
<w><t>de-tached-ly</t></w>
<w><t>de-tach-ed-ness</t></w>
<w><t>de-tach-er</t></w>
-<w><t>de-tach-ment</t></w>
+<w><t>de-tach-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-tail</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>de-tail draw-ing</t></phrase>
<w><t>de-tailed</t></w>
@@ -42496,7 +42496,7 @@
<w><t>dis-ap-point-er</t></w>
<w><t>dis-ap-point-ing-ly</t></w>
<w><t>dis-ap-point-ing-ness</t></w>
-<w><t>dis-ap-point-ment</t></w>
+<w><t>dis-ap-point-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-ap-pro-ba-tion</t></w>
<w><t>dis-ap-prov-al</t></w>
<w><t>dis-ap-prove</t></w>
@@ -42572,7 +42572,7 @@
<w><t>dis-cant</t></w>
<w><t>dis-cant-er</t></w>
<w><t>dis-can-tus</t></w>
-<w><t>dis-card</t></w>
+<w><t>dis-card</t><verb><regular-root/></verb></w>
<w><t>dis-card-er</t></w>
<w><t>dis-car-nate</t></w>
<w><t>dis-car-na-tion</t></w>
@@ -42692,7 +42692,7 @@
<w><t>dis-con-so-late</t></w>
<w><t>dis-con-so-late-ly</t></w>
<w><t>dis-con-so-la-tion</t></w>
-<w><t>dis-con-tent</t></w>
+<w><t>dis-con-tent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-con-tent-ed</t></w>
<w><t>dis-con-tent-ed-ly</t></w>
<w><t>dis-con-tent-ed-ness</t></w>
@@ -48399,7 +48399,7 @@
<w><t>em-bo-ly</t></w>
<w><t>em-bon-point</t></w>
<w><t>em-bosk</t></w>
-<w><t>em-bos-om</t></w>
+<w><t>em-bos-om</t><verb><regular-root/></verb></w>
<w><t>em-boss</t></w>
<w><t>em-boss-er</t></w>
<w><t>em-boss-ment</t></w>
@@ -49441,7 +49441,7 @@
<phrase><t>en-list-ed man</t></phrase>
<w><t>en-list-ee</t></w>
<w><t>en-list-er</t></w>
-<w><t>en-list-ment</t></w>
+<w><t>en-list-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>en-liv-en</t><verb><regular-root/></verb></w>
<w><t>en-liv-en-er</t></w>
<w><t>en-liv-en-ing-ly</t></w>
@@ -52038,7 +52038,7 @@
<w><t>e-vil=mind-ed-ness</t></w>
<w><t>e-vil-ness</t></w>
<phrase><t>E-vil One</t></phrase>
-<w><t>e-vince</t></w>
+<w><t>e-vince</t><verb><regular-root/></verb></w>
<w><t>e-vinced</t></w>
<w><t>e-vin-ci-ble</t></w>
<w><t>e-vinc-ing</t></w>
@@ -52420,7 +52420,7 @@
<w><t>ex-e-cu-tion</t></w>
<w><t>ex-e-cu-tion-al</t></w>
<w><t>ex-e-cu-tion-er</t></w>
-<w><t>ex-ec-u-tive</t></w>
+<w><t>ex-ec-u-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Ex-ec-u-tive Coun-cil</t></phrase>
<w><t>ex-ec-u-tive-ly</t></w>
<w><t>ex-ec-u-tive-ness</t></w>
@@ -52746,7 +52746,7 @@
<w><t>ex-pec-to-ra-tor</t></w>
<w><t>ex-pe-di-ence</t></w>
<w><t>ex-pe-di-en-cy</t></w>
-<w><t>ex-pe-di-ent</t></w>
+<w><t>ex-pe-di-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-pe-di-en-tial</t></w>
<w><t>ex-pe-di-ent-ly</t></w>
<w><t>ex-ped-i-tate</t></w>
@@ -57977,7 +57977,7 @@
<w><t>Fo-rum</t></w>
<phrase><t>Fo-rum Ro-ma-num</t></phrase>
<w><t>fo-rums</t></w>
-<w><t>for-ward</t></w>
+<w><t>for-ward</t><verb><regular-root/></verb></w>
<phrase><t>for-ward bi-as</t></phrase>
<phrase><t>for-ward de-liv-er-y</t></phrase>
<w><t>for-ward-er</t></w>
@@ -66843,7 +66843,7 @@
<w><t>hand-y-man</t></w>
<w><t>ha-ne-fi-yeh</t></w>
<w><t>Han-ford</t></w>
-<w><t>hang</t></w>
+<w><t>hang</t><verb><regular-root/></verb></w>
<w><t>hang-a-bil-i-ty</t></w>
<w><t>hang-a-ble</t></w>
<phrase><t>hang a-bout</t></phrase>
@@ -75054,7 +75054,7 @@
<w><t>in-ac-ces-si-ble</t></w>
<w><t>in-ac-ces-si-ble-ness</t></w>
<w><t>in-ac-ces-si-bly</t></w>
-<w><t>in-ac-cu-ra-cy</t></w>
+<w><t>in-ac-cu-ra-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-ac-cu-rate</t></w>
<w><t>in-ac-cu-rate-ly</t></w>
<w><t>in-ac-cu-rate-ness</t></w>
@@ -76128,7 +76128,7 @@
<w><t>in-du-bi-ta-ble-ness</t></w>
<w><t>in-du-bi-ta-bly</t></w>
<w><t>induc</t></w>
-<w><t>in-duce</t></w>
+<w><t>in-duce</t><verb><regular-root/></verb></w>
<w><t>in-duced</t></w>
<phrase><t>in-duced drag</t></phrase>
<w><t>in-duce-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -76489,7 +76489,7 @@
<w><t>in-fer-tile-ly</t></w>
<w><t>in-fer-tile-ness</t></w>
<w><t>in-fer-til-i-ty</t></w>
-<w><t>in-fest</t></w>
+<w><t>in-fest</t><verb><regular-root/></verb></w>
<w><t>in-fes-ta-tion</t></w>
<w><t>in-fest-er</t></w>
<w><t>in-feu-da-tion</t></w>
@@ -82155,7 +82155,7 @@
<w><t>ju-rigged</t></w>
<w><t>ju-rig-ging</t></w>
<w><t>ju-ris-con-sult</t></w>
-<w><t>ju-ris-dic-tion</t></w>
+<w><t>ju-ris-dic-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ju-ris-dic-tion-al-ly</t></w>
<w><t>ju-ris-dic-tive</t></w>
<w><t>jurisp</t></w>
@@ -86611,7 +86611,7 @@
<w><t>leg-i-ble</t></w>
<w><t>leg-i-ble-ness</t></w>
<w><t>leg-i-bly</t></w>
-<w><t>le-gion</t></w>
+<w><t>le-gion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>le-gion-ar-ies</t></w>
<w><t>le-gion-ar-y</t></w>
<phrase><t>le-gion-ar-y ant</t></phrase>
@@ -89402,7 +89402,7 @@
<w><t>los-ing</t></w>
<w><t>los-ing-ly</t></w>
<w><t>los-ings</t></w>
-<w><t>loss</t></w>
+<w><t>loss</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>loss lead-er</t></phrase>
<phrase><t>loss ra-ti-o</t></phrase>
<w><t>los-sy</t></w>
@@ -93963,7 +93963,8 @@
<w><t>mem-o-ra-ble</t></w>
<w><t>mem-o-ra-ble-ness</t></w>
<w><t>mem-o-ra-bly</t></w>
-<w><t>mem-o-ran-dum</t></w>
+<w><t>mem-o-ran-da</t><noun><plural/></noun></w>
+<w><t>mem-o-ran-dum</t><noun><singular/></noun></w>
<w><t>me-mo-ri-al</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Me-mo-ri-al Day</t></phrase>
<w><t>me-mo-ri-al-ise</t></w>
@@ -96334,7 +96335,7 @@
<w><t>mis-form</t></w>
<w><t>mis-for-ma-tion</t></w>
<w><t>mis-formed</t></w>
-<w><t>mis-for-tune</t></w>
+<w><t>mis-for-tune</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mis-frame</t></w>
<w><t>mis-framed</t></w>
<w><t>mis-fram-ing</t></w>
@@ -98325,7 +98326,7 @@
<w><t>mo-ti-va-tion-al</t></w>
<phrase><t>mo-ti-va-tion-al re-search</t></phrase>
<w><t>mo-ti-va-tive</t></w>
-<w><t>mo-tive</t></w>
+<w><t>mo-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mo-tive-less</t></w>
<w><t>mo-tive-less-ly</t></w>
<w><t>mo-tive-less-ness</t></w>
@@ -99163,7 +99164,7 @@
<phrase><t>Mur-man Coast</t></phrase>
<w><t>Mur-mansk</t></w>
<phrase><t>Mur-mansk Coast</t></phrase>
-<w><t>mur-mur</t></w>
+<w><t>mur-mur</t><noun><pluralizable/></noun></w>
<w><t>mur-mur-a-tion</t></w>
<w><t>mur-mur-er</t></w>
<w><t>mur-mur-less</t></w>
@@ -101845,7 +101846,7 @@
<w><t>Nnam-di</t></w>
<w><t>NNE</t></w>
<w><t>NNW</t></w>
-<w><t>no</t></w>
+<w><t>no</t><noun><singular/></noun></w>
<w><t>no=ac-count</t></w>
<w><t>No-a-chi-an</t></w>
<w><t>No-ach-ic</t></w>
@@ -101937,6 +101938,7 @@
<w><t>No-el=Ba-ker</t></w>
<w><t>No-e-mi</t></w>
<w><t>No-ë-mon</t></w>
+<w><t>noes</t><noun><plural/></noun><comment>Plural of "no", as when voting.</comment></w>
<w><t>no-e-sis</t></w>
<w><t>no-et-ic</t></w>
<w><t>no-et-ics</t></w>
@@ -108276,7 +108278,7 @@
<w><t>ob-lig-a-to-ri-ly</t></w>
<w><t>ob-lig-a-to-ri-ness</t></w>
<w><t>ob-lig-a-to-ry</t></w>
-<w><t>o-blige</t></w>
+<w><t>o-blige</t><verb><regular-root/></verb></w>
<w><t>o-bliged</t></w>
<w><t>o-blig-ed-ness</t></w>
<w><t>ob-li-gee</t></w>
@@ -108867,7 +108869,7 @@
<w><t>of-fend-ed-ness</t></w>
<w><t>of-fend-er</t></w>
<w><t>of-fend-i-ble</t></w>
-<w><t>of-fense</t></w>
+<w><t>of-fense</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>of-fense-less</t></w>
<w><t>of-fense-less-ly</t></w>
<w><t>of-fen-sive</t></w>
@@ -113717,7 +113719,7 @@
<w><t>o-ver-trust</t></w>
<w><t>o-ver-trust-ful</t></w>
<w><t>o-ver-truth-ful</t></w>
-<w><t>o-ver-ture</t></w>
+<w><t>o-ver-ture</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>o-ver-tured</t></w>
<w><t>o-ver-tur-ing</t></w>
<w><t>o-ver-turn</t></w>
@@ -115835,7 +115837,7 @@
<w><t>par-ti-san-ship</t></w>
<w><t>par-ti-ta</t></w>
<w><t>par-tite</t></w>
-<w><t>par-ti-tion</t></w>
+<w><t>par-ti-tion</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>par-ti-tion-er</t></w>
<w><t>par-ti-tion-ist</t></w>
<w><t>par-ti-tion-ment</t></w>
@@ -120989,7 +120991,7 @@
<w><t>pli-er</t></w>
<w><t>pli-ers</t></w>
<w><t>pli-és</t></w>
-<w><t>plight</t></w>
+<w><t>plight</t><verb><regular-root/></verb></w>
<w><t>plight-er</t></w>
<w><t>pli-kit</t></w>
<w><t>plim</t></w>
@@ -125878,7 +125880,7 @@
<w><t>pre-pos-sess-ing</t></w>
<w><t>pre-pos-sess-ing-ly</t></w>
<w><t>pre-pos-sess-ing-ness</t></w>
-<w><t>pre-pos-ses-sion</t></w>
+<w><t>pre-pos-ses-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-pos-ses-sion-ar-y</t></w>
<w><t>pre-pos-ter-ous</t></w>
<w><t>pre-pos-ter-ous-ly</t></w>
@@ -126558,7 +126560,7 @@
<w><t>pre-tem-per-ate</t></w>
<w><t>pre-tempt</t></w>
<w><t>pre-temp-ta-tion</t></w>
-<w><t>pre-tence</t></w>
+<w><t>pre-tence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-tence-less</t></w>
<w><t>pre-tend</t><verb><regular-root/></verb></w>
<w><t>pre-tend-ed</t></w>
@@ -127139,6 +127141,7 @@
<phrase><t>priv-y purse</t></phrase>
<phrase><t>priv-y seal</t></phrase>
<w><t>prize</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>prized</t><adjective><extensible value="false"/></adjective></w>
<w><t>prize-fight</t></w>
<w><t>prize-fight-er</t></w>
<w><t>prize-fight-ing</t></w>
@@ -129527,7 +129530,7 @@
<w><t>pu-er-pe-ri-um</t></w>
<phrase><t>Puer-to Ri-co</t></phrase>
<w><t>Pu-fen-dorf</t></w>
-<w><t>puff</t></w>
+<w><t>puff</t><verb><regular-root/></verb></w>
<phrase><t>puff ad-der</t></phrase>
<w><t>puff-ball</t></w>
<w><t>puff-bird</t></w>
@@ -131697,7 +131700,7 @@
<w><t>quor-um</t></w>
<w><t>quo-rum</t></w>
<w><t>quot</t></w>
-<w><t>quo-ta</t></w>
+<w><t>quo-ta</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>quot-a-ble</t></w>
<w><t>quo-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>quo-ta-tion mark</t></phrase>
@@ -133654,7 +133657,7 @@
<w><t>re-com-menc-ing</t></w>
<w><t>rec-om-mend</t><verb><regular-root/></verb></w>
<w><t>rec-om-mend-a-ble</t></w>
-<w><t>rec-om-men-da-tion</t></w>
+<w><t>rec-om-men-da-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>rec-om-mend-a-to-ry</t></w>
<w><t>rec-om-mend-er</t></w>
<w><t>re-com-mis-sion</t></w>
@@ -133825,7 +133828,7 @@
<w><t>re-coun-sel-ing</t></w>
<w><t>re-coun-selled</t></w>
<w><t>re-coun-sel-ling</t></w>
-<w><t>re-count</t></w>
+<w><t>re-count</t><verb><regular-root/></verb></w>
<w><t>re-count-al</t></w>
<w><t>re-coup</t></w>
<w><t>re-coup-a-ble</t></w>
@@ -135563,6 +135566,7 @@
<phrase><t>rel-a-tive hu-mid-i-ty</t></phrase>
<phrase><t>rel-a-tive ma-jor-i-ty</t></phrase>
<phrase><t>rel-a-tive mo-lec-u-lar mass</t></phrase>
+<w><t>rel-a-tive-ly</t><adverb/></w>
<w><t>rel-a-tive-ness</t></w>
<phrase><t>rel-a-tive per-me-a-bil-i-ty</t></phrase>
<phrase><t>rel-a-tive per-mit-tiv-i-ty</t></phrase>
@@ -135916,7 +135920,7 @@
<w><t>re-mon-e-tize</t></w>
<w><t>re-mon-e-tized</t></w>
<w><t>re-mon-e-tiz-ing</t></w>
-<w><t>re-mon-strance</t></w>
+<w><t>re-mon-strance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Re-mon-strance</t></w>
<w><t>Re-mon-strant</t></w>
<w><t>re-mon-strant</t></w>
@@ -136281,7 +136285,7 @@
<w><t>re-pe-nal-iz-ing</t></w>
<w><t>re-penned</t></w>
<w><t>re-pen-ning</t></w>
-<w><t>re-pent</t></w>
+<w><t>re-pent</t><verb><regular-root/></verb></w>
<w><t>re-pent-ance</t></w>
<w><t>re-pent-ant</t></w>
<w><t>re-pent-ant-ly</t></w>
@@ -136406,7 +136410,7 @@
<w><t>rep-or-to-ri-al-ly</t></w>
<phrase><t>re-port stage</t></phrase>
<w><t>re-pos-al</t></w>
-<w><t>re-pose</t></w>
+<w><t>re-pose</t><verb><regular-root/></verb></w>
<w><t>re=pose</t></w>
<w><t>re-posed</t></w>
<w><t>re-pos-ed-ly</t></w>
@@ -136498,7 +136502,7 @@
<w><t>re-prim-ing</t></w>
<w><t>re-print</t></w>
<w><t>re-print-er</t></w>
-<w><t>re-pris-al</t></w>
+<w><t>re-pris-al</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>re-prise</t></w>
<w><t>re-pro</t></w>
<w><t>re-proach</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
@@ -136657,7 +136661,7 @@
<w><t>req-ui-site</t></w>
<w><t>req-ui-site-ly</t></w>
<w><t>req-ui-site-ness</t></w>
-<w><t>req-ui-si-tion</t></w>
+<w><t>req-ui-si-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>req-ui-si-tion-ar-y</t></w>
<w><t>req-ui-si-tion-er</t></w>
<w><t>req-ui-si-tion-ist</t></w>
@@ -138744,7 +138748,7 @@
<w><t>ritz-y</t></w>
<w><t>riv</t></w>
<w><t>riv-age</t></w>
-<w><t>ri-val</t></w>
+<w><t>ri-val</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ri-valed</t></w>
<w><t>ri-val-ing</t></w>
<w><t>ri-valled</t></w>
@@ -138768,7 +138772,7 @@
<w><t>Riv-er-side</t></w>
<w><t>Riv-er-ton</t></w>
<w><t>Riv-er-view</t></w>
-<w><t>riv-et</t></w>
+<w><t>riv-et</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>riv-et-ed</t></w>
<w><t>riv-et-er</t></w>
<w><t>riv-et-ing</t></w>
@@ -140794,7 +140798,7 @@
<w><t>sal-low-y</t></w>
<w><t>Sal-lust</t></w>
<w><t>Sal-ly</t></w>
-<w><t>sal-ly</t></w>
+<w><t>sal-ly</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>Sal-ly Ar-my</t></phrase>
<w><t>Sal-lye</t></w>
<w><t>sal-ly-ing</t></w>
@@ -150103,7 +150107,7 @@
<w><t>solv-a-tion</t></w>
<w><t>Sol-vay</t></w>
<phrase><t>Sol-vay pro-cess</t></phrase>
-<w><t>solve</t></w>
+<w><t>solve</t><verb><regular-root/></verb></w>
<w><t>sol-ven-cy</t></w>
<w><t>sol-vent</t></w>
<w><t>solv-er</t></w>
@@ -152396,7 +152400,7 @@
<phrase><t>staff ser-geant</t></phrase>
<w><t>stag</t></w>
<phrase><t>stag bee-tle</t></phrase>
-<w><t>stage</t></w>
+<w><t>stage</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>stage-a-ble</t></w>
<w><t>stage-a-bly</t></w>
<w><t>stage-coach</t></w>
@@ -152778,10 +152782,11 @@
<phrase><t>States Gen-er-al</t></phrase>
<w><t>States=Gen-er-al</t></w>
<w><t>state-side</t></w>
-<w><t>states-man</t></w>
+<w><t>states-man</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>states-man-like</t></w>
<w><t>states-man-ly</t></w>
<w><t>states-man-ship</t></w>
+<w><t>states-men</t><noun><plural/></noun></w>
<phrase><t>state so-cial-ism</t></phrase>
<phrase><t>state troop-er</t></phrase>
<w><t>state-wide</t></w>
@@ -155332,7 +155337,7 @@
<w><t>sub-jec-tiv-i-ty</t></w>
<phrase><t>sub-ject mat-ter</t></phrase>
<w><t>sub-ject=rais-ing</t></w>
-<w><t>sub-join</t></w>
+<w><t>sub-join</t><verb><regular-root/></verb></w>
<w><t>sub-join-der</t></w>
<w><t>sub-joint</t></w>
<w><t>sub-judge</t></w>
@@ -157355,7 +157360,7 @@
<w><t>su-per-in-sist-ence</t></w>
<w><t>su-per-in-sist-ent</t></w>
<w><t>su-per-in-tel-lec-tu-al</t></w>
-<w><t>su-per-in-tend</t></w>
+<w><t>su-per-in-tend</t><verb><regular-root/></verb></w>
<w><t>su-per-in-tend-ence</t></w>
<w><t>su-per-in-tend-en-cy</t></w>
<w><t>su-per-in-tend-ent</t></w>
@@ -158116,7 +158121,7 @@
<w><t>surv</t></w>
<w><t>sur-veil-lance</t></w>
<w><t>sur-veil-lant</t></w>
-<w><t>sur-vey</t></w>
+<w><t>sur-vey</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>sur-vey-a-ble</t></w>
<w><t>sur-vey-ing</t></w>
<w><t>sur-vey-or</t></w>
@@ -162022,7 +162027,7 @@
<w><t>Thim-bu</t></w>
<w><t>thi-mer-o-sal</t></w>
<w><t>Thim-phu</t></w>
-<w><t>thin</t></w>
+<w><t>thin</t><verb><regular-root/></verb><adjective><extensible/></adjective></w>
<w><t>thine</t></w>
<w><t>thing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>thing-a-ma-bob</t></w>
@@ -167839,6 +167844,7 @@
<w><t>un-a-void-a-bil-i-ty</t></w>
<w><t>un-a-void-a-ble</t></w>
<w><t>un-a-void-a-ble-ness</t></w>
+<w><t>un-a-void-a-bly</t><adverb/></w>
<w><t>un-a-void-ing</t></w>
<w><t>un-a-vouched</t></w>
<w><t>un-a-vow-a-ble</t></w>
@@ -172003,7 +172009,7 @@
<w><t>un-foil-a-ble</t></w>
<w><t>un-foiled</t></w>
<w><t>un-foist-ed</t></w>
-<w><t>un-fold</t></w>
+<w><t>un-fold</t><verb><regular-root/></verb></w>
<w><t>un-fold-a-ble</t></w>
<w><t>un-fold-er</t></w>
<w><t>un-fold-ment</t></w>
@@ -174579,7 +174585,7 @@
<w><t>un-neigh-bour-li-ness</t></w>
<w><t>un-neigh-bour-ly</t></w>
<w><t>un-ne-phrit-ic</t></w>
-<w><t>un-nerve</t></w>
+<w><t>un-nerve</t><verb><regular-root/></verb></w>
<w><t>un-ner-vous</t></w>
<w><t>un-nes-tled</t></w>
<w><t>un-net-ted</t></w>
@@ -179653,7 +179659,7 @@
<w><t>Vaal</t></w>
<w><t>Vaa-sa</t></w>
<w><t>vac</t></w>
-<w><t>va-can-cy</t></w>
+<w><t>va-can-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>va-cant</t></w>
<w><t>va-cant-ly</t></w>
<w><t>va-cant-ness</t></w>
@@ -181575,7 +181581,7 @@
<w><t>Vis-i-goth-ic</t></w>
<w><t>Vi-sine</t></w>
<w><t>vis-ing</t></w>
-<w><t>vi-sion</t></w>
+<w><t>vi-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>vi-sion-al</t></w>
<w><t>vi-sion-al-ly</t></w>
<w><t>vi-sion-ar-ies</t></w>
@@ -183377,7 +183383,7 @@
<w><t>Wei-gel</t></w>
<w><t>wei-ge-la</t></w>
<w><t>wei-ge-li-a</t></w>
-<w><t>weigh</t></w>
+<w><t>weigh</t><verb><regular-root/></verb></w>
<w><t>weigh-a-ble</t></w>
<w><t>weigh-bridge</t></w>
<w><t>weigh-er</t></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-25 23:13:27
|
Revision: 13178
http://sourceforge.net/p/foray/code/13178
Author: victormote
Date: 2023-08-25 23:13:24 +0000 (Fri, 25 Aug 2023)
Log Message:
-----------
Normal editing of dictionaries and orthographies.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-08-25 14:35:45 UTC (rev 13177)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2023-08-25 23:13:24 UTC (rev 13178)
@@ -20,8 +20,8 @@
<w><t>be-hove</t><verb><regular-root/></verb></w>
<w><t>co=la-bour-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ful-fil</t><verb/></w>
-<w><t>ful-fils</t><verb><vu><singular/></vu></verb></w>
<w><t>ful-fil-ment</t><noun/></w>
+<w><t>ful-fils</t><verb><vf><singular/></vf></verb></w>
<w><t>la-bour</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>la-boured</t><adjective/></w>
<w><t>la-bour-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-25 14:35:45 UTC (rev 13177)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2023-08-25 23:13:24 UTC (rev 13178)
@@ -285,7 +285,7 @@
<w><t>Ab-i-gail</t></w>
<w><t>A-bi-hu</t></w>
<w><t>Ab-i-lene</t></w>
-<w><t>a-bil-i-ty</t></w>
+<w><t>a-bil-i-ty</t><noun><pluralizable/></noun></w>
<w><t>Ab-i-lyne</t></w>
<w><t>A-bim-e-lech</t></w>
<w><t>A-bi-ne-ri</t></w>
@@ -665,7 +665,7 @@
<w><t>A-bur-y</t></w>
<w><t>a-bus-a-ble</t></w>
<w><t>a-bus-age</t></w>
-<w><t>a-buse</t></w>
+<w><t>a-buse</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>a-bused</t></w>
<w><t>a-bus-er</t></w>
<phrase><t>Ab-u Sim-bel</t></phrase>
@@ -1375,7 +1375,7 @@
<w><t>ac-quaint-ed</t></w>
<w><t>ac-quaint-ed-ness</t></w>
<w><t>ac-quest</t></w>
-<w><t>ac-qui-esce</t></w>
+<w><t>ac-qui-esce</t><verb><regular-root/></verb></w>
<w><t>ac-qui-es-cence</t></w>
<w><t>ac-qui-es-cent</t></w>
<w><t>ac-qui-es-cent-ly</t></w>
@@ -1839,7 +1839,7 @@
<w><t>ad-here</t><verb><regular-root/></verb></w>
<w><t>ad-her-ence</t></w>
<w><t>ad-her-end</t></w>
-<w><t>ad-her-ent</t></w>
+<w><t>ad-her-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ad-her-ent-ly</t></w>
<w><t>ad-her-er</t></w>
<w><t>ad-her-ing</t></w>
@@ -2906,7 +2906,7 @@
<w><t>a-ge-net-ic</t></w>
<w><t>A-ge-nois</t></w>
<w><t>A-ge-nor</t></w>
-<w><t>a-gent</t></w>
+<w><t>a-gent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-gent=gen-er-al</t></w>
<w><t>a-gen-tial</t></w>
<w><t>a-gen-ti-val</t></w>
@@ -4260,7 +4260,7 @@
<w><t>al-li-a-ble</t></w>
<w><t>al-li-a-ceous</t></w>
<w><t>Al-li-ance</t></w>
-<w><t>al-li-ance</t></w>
+<w><t>al-li-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>al-lied</t></w>
<w><t>Al-lied</t></w>
<w><t>Al-lier</t></w>
@@ -5010,12 +5010,12 @@
<w><t>a-me-na-ble-ness</t></w>
<w><t>a-me-na-bly</t></w>
<phrase><t>a-men cor-ner</t></phrase>
-<w><t>a-mend</t></w>
+<w><t>a-mend</t><verb><regular-root/></verb></w>
<w><t>a-mend-a-ble</t></w>
<w><t>a-mend-a-tory</t></w>
<w><t>a-mend-a-to-ry</t></w>
<w><t>a-mend-er</t></w>
-<w><t>a-mend-ment</t></w>
+<w><t>a-mend-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-mends</t></w>
<phrase><t>A-men-ho-tep III</t></phrase>
<phrase><t>Am-en-ho-tep IV</t></phrase>
@@ -6426,7 +6426,7 @@
<w><t>an-i-mism</t></w>
<w><t>an-i-mist</t></w>
<w><t>an-i-mis-tic</t></w>
-<w><t>an-i-mos-i-ty</t></w>
+<w><t>an-i-mos-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>an-i-mus</t></w>
<w><t>an-i-on</t></w>
<w><t>an-i-on-ic</t></w>
@@ -7059,7 +7059,7 @@
<w><t>an-tic-i-pate</t></w>
<w><t>an-tic-i-pat-ed</t></w>
<w><t>an-tic-i-pat-ing</t></w>
-<w><t>an-tic-i-pa-tion</t></w>
+<w><t>an-tic-i-pa-tion</t><noun><pluralizable/></noun></w>
<w><t>an-tic-i-pa-tive</t></w>
<w><t>an-tic-i-pa-tive-ly</t></w>
<w><t>an-tic-i-pa-tor</t></w>
@@ -8548,7 +8548,7 @@
<w><t>ap-point-ment</t></w>
<w><t>ap-poin-tor</t></w>
<w><t>Ap-po-mat-tox</t></w>
-<w><t>ap-por-tion</t></w>
+<w><t>ap-por-tion</t><verb><regular-root/></verb></w>
<w><t>ap-por-tion-a-ble</t></w>
<w><t>ap-por-tion-er</t></w>
<w><t>ap-por-tion-ment</t></w>
@@ -8635,7 +8635,7 @@
<w><t>ap-pro-pri-ate-ly</t></w>
<w><t>ap-pro-pri-ate-ness</t></w>
<w><t>ap-pro-pri-at-ing</t></w>
-<w><t>ap-pro-pri-a-tion</t></w>
+<w><t>ap-pro-pri-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ap-pro-pri-a-tive</t></w>
<w><t>ap-pro-pri-a-tive-ness</t></w>
<w><t>ap-pro-pri-a-tor</t></w>
@@ -11853,7 +11853,7 @@
<w><t>a-vouch</t></w>
<w><t>a-vouch-er</t></w>
<w><t>a-vouch-ment</t></w>
-<w><t>a-vow</t></w>
+<w><t>a-vow</t><verb><regular-root/></verb></w>
<w><t>a-vow-a-ble</t></w>
<w><t>a-vow-al</t></w>
<w><t>a-vowed</t></w>
@@ -16148,7 +16148,7 @@
<w><t>bil-i-ver-din</t></w>
<w><t>bilk</t></w>
<w><t>bilk-er</t></w>
-<w><t>bill</t></w>
+<w><t>bill</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Bill</t></w>
<w><t>bil-la-ble</t></w>
<w><t>bil-la-bong</t></w>
@@ -26135,7 +26135,7 @@
<w><t>che-cha-ko</t></w>
<w><t>Che-chen</t></w>
<w><t>ché-chia</t></w>
-<w><t>check</t></w>
+<w><t>check</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>check-a-ble</t></w>
<w><t>check-back</t></w>
<w><t>check-book</t></w>
@@ -28450,7 +28450,7 @@
<w><t>clart</t></w>
<w><t>clarts</t></w>
<w><t>clar-y</t></w>
-<w><t>clash</t></w>
+<w><t>clash</t><verb><regular-root/></verb></w>
<w><t>clash-er</t></w>
<w><t>clash-ing-ly</t></w>
<w><t>clasp</t></w>
@@ -28461,6 +28461,7 @@
<w><t>class-a-ble</t></w>
<w><t>class-book</t></w>
<w><t>class=con-scious</t></w>
+<w><t>classed</t><verb/></w>
<w><t>class-er</t></w>
<w><t>clas-ses</t></w>
<w><t>clas-sic</t></w>
@@ -29871,7 +29872,7 @@
<w><t>Col-chis</t></w>
<w><t>col-co-thar</t></w>
<w><t>Cold</t></w>
-<w><t>cold</t></w>
+<w><t>cold</t><adjective><extensible/></adjective></w>
<w><t>cold=blood-ed</t></w>
<w><t>cold=blood-ed-ly</t></w>
<w><t>cold=blood-ed-ness</t></w>
@@ -30677,7 +30678,7 @@
<w><t>com-mo-di-ous</t></w>
<w><t>com-mo-di-ous-ly</t></w>
<w><t>com-mo-di-ous-ness</t></w>
-<w><t>com-mod-i-ty</t></w>
+<w><t>com-mod-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>com-mo-dore</t></w>
<w><t>Com-mo-dus</t></w>
<w><t>com-mon</t></w>
@@ -31298,7 +31299,7 @@
<w><t>con-cen-tra-tive</t></w>
<w><t>con-cen-tra-tive-ness</t></w>
<w><t>con-cen-tra-tor</t></w>
-<w><t>con-cen-tre</t></w>
+<w><t>con-cen-tre</t><verb><regular-root/></verb><comment>Not found in NOAD.</comment></w>
<w><t>con-cen-tric</t></w>
<w><t>con-cen-tri-cal</t></w>
<w><t>con-cen-tri-cal-ly</t></w>
@@ -31634,7 +31635,7 @@
<w><t>con-fec-tion-er-ies</t></w>
<phrase><t>con-fec-tion-ers’ sug-ar</t></phrase>
<w><t>con-fec-tion-er-y</t></w>
-<w><t>con-fed-er-a-cy</t></w>
+<w><t>con-fed-er-a-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Con-fed-er-a-cy</t></w>
<w><t>con-fed-er-al-ist</t></w>
<w><t>Con-fed-er-ate</t></w>
@@ -31742,6 +31743,7 @@
<w><t>con-flate</t></w>
<w><t>con-fla-tion</t></w>
<w><t>con-flict</t></w>
+<w><t>con-flict-ing</t><adjective></adjective></w>
<w><t>con-flict-ing-ly</t></w>
<w><t>con-flic-tion</t></w>
<w><t>con-flic-tive</t></w>
@@ -32183,7 +32185,7 @@
<w><t>con-sid-er-ate</t></w>
<w><t>con-sid-er-ate-ly</t></w>
<w><t>con-sid-er-ate-ness</t></w>
-<w><t>con-sid-er-a-tion</t></w>
+<w><t>con-sid-er-a-tion</t><noun><pluralizable/></noun></w>
<w><t>con-sid-ered</t></w>
<w><t>con-sid-er-er</t></w>
<w><t>con-sid-er-ing</t></w>
@@ -32318,7 +32320,7 @@
<w><t>con-sti-tut-ed</t></w>
<w><t>con-sti-tut-er</t></w>
<w><t>con-sti-tut-ing</t></w>
-<w><t>con-sti-tu-tion</t></w>
+<w><t>con-sti-tu-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-sti-tu-tion-al</t></w>
<w><t>con-sti-tu-tion-al-ism</t></w>
<w><t>con-sti-tu-tion-al-ist</t></w>
@@ -32510,7 +32512,7 @@
<w><t>con-tent-ed</t></w>
<w><t>con-tent-ed-ly</t></w>
<w><t>con-tent-ed-ness</t></w>
-<w><t>con-ten-tion</t></w>
+<w><t>con-ten-tion</t><noun><pluralizable/></noun></w>
<w><t>con-ten-tion-al</t></w>
<w><t>con-ten-tious</t></w>
<w><t>con-ten-tious-ly</t></w>
@@ -32633,7 +32635,7 @@
<w><t>con-tra-cep-tion</t></w>
<w><t>con-tra-cep-tive</t></w>
<w><t>con-tra-clock-wise</t></w>
-<w><t>con-tract</t></w>
+<w><t>con-tract</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>con-tract bridge</t></phrase>
<w><t>con-tract-ed</t></w>
<w><t>con-tract-ed-ly</t></w>
@@ -32749,7 +32751,7 @@
<w><t>con-trite-ness</t></w>
<w><t>con-tri-tion</t></w>
<w><t>con-triv-a-ble</t></w>
-<w><t>con-triv-ance</t></w>
+<w><t>con-triv-ance</t><noun><pluralizable/></noun></w>
<w><t>con-trive</t></w>
<w><t>con-trived</t></w>
<w><t>con-triv-er</t></w>
@@ -34401,7 +34403,7 @@
<w><t>coun-try-wom-an</t><noun><singular/></noun></w>
<w><t>coun-try-wom-en</t><noun><plural/></noun></w>
<w><t>count-ship</t></w>
-<w><t>coun-ty</t></w>
+<w><t>coun-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>coun-ty bor-ough</t></phrase>
<phrase><t>coun-ty court</t></phrase>
<phrase><t>coun-ty pal-a-tine</t></phrase>
@@ -35068,7 +35070,7 @@
<phrase><t>cred-it card</t></phrase>
<w><t>cred-it-less</t></w>
<phrase><t>cred-it line</t></phrase>
-<w><t>cred-i-tor</t></w>
+<w><t>cred-i-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cred-i-tor-ship</t></w>
<phrase><t>cred-it rat-ing</t></phrase>
<w><t>cred-its</t></w>
@@ -35511,7 +35513,7 @@
<w><t>crom-lech</t></w>
<w><t>cro-morne</t></w>
<w><t>Cromp-ton</t></w>
-<w><t>Crom-well</t></w>
+<w><t>Crom-well</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Crom-well Cur-rent</t></phrase>
<w><t>Crom-wel-li-an</t></w>
<w><t>crone</t></w>
@@ -38133,7 +38135,7 @@
<w><t>de-bas-ing</t></w>
<w><t>de-bas-ing-ly</t></w>
<w><t>de-bat-a-ble</t></w>
-<w><t>de-bate</t></w>
+<w><t>de-bate</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>de-bate-a-ble</t></w>
<w><t>de-bat-ed</t></w>
<w><t>de-bat-er</t></w>
@@ -38192,7 +38194,7 @@
<w><t>de-bruised</t></w>
<w><t>de-bruis-ing</t></w>
<w><t>Debs</t></w>
-<w><t>debt</t></w>
+<w><t>debt</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>debt-less</t></w>
<phrase><t>debt of hon-or</t></phrase>
<w><t>debt-or</t></w>
@@ -38470,7 +38472,7 @@
<w><t>de-ci-pher-a-ble</t></w>
<w><t>de-ci-pher-er</t></w>
<w><t>de-ci-pher-ment</t></w>
-<w><t>de-ci-sion</t></w>
+<w><t>de-ci-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-ci-sion-al</t></w>
<w><t>de-ci-sive</t></w>
<w><t>de-ci-sive-ly</t></w>
@@ -38500,7 +38502,7 @@
<w><t>de-clam-a-to-ry</t></w>
<w><t>de-clar-a-ble</t></w>
<w><t>de-clar-ant</t></w>
-<w><t>dec-la-ra-tion</t></w>
+<w><t>dec-la-ra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Dec-la-ra-tion of In-de-pend-ence</t></phrase>
<w><t>de-clar-a-tive</t></w>
<w><t>de-clar-a-tive-ly</t></w>
@@ -39296,7 +39298,7 @@
<w><t>de-lib-er-ate-ly</t></w>
<w><t>de-lib-er-ate-ness</t></w>
<w><t>de-lib-er-at-ing</t></w>
-<w><t>de-lib-er-a-tion</t></w>
+<w><t>de-lib-er-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-lib-er-a-tive</t></w>
<w><t>de-lib-er-a-tive-ly</t></w>
<w><t>de-lib-er-a-tive-ness</t></w>
@@ -40210,7 +40212,7 @@
<w><t>dep-re-da-tion-ist</t></w>
<w><t>dep-re-da-tor</t></w>
<w><t>dep-re-da-to-ry</t></w>
-<w><t>de-press</t></w>
+<w><t>de-press</t><verb><regular-root/></verb></w>
<w><t>de-pres-sant</t></w>
<w><t>de-pressed</t></w>
<w><t>de-press-i-bil-i-ty</t></w>
@@ -40230,7 +40232,7 @@
<w><t>de-priv-al</t></w>
<w><t>dep-ri-va-tion</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>de-priv-a-tive</t></w>
-<w><t>de-prive</t></w>
+<w><t>de-prive</t><verb><regular-root/></verb></w>
<w><t>de-prived</t></w>
<w><t>de-priv-er</t></w>
<w><t>de-priv-ing</t></w>
@@ -40258,7 +40260,7 @@
<w><t>dep-u-tize</t></w>
<w><t>dep-u-tized</t></w>
<w><t>dep-u-tiz-ing</t></w>
-<w><t>dep-u-ty</t></w>
+<w><t>dep-u-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dep-u-ty-ship</t></w>
<phrase><t>De Quin-cey</t></phrase>
<w><t>der</t></w>
@@ -43343,7 +43345,7 @@
<w><t>dis-re-gard-ful</t></w>
<w><t>dis-re-gard-ful-ly</t></w>
<w><t>dis-re-gard-ful-ness</t></w>
-<w><t>dis-rel-ish</t></w>
+<w><t>dis-rel-ish</t><verb><regular-root/></verb></w>
<w><t>dis-re-mem-ber</t></w>
<w><t>dis-re-pair</t></w>
<w><t>dis-rep-u-ta-bil-i-ty</t></w>
@@ -43409,7 +43411,7 @@
<w><t>dis-sem-i-na-tive</t></w>
<w><t>dis-sem-i-na-tor</t></w>
<w><t>dis-sem-i-nule</t></w>
-<w><t>dis-sen-sion</t></w>
+<w><t>dis-sen-sion</t><noun><pluralizable/></noun></w>
<w><t>dis-sent</t></w>
<w><t>dis-sent-er</t></w>
<w><t>Dis-sent-er</t></w>
@@ -43583,7 +43585,7 @@
<w><t>dis-till-ment</t></w>
<w><t>dis-til-ment</t></w>
<w><t>dis-tinct</t></w>
-<w><t>dis-tinc-tion</t></w>
+<w><t>dis-tinc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-tinc-tion-less</t></w>
<w><t>dis-tinc-tive</t></w>
<w><t>dis-tinc-tive-ly</t></w>
@@ -43622,7 +43624,7 @@
<w><t>dis-tract-i-bil-i-ty</t></w>
<w><t>dis-tract-i-ble</t></w>
<w><t>dis-tract-ing-ly</t></w>
-<w><t>dis-trac-tion</t></w>
+<w><t>dis-trac-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-trac-tive</t></w>
<w><t>dis-trac-tive-ly</t></w>
<w><t>dis-trail</t></w>
@@ -43668,7 +43670,7 @@
<phrase><t>Dis-trict of Co-lum-bi-a</t></phrase>
<w><t>dis-trin-gas</t></w>
<phrase><t>Dis-tri-to Fe-de-ral</t></phrase>
-<w><t>dis-trust</t></w>
+<w><t>dis-trust</t><noun/><verb><regular-root/></verb></w>
<w><t>dis-trust-er</t></w>
<w><t>dis-trust-ful</t></w>
<w><t>dis-trust-ful-ly</t></w>
@@ -43878,7 +43880,7 @@
<w><t>di-vi-si-ble</t></w>
<w><t>di-vis-i-ble-ness</t></w>
<w><t>di-vis-i-bly</t></w>
-<w><t>di-vi-sion</t></w>
+<w><t>di-vi-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>di-vi-sion-al</t></w>
<w><t>di-vi-sion-al-ly</t></w>
<w><t>di-vi-sion-ar-y</t></w>
@@ -47647,7 +47649,7 @@
<w><t>e-lec-tive</t></w>
<w><t>e-lec-tive-ly</t></w>
<w><t>e-lec-tive-ness</t></w>
-<w><t>e-lec-tor</t></w>
+<w><t>e-lec-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-lec-tor-al</t></w>
<phrase><t>e-lec-to-ral col-lege</t></phrase>
<w><t>e-lec-tor-al-ly</t></w>
@@ -48135,7 +48137,7 @@
<w><t>El-li-son</t></w>
<w><t>El-lis-ville</t></w>
<w><t>El-lo-ra</t></w>
-<w><t>Ells-worth</t></w>
+<w><t>Ells-worth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>elm</t></w>
<w><t>El-man</t></w>
<phrase><t>El Man-su-ra</t></phrase>
@@ -48953,7 +48955,7 @@
<w><t>en-cri-nite</t></w>
<w><t>en-croach</t></w>
<w><t>en-croach-er</t></w>
-<w><t>en-croach-ment</t></w>
+<w><t>en-croach-ment</t><noun><pluralizable/></noun></w>
<w><t>en-crust</t></w>
<w><t>en-crust-ant</t></w>
<w><t>en-crus-ta-tion</t></w>
@@ -49627,7 +49629,7 @@
<w><t>en-tail-er</t></w>
<w><t>en-tail-ment</t></w>
<w><t>en-ta-moe-ba</t></w>
-<w><t>en-tan-gle</t></w>
+<w><t>en-tan-gle</t><verb><regular-root/></verb></w>
<w><t>en-tan-gle-a-ble</t></w>
<w><t>en-tan-gled-ly</t></w>
<w><t>en-tan-gled-ness</t></w>
@@ -51079,7 +51081,7 @@
<w><t>Es-qui-line</t></w>
<w><t>Es-qui-mau</t></w>
<w><t>Es-qui-mau-an</t></w>
-<w><t>es-quire</t></w>
+<w><t>es-quire</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>es-quired</t></w>
<w><t>es-quir-ing</t></w>
<w><t>es-quisse=es-quisse</t></w>
@@ -52189,7 +52191,7 @@
<w><t>ex-celled</t><verb><regular-root value="false"/></verb></w>
<w><t>Ex-cel-lence</t></w>
<w><t>ex-cel-lence</t></w>
-<w><t>ex-cel-len-cy</t></w>
+<w><t>ex-cel-len-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ex-cel-len-cy</t></w>
<w><t>ex-cel-lent</t></w>
<w><t>ex-cel-lent-ly</t></w>
@@ -52549,7 +52551,7 @@
<w><t>ex-i-geant</t></w>
<w><t>ex-i-geante</t></w>
<w><t>ex-i-gence</t></w>
-<w><t>ex-i-gen-cy</t></w>
+<w><t>ex-i-gen-cy</t><noun><pluralizable/></noun></w>
<w><t>ex-i-gent</t></w>
<w><t>ex-i-gent-ly</t></w>
<w><t>ex-i-gi-ble</t></w>
@@ -52880,7 +52882,7 @@
<w><t>ex-plod-er</t></w>
<w><t>ex-plod-ing</t></w>
<phrase><t>ex-plod-ing star</t></phrase>
-<w><t>ex-ploit</t></w>
+<w><t>ex-ploit</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ex-ploit-a-ble</t></w>
<w><t>ex-ploi-ta-tion</t></w>
<w><t>ex-ploit-a-tive</t></w>
@@ -53520,7 +53522,7 @@
<w><t>fact-ful</t></w>
<w><t>fac-tice</t></w>
<w><t>Fac-tice</t></w>
-<w><t>fac-tion</t></w>
+<w><t>fac-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fac-tion-al</t></w>
<w><t>fac-tion-al-ism</t></w>
<w><t>fac-tion-al-ist</t></w>
@@ -54383,7 +54385,7 @@
<w><t>fa-vored-ness</t></w>
<w><t>fa-vor-er</t></w>
<w><t>fa-vor-ing-ly</t></w>
-<w><t>fa-vor-ite</t></w>
+<w><t>fa-vor-ite</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective></adjective></w>
<phrase><t>fa-vor-ite son</t></phrase>
<w><t>fa-vor-it-ism</t></w>
<w><t>fa-vor-less</t></w>
@@ -55367,7 +55369,7 @@
<w><t>fif-ty=fif-ty</t></w>
<w><t>fif-ty-pen-ny</t></w>
<w><t>fifty-ty=fif-ty</t></w>
-<w><t>fig</t></w>
+<w><t>fig</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fig-eat-er</t></w>
<w><t>fig-gi-er</t></w>
<w><t>fig-gi-est</t></w>
@@ -56141,7 +56143,7 @@
<w><t>flam-boy-an-cy</t></w>
<w><t>flam-boy-ant</t></w>
<w><t>flam-boy-ant-ly</t></w>
-<w><t>flame</t></w>
+<w><t>flame</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>flame=col-ored</t></w>
<w><t>flame-fish</t></w>
<w><t>flame-fish-es</t></w>
@@ -60448,7 +60450,7 @@
<w><t>Gar-rett</t></w>
<w><t>Gar-rick</t></w>
<w><t>gar-ring</t></w>
-<w><t>gar-ri-son</t></w>
+<w><t>gar-ri-son</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Gar-ri-son</t></w>
<w><t>gar-rot</t></w>
<w><t>gar-rote</t></w>
@@ -61523,7 +61525,7 @@
<w><t>Ge-rou-si-a</t></w>
<w><t>Ger-rard</t></w>
<w><t>Ger-ri</t></w>
-<w><t>Ger-ry</t></w>
+<w><t>Ger-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ger-ry-man-der</t></w>
<w><t>ger-ry-man-der-er</t></w>
<w><t>Gers</t></w>
@@ -64727,7 +64729,7 @@
<w><t>grouch-i-ness</t></w>
<w><t>grouch-y</t></w>
<w><t>Grou-chy</t></w>
-<w><t>ground</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>ground</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ground-a-ble</t></w>
<w><t>ground-a-bly</t></w>
<w><t>ground-age</t></w>
@@ -64985,7 +64987,7 @@
<w><t>guard-house</t></w>
<w><t>guard-hous-es</t></w>
<w><t>Guar-di</t></w>
-<w><t>guard-i-an</t></w>
+<w><t>guard-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>guard-i-an-less</t></w>
<w><t>guard-i-an-ship</t></w>
<w><t>guard-less</t></w>
@@ -67876,7 +67878,7 @@
<w><t>heart-wood</t></w>
<w><t>heart-worm</t></w>
<w><t>heart-y</t></w>
-<w><t>heat</t></w>
+<w><t>heat</t><verb><regular-root/></verb></w>
<w><t>heat-a-ble</t></w>
<phrase><t>heat bar-ri-er</t></phrase>
<phrase><t>heat ca-pac-i-ty</t></phrase>
@@ -68436,7 +68438,7 @@
<w><t>Hel-vé-tius</t></w>
<w><t>helv-ing</t></w>
<w><t>He-lyne</t></w>
-<w><t>hem</t></w>
+<w><t>hem</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>he-ma-chrome</t></w>
<w><t>he-ma-cy-tom-e-ter</t></w>
<w><t>he-mag-glu-tin-ate</t></w>
@@ -74565,7 +74567,7 @@
<w><t>im-peach-a-bil-i-ty</t></w>
<w><t>im-peach-a-ble</t></w>
<w><t>im-peach-er</t></w>
-<w><t>im-peach-ment</t></w>
+<w><t>im-peach-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-pearl</t></w>
<w><t>im-pec-ca-bil-i-ty</t></w>
<w><t>im-pec-ca-ble</t></w>
@@ -74635,7 +74637,7 @@
<phrase><t>im-per-fect com-pe-ti-tion</t></phrase>
<w><t>im-per-fect-i-bil-i-ty</t></w>
<w><t>im-per-fect-i-ble</t></w>
-<w><t>im-per-fec-tion</t></w>
+<w><t>im-per-fec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-per-fec-tive</t></w>
<w><t>im-per-fect-ly</t></w>
<w><t>im-per-fect-ness</t></w>
@@ -74831,7 +74833,7 @@
<w><t>im-por-tance</t></w>
<w><t>im-por-tant</t></w>
<w><t>im-por-tant-ly</t></w>
-<w><t>im-por-ta-tion</t></w>
+<w><t>im-por-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-por-tee</t></w>
<w><t>im-port-er</t></w>
<w><t>im-por-tu-na-cy</t></w>
@@ -74857,7 +74859,7 @@
<w><t>im-pos-si-ble</t></w>
<w><t>im-pos-si-ble-ness</t></w>
<w><t>im-pos-si-bly</t></w>
-<w><t>im-post</t></w>
+<w><t>im-post</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-post-er</t></w>
<w><t>im-pos-thume</t></w>
<w><t>im-pos-tor</t></w>
@@ -76759,7 +76761,7 @@
<w><t>in-grat-i-tude</t></w>
<w><t>in-gra-ves-cence</t></w>
<w><t>in-gra-ves-cent</t></w>
-<w><t>in-gre-di-ent</t></w>
+<w><t>in-gre-di-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>In-gres</t></w>
<w><t>in-gress</t></w>
<w><t>in-gres-sion</t></w>
@@ -77025,7 +77027,7 @@
<w><t>in-no-vate</t></w>
<w><t>in-no-vat-ed</t></w>
<w><t>in-no-vat-ing</t></w>
-<w><t>in-no-va-tion</t></w>
+<w><t>in-no-va-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-no-va-tion-al</t></w>
<w><t>in-no-va-tive</t></w>
<w><t>in-no-va-tor</t></w>
@@ -79154,7 +79156,7 @@
<w><t>in-tru-sive</t></w>
<w><t>in-tru-sive-ly</t></w>
<w><t>in-tru-sive-ness</t></w>
-<w><t>in-trust</t></w>
+<w><t>in-trust</t><verb><regular-root/></verb></w>
<w><t>in-tu-bate</t></w>
<w><t>in-tu-bat-ed</t></w>
<w><t>in-tu-ba-tion</t></w>
@@ -79403,7 +79405,7 @@
<w><t>in-vo-lute-ly</t></w>
<w><t>in-vo-lu-tion</t></w>
<w><t>in-vo-lu-tion-al</t></w>
-<w><t>in-volve</t></w>
+<w><t>in-volve</t><verb><regular-root/></verb></w>
<w><t>in-volved</t></w>
<w><t>in-volv-ed-ly</t></w>
<w><t>in-volv-ed-ness</t></w>
@@ -81046,7 +81048,7 @@
<w><t>jaws</t></w>
<w><t>Jax-ar-tes</t></w>
<w><t>jay</t></w>
-<w><t>Jay</t></w>
+<w><t>Jay</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Jay-a-war-den-a</t></w>
<w><t>jay-bird</t></w>
<w><t>Jay-cee</t></w>
@@ -81937,7 +81939,7 @@
<w><t>judg-mat-ic</t></w>
<w><t>judg-mat-i-cal</t></w>
<w><t>judg-mat-i-cal-ly</t></w>
-<w><t>judg-ment</t></w>
+<w><t>judg-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Judg-ment</t></w>
<w><t>judg-men-tal</t></w>
<phrase><t>Judg-ment Day</t></phrase>
@@ -86490,7 +86492,7 @@
<w><t>Le-doux</t></w>
<phrase><t>Led Zep-pe-lin</t></phrase>
<w><t>lee</t></w>
-<w><t>Lee</t></w>
+<w><t>Lee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>lee-board</t></w>
<w><t>leech</t></w>
<w><t>Leech-burg</t></w>
@@ -86624,7 +86626,7 @@
<phrase><t>leg-is-la-tive as-sem-bly</t></phrase>
<phrase><t>leg-is-la-tive coun-cil</t></phrase>
<w><t>leg-is-la-tive-ly</t></w>
-<w><t>leg-is-la-tor</t></w>
+<w><t>leg-is-la-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>leg-is-la-to-ri-al</t></w>
<w><t>leg-is-la-tor-ship</t></w>
<w><t>leg-is-la-tress</t></w>
@@ -86631,7 +86633,7 @@
<w><t>leg-is-la-tri-ces</t></w>
<w><t>leg-is-la-trix</t></w>
<w><t>leg-is-la-trix-es</t></w>
-<w><t>leg-is-la-ture</t></w>
+<w><t>leg-is-la-ture</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>le-gist</t></w>
<w><t>le-git</t></w>
<w><t>leg-i-tim</t></w>
@@ -87896,7 +87898,7 @@
<w><t>lim-it-a-ble-ness</t></w>
<w><t>lim-i-tar-i-an</t></w>
<w><t>lim-i-tar-y</t></w>
-<w><t>lim-i-ta-tion</t></w>
+<w><t>lim-i-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>lim-i-ta-tive</t></w>
<w><t>lim-it-ed</t></w>
<phrase><t>lim-it-ed com-pa-ny</t></phrase>
@@ -90545,7 +90547,7 @@
<w><t>mach-i-nat-ing</t></w>
<w><t>mach-i-na-tion</t></w>
<w><t>mach-i-na-tor</t></w>
-<w><t>ma-chine</t></w>
+<w><t>ma-chine</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ma-chine-a-ble</t></w>
<phrase><t>ma-chine bolt</t></phrase>
<w><t>ma-chined</t></w>
@@ -90738,7 +90740,7 @@
<w><t>Ma-di-an</t></w>
<w><t>Ma-dill</t></w>
<phrase><t>Ma-di-na do Bo-e</t></phrase>
-<w><t>Mad-i-son</t></w>
+<w><t>Mad-i-son</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Mad-i-son Av-e-nue</t></phrase>
<w><t>mad-ly</t></w>
<w><t>Mad-lyn</t></w>
@@ -91185,7 +91187,7 @@
<w><t>ma-jor-ette</t></w>
<phrase><t>ma-jor gen-er-al</t></phrase>
<w><t>ma-jor=gen-er-al-ship</t></w>
-<w><t>ma-jor-i-ty</t></w>
+<w><t>ma-jor-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ma-jor-i-ty car-ri-er</t></phrase>
<phrase><t>ma-jor league</t></phrase>
<w><t>ma-jor=lea-guer</t></w>
@@ -92498,7 +92500,7 @@
<w><t>mar-tial-ness</t></w>
<w><t>Mar-tian</t></w>
<w><t>mar-tin</t></w>
-<w><t>Mar-tin</t></w>
+<w><t>Mar-tin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Mar-ti-n’</t></w>
<w><t>Mar-ti-na</t></w>
<phrase><t>Mar-tin du Gard</t></phrase>
@@ -94414,7 +94416,7 @@
<w><t>mes-en-ter-on</t></w>
<w><t>mes-en-ter-on-ic</t></w>
<w><t>mes-en-ter-y</t></w>
-<w><t>mesh</t></w>
+<w><t>mesh</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Me-shach</t></w>
<phrase><t>mesh con-nec-tion</t></phrase>
<w><t>Me-shed</t></w>
@@ -95872,7 +95874,7 @@
<phrase><t>min-i-mum wage</t></phrase>
<w><t>min-i-mus</t></w>
<w><t>min-ing</t></w>
-<w><t>min-ion</t></w>
+<w><t>min-ion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>min-i-pill</t></w>
<w><t>min-i-skirt</t></w>
<w><t>min-i-skirt-ed</t></w>
@@ -97652,7 +97654,7 @@
<phrase><t>mo-nop-o-lis-tic com-pe-ti-tion</t></phrase>
<w><t>mo-nop-o-lize</t></w>
<w><t>mo-nop-o-loid</t></w>
-<w><t>mo-nop-o-ly</t></w>
+<w><t>mo-nop-o-ly</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Mo-nop-o-ly</t></w>
<w><t>mon-o-pol-y-logue</t></w>
<w><t>mon-o-pro-pel-lant</t></w>
@@ -98142,7 +98144,7 @@
<w><t>Mor-phy</t></w>
<w><t>Mor-rie</t></w>
<w><t>Mor-ril-ton</t></w>
-<w><t>Mor-ris</t></w>
+<w><t>Mor-ris</t><noun><convertible-to-possessive/></noun></w>
<phrase><t>Mor-ris chair</t></phrase>
<phrase><t>mor-ris dance</t></phrase>
<w><t>Mor-ri-son</t></w>
@@ -100321,7 +100323,7 @@
<w><t>na-wab-ship</t></w>
<w><t>Nax-al-ite</t></w>
<w><t>Nax-os</t></w>
-<w><t>nay</t></w>
+<w><t>nay</t><noun><pluralizable/></noun></w>
<w><t>Na-ya-rit</t></w>
<w><t>Na-zaire</t></w>
<w><t>Naz-a-rene</t></w>
@@ -108152,7 +108154,7 @@
<w><t>oat-en</t></w>
<w><t>oat-er</t></w>
<w><t>Oates</t></w>
-<w><t>oath</t></w>
+<w><t>oath</t><noun><pluralizable/></noun></w>
<w><t>oat-meal</t></w>
<w><t>OAU</t></w>
<w><t>Oa-xa-ca</t></w>
@@ -108881,7 +108883,7 @@
<w><t>of-fer-to-ri-al</t></w>
<w><t>of-fer-to-ry</t></w>
<w><t>off-hand</t></w>
-<w><t>of-fice</t></w>
+<w><t>of-fice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>of-fice bear-er</t></phrase>
<phrase><t>of-fice block</t></phrase>
<phrase><t>of-fice boy</t></phrase>
@@ -110694,12 +110696,13 @@
<w><t>OTC</t></w>
<w><t>O-tel-ia</t></w>
<w><t>O-tel-lo</t></w>
-<w><t>oth-er</t><pronoun/><adjective/></w>
+<w><t>oth-er</t><pronoun></pronoun><adjective/></w>
<w><t>oth-er=di-rect-ed</t></w>
<w><t>oth-er-gates</t></w>
<w><t>oth-er-guess</t></w>
<w><t>oth-er-ness</t></w>
<w><t>oth-ers</t><pronoun/></w>
+<w><t>oth-er’s</t></w>
<w><t>oth-er-where</t></w>
<w><t>oth-er-while</t></w>
<w><t>oth-er-whiles</t></w>
@@ -113413,7 +113416,7 @@
<w><t>o-ver-seed</t></w>
<w><t>o-ver-see-ing</t></w>
<w><t>o-ver-seen</t></w>
-<w><t>o-ver-se-er</t></w>
+<w><t>o-ver-se-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>o-ver-sell</t></w>
<w><t>o-ver-sell-ing</t></w>
<w><t>o-ver-sen-si-ble</t></w>
@@ -117282,7 +117285,7 @@
<w><t>pe-on-ism</t></w>
<w><t>Pe-o-ny</t></w>
<w><t>pe-o-ny</t></w>
-<w><t>peo-ple</t></w>
+<w><t>peo-ple</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>peo-ple-less</t></w>
<w><t>peo-pler</t></w>
<phrase><t>peo-ple’s de-moc-ra-cy</t></phrase>
@@ -118817,7 +118820,7 @@
<w><t>phil-o-pe-na</t></w>
<w><t>phil-o-pro-gen-i-tive</t></w>
<w><t>philos</t></w>
-<w><t>phi-los-o-pher</t></w>
+<w><t>phi-los-o-pher</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>phi-los-o-pher-ship</t></w>
<phrase><t>phi-los-o-pher’s stone</t></phrase>
<w><t>phil-o-soph-ic</t></w>
@@ -120457,7 +120460,7 @@
<w><t>pla-gi-o-cli-max</t></w>
<w><t>pla-gi-ot-ro-pism</t></w>
<w><t>pla-gi-o-trop-ism</t></w>
-<w><t>plague</t></w>
+<w><t>plague</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>pla-guer</t></w>
<w><t>pla-guey</t></w>
<w><t>pla-guing</t></w>
@@ -123725,7 +123728,7 @@
<w><t>pre-ced-ed</t></w>
<w><t>prec-e-dence</t></w>
<w><t>prec-e-den-cy</t></w>
-<w><t>prec-e-dent</t></w>
+<w><t>prec-e-dent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-ced-ent</t></w>
<w><t>prec-e-dent-ed</t></w>
<w><t>prec-e-den-tial</t></w>
@@ -127672,7 +127675,7 @@
<w><t>pro=Hai-tian</t></w>
<w><t>pro=Ha-wai-ian</t></w>
<w><t>pro=Hel-len-ic</t></w>
-<w><t>pro-hib-it</t></w>
+<w><t>pro-hib-it</t><verb><regular-root/></verb></w>
<w><t>pro-hib-it-er</t></w>
<w><t>Pro-hi-bi-tion</t></w>
<w><t>pro-hi-bi-tion</t></w>
@@ -128033,7 +128036,7 @@
<phrase><t>prop-er noun</t></phrase>
<w><t>prop-er-tied</t></w>
<w><t>Pro-per-ti-us</t></w>
-<w><t>prop-er-ty</t></w>
+<w><t>prop-er-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>prop-er-ty man</t></phrase>
<w><t>pro=Pe-ru-vi-an</t></w>
<w><t>pro-phage</t></w>
@@ -129737,7 +129740,7 @@
<w><t>pun-ish</t><verb><regular-root/></verb></w>
<w><t>pun-ish-a-ble</t></w>
<w><t>pun-ish-er</t></w>
-<w><t>pun-ish-ment</t></w>
+<w><t>pun-ish-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pu-ni-tive</t></w>
<w><t>pu-ni-tive-ly</t></w>
<w><t>pu-ni-tive-ness</t></w>
@@ -132348,7 +132351,7 @@
<w><t>Rand-ers</t></w>
<w><t>ran-die</t></w>
<w><t>rand-ies</t></w>
-<w><t>Ran-dolph</t></w>
+<w><t>Ran-dolph</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ran-dom</t></w>
<phrase><t>ran-dom ac-cess</t></phrase>
<w><t>ran-dom-ise</t></w>
@@ -132586,6 +132589,7 @@
<w><t>ra-ther</t></w>
<w><t>rat-hole</t></w>
<w><t>raths-kel-ler</t></w>
+<w><t>rat-i-fi-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>rat-i-fi-ca-tion-ist</t></w>
<w><t>rat-i-fied</t></w>
<w><t>rat-i-fi-er</t></w>
@@ -134313,6 +134317,7 @@
<w><t>re-ducer</t></w>
<w><t>re-duc-er</t></w>
<w><t>re-duc-i-bil-i-ty</t></w>
+<w><t>re-duc-i-ble</t><adjective><extensible value="false"/></adjective></w>
<w><t>re-duc-i-ble-ness</t></w>
<w><t>re-duc-i-bly</t></w>
<w><t>re-duc-ing</t></w>
@@ -135571,7 +135576,7 @@
<w><t>re-la-tum</t></w>
<w><t>re-launch</t></w>
<w><t>re-laun-der</t></w>
-<w><t>re-lax</t></w>
+<w><t>re-lax</t><verb><regular-root/></verb></w>
<w><t>re-lax-a-laid</t></w>
<w><t>re-lax-a-lay-ing</t></w>
<w><t>re-lax-ant</t></w>
@@ -135952,7 +135957,7 @@
<w><t>re-mov-a-bly</t></w>
<w><t>re-mov-al</t></w>
<w><t>re-mov-al-ist</t></w>
-<w><t>re-move</t></w>
+<w><t>re-move</t><verb><regular-root/></verb></w>
<w><t>re-moved</t></w>
<w><t>re-mov-ed-ly</t></w>
<w><t>re-mov-ed-ness</t></w>
@@ -136243,7 +136248,7 @@
<w><t>re-pay-a-ble</t></w>
<w><t>re-pay-ment</t></w>
<w><t>re-pays</t><verb><regular-root value="false"/></verb></w>
-<w><t>re-peal</t></w>
+<w><t>re-peal</t><verb><regular-root/></verb></w>
<w><t>Re-peal</t></w>
<w><t>re-peal-a-bil-i-ty</t></w>
<w><t>re-peal-a-ble</t></w>
@@ -136784,7 +136789,7 @@
<w><t>re-sent-ful</t></w>
<w><t>re-sent-ful-ly</t></w>
<w><t>re-sent-ful-ness</t></w>
-<w><t>re-sent-ment</t></w>
+<w><t>re-sent-ment</t><noun><pluralizable/></noun></w>
<w><t>re-sep-a-rate</t></w>
<w><t>re-sep-a-rat-ed</t></w>
<w><t>re-sep-a-rat-ing</t></w>
@@ -137148,7 +137153,7 @@
<w><t>re-stor-er</t></w>
<w><t>re-stor-ing</t></w>
<w><t>re-straight-en</t></w>
-<w><t>re-strain</t></w>
+<w><t>re-strain</t><verb><regular-root/></verb></w>
<w><t>re-strain-a-bil-i-ty</t></w>
<w><t>re-strain-a-ble</t></w>
<w><t>re-strain-ed-ly</t></w>
@@ -138690,7 +138695,7 @@
<w><t>ris-i-ble</t></w>
<w><t>ris-ing</t></w>
<phrase><t>ris-ing trot</t></phrase>
-<w><t>risk</t></w>
+<w><t>risk</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>risk cap-i-tal</t></phrase>
<w><t>risk-er</t></w>
<w><t>risk-i-er</t></w>
@@ -143047,10 +143052,11 @@
<w><t>seal-skin</t></w>
<phrase><t>Seal-y-ham ter-ri-er</t></phrase>
<w><t>seam</t></w>
-<w><t>sea-man</t></w>
+<w><t>sea-man</t><noun><singular/></noun></w>
<w><t>sea-man-like</t></w>
<w><t>sea-man-ship</t></w>
<w><t>sea-mark</t></w>
+<w><t>sea-men</t><noun><plural/></noun></w>
<w><t>seam-er</t></w>
<w><t>Se-a-mi</t></w>
<w><t>seam-i-er</t></w>
@@ -145281,7 +145287,7 @@
<w><t>sen-ate</t></w>
<w><t>Sen-ate</t></w>
<w><t>Sen-a-to-bi-a</t></w>
-<w><t>sen-a-tor</t></w>
+<w><t>sen-a-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sen-a-to-ri-al</t></w>
<w><t>sen-a-to-ri-al-ly</t></w>
<w><t>sen-a-tor-ship</t></w>
@@ -146327,7 +146333,7 @@
<w><t>Sha-ra-ku</t></w>
<w><t>shard</t></w>
<w><t>Shar-da-na</t></w>
-<w><t>share</t></w>
+<w><t>share</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>share-a-ble</t></w>
<phrase><t>share cer-tif-i-cate</t></phrase>
<w><t>share-crop</t></w>
@@ -146620,7 +146626,7 @@
<w><t>she-ri-a</t></w>
<w><t>Sher-i-dan</t></w>
<w><t>she-rif</t></w>
-<w><t>sher-iff</t></w>
+<w><t>sher-iff</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>sher-iff court</t></phrase>
<w><t>Sher-ley</t></w>
<w><t>sher-lock</t></w>
@@ -146766,6 +146772,7 @@
<w><t>ship-board</t></w>
<w><t>ship-boy</t></w>
<w><t>ship-build-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>ship=build-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ship chan-dler</t></phrase>
<w><t>ship-en-tine</t></w>
<w><t>ship-fit-ter</t></w>
@@ -147878,7 +147885,7 @@
<w><t>Si-na-tra</t></w>
<w><t>Sin-bad</t></w>
<w><t>since</t></w>
-<w><t>sin-cere</t></w>
+<w><t>sin-cere</t><adjective><extensible/></adjective></w>
<w><t>sin-cere-ly</t></w>
<w><t>sin-cere-ness</t></w>
<w><t>sin-cer-i-ty</t></w>
@@ -149059,6 +149066,7 @@
<w><t>small=mind-ed-ly</t></w>
<w><t>small=mind-ed-ness</t></w>
<phrase><t>small-mouth bass</t></phrase>
+<w><t>small-ness</t><noun><singular/></noun></w>
<phrase><t>small pi-ca</t></phrase>
<phrase><t>small po-ta-toes</t></phrase>
<w><t>small-pox</t></w>
@@ -151097,7 +151105,7 @@
<phrase><t>spec-u-lum met-al</t></phrase>
<w><t>sped</t></w>
<w><t>Spee</t></w>
-<w><t>speech</t></w>
+<w><t>speech</t><noun><pluralizable/></noun></w>
<phrase><t>speech com-mun-i-ty</t></phrase>
<w><t>speech-i-fy</t></w>
<w><t>speech-less</t></w>
@@ -152539,7 +152547,7 @@
<w><t>stam-mer-ing-ness</t></w>
<w><t>stamm-rel</t></w>
<w><t>stam-nos</t></w>
-<w><t>stamp</t></w>
+<w><t>stamp</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>stamp du-ty</t></phrase>
<w><t>stam-ped-a-ble</t></w>
<w><t>stam-pede</t></w>
@@ -158046,7 +158054,7 @@
<w><t>sur-mised-ly</t></w>
<w><t>sur-mis-er</t></w>
<w><t>sûr-mis-ing</t></w>
-<w><t>sur-mount</t></w>
+<w><t>sur-mount</t><verb><regular-root/></verb></w>
<w><t>sur-mount-a-ble</t></w>
<w><t>sur-mount-a-ble-ness</t></w>
<w><t>sur-mount-er</t></w>
@@ -158171,7 +158179,7 @@
<w><t>sus-pen-soid</t></w>
<w><t>sus-pen-sor</t></w>
<w><t>sus-pen-so-ry</t></w>
-<w><t>sus-pi-cion</t></w>
+<w><t>sus-pi-cion</t><noun><pluralizable/></noun></w>
<w><t>sus-pi-cion-ful</t></w>
<w><t>sus-pi-cion-less</t></w>
<w><t>sus-pi-cious</t></w>
@@ -159135,7 +159143,7 @@
<w><t>sys-sar-co-sis</t></w>
<w><t>syst</t></w>
<w><t>sys-tal-tic</t></w>
-<w><t>sys-tem</t></w>
+<w><t>sys-tem</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sys-tem-at-ic</t></w>
<w><t>sys-tem-at-i-cal</t></w>
<w><t>sys-tem-at-ics</t></w>
@@ -162137,7 +162145,7 @@
<phrase><t>Thir-ty Years’ War</t></phrase>
<w><t>this</t></w>
<w><t>This-be</t></w>
-<w><t>this-tle</t></w>
+<w><t>this-tle</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>this-tle-down</t></w>
<w><t>this-tle-like</t></w>
<w><t>this-tly</t></w>
@@ -163852,7 +163860,7 @@
<w><t>tot-ting</t></w>
<w><t>tou-can</t></w>
<w><t>tou-can-et</t></w>
-<w><t>touch</t></w>
+<w><t>touch</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>tou-ch</t></w>
<w><t>touch-a-ble</t></w>
<w><t>touch-a-ble-ness</t></w>
@@ -165108,7 +165116,7 @@
<w><t>tri-bro-mo-ac-et-al-de-hyde</t></w>
<w><t>tri-bro-mo-eth-a-nol</t></w>
<w><t>trib-u-la-tion</t></w>
-<w><t>tri-bu-nal</t></w>
+<w><t>tri-bu-nal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>trib-u-nate</t></w>
<w><t>trib-une</t></w>
<w><t>trib-une-ship</t></w>
@@ -166863,7 +166871,7 @@
<w><t>tyr-an-nous</t></w>
<w><t>tyr-an-nous-ly</t></w>
<w><t>tyr-an-nous-ness</t></w>
-<w><t>tyr-an-ny</t></w>
+<w><t>tyr-an-ny</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tyrant</t></w>
<w><t>ty-rant</t></w>
<phrase><t>ty-rant fly-catch-er</t></phrase>
@@ -171894,6 +171902,7 @@
<w><t>un-fix-a-ble</t></w>
<w><t>un-fix-at-ed</t></w>
<w><t>un-fix-a-tive</t></w>
+<w><t>un-fixed</t><adjective><extensible value="false"/></adjective></w>
<w><t>un-fix-i-ty</t></w>
<w><t>un-flagged</t></w>
<w><t>un-flag-ging</t></w>
@@ -172598,7 +172607,8 @@
<w><t>un-hanked</t></w>
<w><t>un-hap-pi</t></w>
<w><t>un-hap-pi-er</t></w>
-<w><t>un-hap-pi-ness</t></w>
+<w><t>un-hap-pi-er</t></w>
+<w><t>un-hap-pi-ly</t><adverb/></w>
<w><t>un-hap-py</t></w>
<w><t>un-ha-rangued</t></w>
<w><t>un-har-assed</t></w>
@@ -179542,7 +179552,7 @@
<w><t>u-su-ri-ous-ly</t></w>
<w><t>u-su-ri-ous-ness</t></w>
<w><t>u-surp</t></w>
-<w><t>u-sur-pa-tion</t></w>
+<w><t>u-sur-pa-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>u-surp-a-tive</t></w>
<w><t>u-surp-er</t></w>
<w><t>u-surp-ing-ly</t></w>
@@ -182058,11 +182068,11 @@
<w><t>vo-ta-ress</t></w>
<w><t>vo-ta-rist</t></w>
<w><t>vo-ta-ry</t></w>
-<w><t>vote</t></w>
+<w><t>vote</t><noun><pluralizable/></noun></w>
<w><t>vote-a-ble</t></w>
<w><t>vot-ed</t></w>
<w><t>vote-less</t></w>
-<w><t>vot-er</t></w>
+<w><t>vot-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>vot-ing</t></w>
<phrase><t>vot-ing ma-chine</t></phrase>
<w><t>vo-tive</t></w>
@@ -182695,6 +182705,7 @@
<w><t>war-rant</t></w>
<w><t>war-rant-a-ble</t></w>
<w><t>war-rant-a-ble-ness</t></w>
+<w><t>war-ran-ted</t><adjective><extensible value="false"/></adjective><comment>Not in NOAD.</comment></w>
<w><t>war-ran-tee</t></w>
<w><t>war-rant-er</t></w>
<phrase><t>war-rant of-fic-er</t></phrase>
@@ -186431,7 +186442,7 @@
<w><t>y-clept</t></w>
<phrase><t>Y con-nec-tion</t></phrase>
<w><t>ye</t></w>
-<w><t>yea</t></w>
+<w><t>yea</t><noun><pluralizable/></noun></w>
<w><t>yeah</t></w>
<w><t>yeal-ing</t></w>
<w><t>yean</t></w>
@@ -186873,7 +186884,7 @@
<w><t>zeal</t></w>
<w><t>Zea-land</t></w>
<w><t>zeal-less</t></w>
-<w><t>zeal-ot</t></w>
+<w><t>zeal-ot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Zeal-ot</t></w>
<w><t>zeal-ot-ry</t></w>
<w><t>zeal-ous</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-08-25 14:35:45 UTC (rev 13177)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2023-08-25 23:13:24 UTC (rev 13178)
@@ -34,6 +34,7 @@
<w><t>et</t></w>
<w><t>e-van-gel-i-ar-i-um</t></w>
<w><t>ex</t></w>
+<w><t>fac-to</t></w>
<w><t>fide</t></w>
<w><t>fit</t></w>
<w><t>hoc</t></w>
@@ -71,6 +72,7 @@
<w><t>sig-no</t></w>
<w><t>su-tor</t></w>
<w><t>sym-bol-um</t></w>
+<w><t>tem-po-re</t></w>
<w><t>trans-eunte</t></w>
<w><t>ul-ti-ma</t></w>
<w><t>ul-tra</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-25 14:35:45 UTC (rev 13177)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-25 23:13:24 UTC (rev 13178)
@@ -148,7 +148,7 @@
</derivative-rule>
</derivative-pattern>
<derivative-pattern desc="ends with /-ing/">
- <match>^([a-zA-Z\-]+)ing$</match>
+ <match>^([a-zëA-Z\-]+)ing$</match>
<replace>$1</replace>
<derivative-rule>
<verb><regular-root/></verb>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-25 14:35:47
|
Revision: 13177
http://sourceforge.net/p/foray/code/13177
Author: victormote
Date: 2023-08-25 14:35:45 +0000 (Fri, 25 Aug 2023)
Log Message:
-----------
Fix orthography config for testing.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
trunk/foray/foray-orthography/src/test/resources/orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-08-25 12:02:23 UTC (rev 13176)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/OrthographyServer4a.java 2023-08-25 14:35:45 UTC (rev 13177)
@@ -300,7 +300,10 @@
// throw new IllegalStateException(String.format(
// "Dictionary already registered for ID: %1s", resource.getId()));
// }
- this.dictionaryMap.put(resource.getId(), resource);
+ /* Not every orthography has a dictionary resource. */
+ if (resource != null) {
+ this.dictionaryMap.put(resource.getId(), resource);
+ }
}
@Override
Modified: trunk/foray/foray-orthography/src/test/resources/orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/test/resources/orthography-config.xml 2023-08-25 12:02:23 UTC (rev 13176)
+++ trunk/foray/foray-orthography/src/test/resources/orthography-config.xml 2023-08-25 14:35:45 UTC (rev 13177)
@@ -21,7 +21,7 @@
<derivative-factory class="org.foray.orthography.wrapper.LatinPast1WordFactory"/>
</derivative-factory-list>
- <dictionary-resource id="dictionary-eng-moby">
+ <dictionary-resource id="org.foray.eng.Latn.ZZZ">
<parsed-resource>
<resource-location type="classpath">/resources/org/foray/dictionaries/eng-Latn-ZZZ.jbso</resource-location>
</parsed-resource>
@@ -126,7 +126,7 @@
</hyphenation-patterns-resource>
<configuration>
- <dictionary reference="dictionary-eng-moby"/>
+ <dictionary reference="org.foray.eng.Latn.ZZZ"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="derivatives-eng"/>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2023-08-25 12:02:29
|
Revision: 13176
http://sourceforge.net/p/foray/code/13176
Author: victormote
Date: 2023-08-25 12:02:23 +0000 (Fri, 25 Aug 2023)
Log Message:
-----------
Rename English archaic file. Clean up dictionary references.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA-archaic.dict.xml
Removed Paths:
-------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
Copied: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA-archaic.dict.xml (from rev 13175, trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml)
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA-archaic.dict.xml (rev 0)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA-archaic.dict.xml 2023-08-25 12:02:23 UTC (rev 13176)
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE axsl-dictionary
+ PUBLIC "-//aXSL//DTD Dictionary V0.1//EN"
+ "http://www.axsl.org/dtds/0.1/en/axsl-dictionary.dtd">
+
+<axsl-dictionary
+ id="org.foray.eng.Latn.USA.1920" overrides="org.foray.eng.Latn.USA"
+ language="eng" script="Latn" epoch="1920"
+ hard-hyphen-char="=" soft-hyphen-char="-">
+
+<w><t>ac-o-lyth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>arch-bish-op-rick</t><noun/></w>
+<w><t>be-hoof</t><noun/></w>
+<w><t>Car-tha-gen-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun><comment>Carthaginian.</comment></w>
+<w><t>ceil</t><verb><regular-root/></verb></w>
+<w><t>ceil-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>eat-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>ex-pound-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>ex-ult-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>go-eth</t><verb/></w>
+<w><t>hat-eth</t></w>
+<w><t>hum-bleth</t><verb><regular-root value="false"/></verb></w>
+<w><t>Jno</t><abbrev referenced-word="John"/></w>
+<w><t>Kal-a-bar</t><comment>Calabar</comment></w>
+<w><t>Kam-e-run</t><comment>Cameroon</comment></w>
+<w><t>Kam-e-runs</t><comment>Related to Cameroon</comment></w>
+<w><t>liv-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>lo</t><interjection/><comment>Imperative of "look".</comment></w>
+<w><t>lov-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>mim-ick-ry</t><noun><pluralizable/></noun></w>
+<w><t>oth-er-ways</t><adjective/><adverb/></w>
+<w><t>per-suad-est</t><verb><regular-root value="false"/></verb></w>
+<w><t>pre-ëm-i-nence</t></w>
+<w><t>pre-ëm-i-nent-ly</t><adverb/></w>
+<w><t>pro-nounc-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>re-prov-eth</t></w>
+<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
+<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
+<w><t>speak-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>ten-our</t></w>
+<w><t>un-lade</t><verb><regular-root/></verb></w>
+<w><t>walk-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>wip-eth</t><verb><regular-root value="false"/></verb></w>
+
+</axsl-dictionary>
Deleted: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2023-08-25 11:53:36 UTC (rev 13175)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2023-08-25 12:02:23 UTC (rev 13176)
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!DOCTYPE axsl-dictionary
- PUBLIC "-//aXSL//DTD Dictionary V0.1//EN"
- "http://www.axsl.org/dtds/0.1/en/axsl-dictionary.dtd">
-
-<axsl-dictionary
- id="org.foray.eng.Latn.USA.1920" overrides="org.foray.eng.Latn.USA"
- language="eng" script="Latn" epoch="1920"
- hard-hyphen-char="=" soft-hyphen-char="-">
-
-<w><t>ac-o-lyth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>arch-bish-op-rick</t><noun/></w>
-<w><t>be-hoof</t><noun/></w>
-<w><t>Car-tha-gen-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun><comment>Carthaginian.</comment></w>
-<w><t>ceil</t><verb><regular-root/></verb></w>
-<w><t>ceil-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>eat-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>ex-pound-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>ex-ult-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>go-eth</t><verb/></w>
-<w><t>hat-eth</t></w>
-<w><t>hum-bleth</t><verb><regular-root value="false"/></verb></w>
-<w><t>Jno</t><abbrev referenced-word="John"/></w>
-<w><t>Kal-a-bar</t><comment>Calabar</comment></w>
-<w><t>Kam-e-run</t><comment>Cameroon</comment></w>
-<w><t>Kam-e-runs</t><comment>Related to Cameroon</comment></w>
-<w><t>liv-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>lo</t><interjection/><comment>Imperative of "look".</comment></w>
-<w><t>lov-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>mim-ick-ry</t><noun><pluralizable/></noun></w>
-<w><t>oth-er-ways</t><adjective/><adverb/></w>
-<w><t>per-suad-est</t><verb><regular-root value="false"/></verb></w>
-<w><t>pre-ëm-i-nence</t></w>
-<w><t>pre-ëm-i-nent-ly</t><adverb/></w>
-<w><t>pro-nounc-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>re-prov-eth</t></w>
-<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
-<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
-<w><t>speak-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>ten-our</t></w>
-<w><t>un-lade</t><verb><regular-root/></verb></w>
-<w><t>walk-eth</t><verb><regular-root value="false"/></verb></w>
-<w><t>wip-eth</t><verb><regular-root value="false"/></verb></w>
-
-</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-25 11:53:36 UTC (rev 13175)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2023-08-25 12:02:23 UTC (rev 13176)
@@ -257,12 +257,12 @@
<dictionary-resource id="org.foray.eng.Latn.USA.1920">
<unparsed-dictionary>
<dictionary-element>
- <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml</resource-location>
+ <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA-archaic.dict.xml</resource-location>
</dictionary-element>
</unparsed-dictionary>
</dictionary-resource>
- <dictionary-resource id="dictionary-italian">
+ <dictionary-resource id="org.foray.ita.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
<resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/ita-Latn-ZZZ.dict.xml</resource-location>
@@ -270,7 +270,7 @@
</unparsed-dictionary>
</dictionary-resource>
- <dictionary-resource id="dictionary-latin">
+ <dictionary-resource id="org.foray.lat.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
<resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml</resource-location>
@@ -278,7 +278,7 @@
</unparsed-dictionary>
</dictionary-resource>
- <dictionary-resource id="dictionary-french">
+ <dictionary-resource id="org.foray.fre.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
<resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml</resource-location>
@@ -286,7 +286,7 @@
</unparsed-dictionary>
</dictionary-resource>
- <dictionary-resource id="dictionary-greek-translit-latin">
+ <dictionary-resource id="org.foray.grc.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
<resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml</resource-location>
@@ -294,7 +294,7 @@
</unparsed-dictionary>
</dictionary-resource>
- <dictionary-resource id="dictionary-hebrew-translit-latin">
+ <dictionary-resource id="org.foray.heb.Latn.ZZZ">
<unparsed-dictionary>
<dictionary-element>
<resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml</resource-location>
@@ -332,7 +332,7 @@
<configuration>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
<match-rules reference="lat-Latn-match-rules"/>
- <dictionary reference="dictionary-latin"/>
+ <dictionary reference="org.foray.lat.Latn.ZZZ"/>
<orthography language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
@@ -339,7 +339,7 @@
<configuration>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
- <dictionary reference="dictionary-italian"/>
+ <dictionary reference="org.foray.ita.Latn.ZZZ"/>
<orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
@@ -346,19 +346,19 @@
<configuration>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
<derivative-rules reference="fre-Latn-derivative-patterns"/>
- <dictionary reference="dictionary-french"/>
+ <dictionary reference="org.foray.fre.Latn.ZZZ"/>
<orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
<configuration>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <dictionary reference="dictionary-greek-translit-latin"/>
+ <dictionary reference="org.foray.grc.Latn.ZZZ"/>
<orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
<configuration>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
- <dictionary reference="dictionary-hebrew-translit-latin"/>
+ <dictionary reference="org.foray.heb.Latn.ZZZ"/>
<orthography language-iso-3char="heb" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|