foray-commit Mailing List for FOray (Page 47)
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
You can subscribe to this list here.
| 2006 |
Jan
|
Feb
|
Mar
(139) |
Apr
(98) |
May
(250) |
Jun
(394) |
Jul
(84) |
Aug
(13) |
Sep
(420) |
Oct
(186) |
Nov
(1) |
Dec
(3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2007 |
Jan
(108) |
Feb
(202) |
Mar
(291) |
Apr
(247) |
May
(374) |
Jun
(227) |
Jul
(231) |
Aug
(60) |
Sep
(31) |
Oct
(45) |
Nov
(18) |
Dec
|
| 2008 |
Jan
(38) |
Feb
(71) |
Mar
(142) |
Apr
|
May
(59) |
Jun
(6) |
Jul
(10) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2009 |
Jan
(12) |
Feb
(4) |
Mar
(88) |
Apr
(121) |
May
(17) |
Jun
(30) |
Jul
|
Aug
(5) |
Sep
|
Oct
(1) |
Nov
|
Dec
|
| 2010 |
Jan
(11) |
Feb
(76) |
Mar
(11) |
Apr
|
May
(11) |
Jun
|
Jul
|
Aug
(44) |
Sep
(14) |
Oct
(7) |
Nov
|
Dec
|
| 2011 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(9) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(10) |
Nov
|
Dec
|
| 2012 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(3) |
Jul
(4) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2016 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(168) |
| 2017 |
Jan
(77) |
Feb
(11) |
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2018 |
Jan
|
Feb
|
Mar
(1) |
Apr
(6) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2019 |
Jan
|
Feb
(88) |
Mar
(118) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2020 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(6) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(141) |
| 2021 |
Jan
(170) |
Feb
(20) |
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
|
Sep
|
Oct
(62) |
Nov
(189) |
Dec
(162) |
| 2022 |
Jan
(201) |
Feb
(118) |
Mar
(8) |
Apr
|
May
(2) |
Jun
(47) |
Jul
(19) |
Aug
(14) |
Sep
(3) |
Oct
|
Nov
(28) |
Dec
(235) |
| 2023 |
Jan
(112) |
Feb
(23) |
Mar
(2) |
Apr
(2) |
May
|
Jun
(1) |
Jul
|
Aug
(70) |
Sep
(92) |
Oct
(20) |
Nov
(1) |
Dec
(1) |
| 2024 |
Jan
|
Feb
|
Mar
(1) |
Apr
(1) |
May
(14) |
Jun
(11) |
Jul
(1) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
| 2025 |
Jan
(10) |
Feb
(29) |
Mar
|
Apr
(162) |
May
(245) |
Jun
(83) |
Jul
|
Aug
(1) |
Sep
|
Oct
|
Nov
(2) |
Dec
|
|
From: <vic...@us...> - 2022-08-30 18:39:34
|
Revision: 12726
http://sourceforge.net/p/foray/code/12726
Author: victormote
Date: 2022-08-30 18:39:31 +0000 (Tue, 30 Aug 2022)
Log Message:
-----------
Rough-in parsing and storage of explicit tokens.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
Added: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java (rev 0)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java 2022-08-30 18:39:31 UTC (rev 12726)
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2022 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.orthography;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A collection of strings that should be treated as explicit tokens by a tokenizer/lexer.
+ */
+public class ExplicitTokens {
+
+ /** The initial size of the token collection. */
+ private static final int INITIAL_MAP_CAPACITY = 50;
+
+ /** The collection of explicit tokens. */
+ private Set<String> tokens = new HashSet<String>(INITIAL_MAP_CAPACITY);
+ /* TODO: Convert this to a TernaryTree. */
+
+ /** The size, in chars, of the largest member of {@link #tokens}. */
+ private int maxTokenSize;
+
+ /**
+ * Adds an explicit token to this collection.
+ * @param token The explicit token to be added.
+ */
+ public void addToken(final String token) {
+ if (token.length() > this.maxTokenSize) {
+ this.maxTokenSize = token.length();
+ }
+ this.tokens.add(token);
+ }
+
+ /**
+ * Searches a character sequence starting at a given index to see if the indexed character is the beginning of an
+ * explicit token, returning that token if it is.
+ * @param sequence The character sequence (usually a {@link String}) being searched.
+ * @param start The index to the first character in {@code sequence} to be tested.
+ * @return The matching token, if there is one, or null if not.
+ */
+ public String findToken(final CharSequence sequence, final int start) {
+ int index = start + 1;
+ while (index < sequence.length()
+ && index - start < this.maxTokenSize) {
+ /* TODO: This String creation is horrible and should only be used for proof of concept. */
+ final String testString = sequence.subSequence(start, index).toString();
+ if (this.tokens.contains(testString)) {
+ return testString;
+ }
+ index ++;
+ }
+ return null;
+ }
+
+}
Property changes on: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/ExplicitTokens.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2022-08-30 13:06:58 UTC (rev 12725)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2022-08-30 18:39:31 UTC (rev 12726)
@@ -81,6 +81,9 @@
/** Regex pattern used to break compound words into their components. */
private Pattern compoundWordBreaker = Pattern.compile(Character.toString(compoundWordMarker));
+ /** The explicit tokens for this orthography. */
+ private ExplicitTokens explicitTokens;
+
/**
* Constructor.
* @param server The parent hyphenation server.
@@ -480,4 +483,26 @@
return false;
}
+ /**
+ * Sets the explicit tokens for this orthography.
+ * @param tokens The new explicit tokens for this orthography.
+ */
+ public void setExplicitTokens(final ExplicitTokens tokens) {
+ this.explicitTokens = tokens;
+ }
+
+ /**
+ * Searches a character sequence starting at a given index to see if the indexed character is the beginning of an
+ * explicit token, returning that token if it is.
+ * @param sequence The character sequence (usually a {@link String}) being searched.
+ * @param start The index to the first character in {@code sequence} to be tested.
+ * @return The matching token, if there is one, or null if not.
+ */
+ public String findToken(final CharSequence sequence, final int start) {
+ if (this.explicitTokens == null) {
+ return null;
+ }
+ return this.explicitTokens.findToken(sequence, start);
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2022-08-30 13:06:58 UTC (rev 12725)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2022-08-30 18:39:31 UTC (rev 12726)
@@ -39,6 +39,7 @@
import org.foray.orthography.DerivativePattern;
import org.foray.orthography.DerivativeRule;
import org.foray.orthography.DictionaryResource;
+import org.foray.orthography.ExplicitTokens;
import org.foray.orthography.HyphenationPatternsResource;
import org.foray.orthography.Orthography4a;
import org.foray.orthography.OrthographyServer4a;
@@ -79,6 +80,9 @@
/** Stateful variable. */
private DictionaryResource currentDictionaryResource;
+ /** The current ExplicitTokens instance being parsed. */
+ private ExplicitTokens currentExplicitTokens;
+
/** Stateful variable. */
private DictionaryResource.WordListElement currentWordListElement;
@@ -123,7 +127,10 @@
// /** The map of match rule lists, keyed by id. */
// private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
-//
+
+ /** The map of parsed {@link ExplicitTokens} instances, keyed by id. */
+ private Map<String, ExplicitTokens> explicitTokensMap = new HashMap<String, ExplicitTokens>();
+
/** The map of derivative factory lists, keyed by id. */
private Map<String, List<WordWrapperFactory<?>>> derivativeLists =
new HashMap<String, List<WordWrapperFactory<?>>>();
@@ -415,9 +422,19 @@
case "convertible-to-possessive": return;
case "possessive": return;
case "extensible": return;
- case "explicit-token-list": return;
+ case "explicit-token-list": {
+ final String idString = attributes.getValue("id");
+ this.currentExplicitTokens = new ExplicitTokens();
+ this.explicitTokensMap.put(idString, currentExplicitTokens);
+ return;
+ }
case "explicit-token": return;
- case "explicit-tokens": return;
+ case "explicit-tokens": {
+ final String reference = attributes.getValue("reference");
+ final ExplicitTokens tokens = this.explicitTokensMap.get(reference);
+ this.currentOrthographyConfig.setExplicitTokens(tokens);
+ return;
+ }
default: {
/* Make sure user knows about unknown tag. */
errorMessage("Unknown tag in orthography configuration: {}", localName);
@@ -602,9 +619,14 @@
case "orthography": {
return;
}
+ case "explicit-token-list": {
+ this.currentExplicitTokens = null;
+ return;
+ }
case "explicit-token": {
- /* TODO: Complete this. For now, clear the text. */
- getAndClearText();
+ final String token = getAndClearText();
+ this.currentExplicitTokens.addToken(token);
+ return;
}
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-30 13:07:12
|
Revision: 12725
http://sourceforge.net/p/foray/code/12725
Author: victormote
Date: 2022-08-30 13:06:58 +0000 (Tue, 30 Aug 2022)
Log Message:
-----------
Improvements to orthography data.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-30 11:50:20 UTC (rev 12724)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-30 13:06:58 UTC (rev 12725)
@@ -14,6 +14,7 @@
<w><t>ceil</t><verb><regular-root/></verb></w>
<w><t>ceil-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>eat-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>ex-pound-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>ex-ult-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>go-eth</t><verb/></w>
<w><t>hum-bleth</t><verb><regular-root value="false"/></verb></w>
@@ -29,6 +30,7 @@
<w><t>per-suad-est</t><verb><regular-root value="false"/></verb></w>
<w><t>pre-ëm-i-nence</t></w>
<w><t>pre-ëm-i-nent-ly</t><adverb/></w>
+<w><t>pro-nounc-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>re-prov-eth</t></w>
<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-30 11:50:20 UTC (rev 12724)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-30 13:06:58 UTC (rev 12725)
@@ -8659,7 +8659,7 @@
<w><t>ap-pulse</t></w>
<w><t>ap-pul-sive</t></w>
<w><t>ap-pul-sive-ly</t></w>
-<w><t>ap-pur-te-nance</t></w>
+<w><t>ap-pur-te-nance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ap-pur-te-nant</t></w>
<w><t>a-pr</t></w>
<w><t>a-prax-i-a</t></w>
@@ -28599,7 +28599,7 @@
<w><t>clean-ness</t></w>
<w><t>clean-out</t></w>
<w><t>cleans-a-ble</t></w>
-<w><t>cleanse</t></w>
+<w><t>cleanse</t><verb><regular-root/></verb></w>
<w><t>cleans-er</t></w>
<w><t>clean=shav-en</t></w>
<w><t>cleans-ing</t></w>
@@ -30919,7 +30919,7 @@
<w><t>com-pa-tri-ot-ism</t></w>
<w><t>Comp-a-zine</t></w>
<w><t>com-peer</t></w>
-<w><t>com-pel</t></w>
+<w><t>com-pel</t><verb><regular-root/></verb></w>
<w><t>com-pel-la-ble</t></w>
<w><t>com-pel-la-bly</t></w>
<w><t>com-pel-la-tion</t></w>
@@ -52193,7 +52193,8 @@
<w><t>ex-cel-si-or</t></w>
<w><t>ex-cen-tric</t></w>
<w><t>ex-cep-a-ble</t></w>
-<w><t>ex-cept</t></w>
+<w><t>ex-cept</t><verb><regular-root value="false"/></verb><preposition/></w>
+<w><t>ex-cept-ed</t><verb><regular-root value="false"/></verb></w>
<w><t>ex-cept-ing</t></w>
<w><t>ex-cep-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-cep-tion-a-ble</t></w>
@@ -52206,6 +52207,7 @@
<w><t>ex-cep-tion-less</t></w>
<w><t>ex-cep-tive</t></w>
<w><t>ex-cep-tive-ly</t></w>
+<w><t>ex-cepts</t><verb><regular-root value="false"/></verb></w>
<w><t>ex-cerpt</t></w>
<w><t>ex-cerp-ta</t></w>
<w><t>ex-cerpt-er</t></w>
@@ -74104,7 +74106,7 @@
<w><t>il-lus-trate</t><verb><regular-root/></verb></w>
<w><t>il-lus-trat-ed</t></w>
<w><t>il-lus-trat-ing</t></w>
-<w><t>il-lus-tra-tion</t></w>
+<w><t>il-lus-tra-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>il-lus-tra-tion-al</t></w>
<w><t>il-lus-tra-tive</t></w>
<w><t>il-lus-tra-tive-ly</t></w>
@@ -117489,7 +117491,7 @@
<w><t>perennimeal-y=mouthed</t></w>
<w><t>Per-etz</t></w>
<w><t>perf</t></w>
-<w><t>per-fect</t></w>
+<w><t>per-fect</t><verb><regular-root/></verb><adjective><extensible value="false"/></adjective></w>
<phrase><t>per-fect com-pe-ti-tion</t></phrase>
<w><t>per-fect-ed-ly</t></w>
<w><t>per-fect-er</t></w>
@@ -124224,8 +124226,7 @@
<w><t>pre-de-ceiv-er</t></w>
<w><t>pre-de-ceiv-ing</t></w>
<w><t>pre-de-cep-tion</t></w>
-<w><t>pred-e-ces-sor</t></w>
-<w><t>pre-de-ces-sor</t></w>
+<w><t>pred-e-ces-sor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pre-de-cide</t></w>
<w><t>pre-de-cid-ed</t></w>
<w><t>pre-de-cid-ing</t></w>
@@ -129825,6 +129826,7 @@
<w><t>purg-ing</t></w>
<w><t>pu-ri</t></w>
<w><t>Pu-ri</t></w>
+<w><t>pu-ri-fi-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pu-ri-fi-ca-tor</t></w>
<w><t>pu-ri-fied</t></w>
<w><t>pu-ri-form</t></w>
@@ -138497,6 +138499,7 @@
<w><t>ri-gid-i-fied</t></w>
<w><t>ri-gid-i-fy</t></w>
<w><t>ri-gid-i-fy-ing</t></w>
+<w><t>rig-id-ly</t><adverb/></w>
<phrase><t>Ri-gil Kent</t></phrase>
<w><t>rig-ma-role</t></w>
<w><t>rig-ol</t></w>
@@ -159598,8 +159601,10 @@
<w><t>Tal-lys</t></w>
<w><t>tal-ly-shop</t></w>
<w><t>Tal-mud</t></w>
+<w><t>Tal-mud-ic</t><adjective><extensible value="false"/></adjective></w>
+<w><t>Tal-mud-i-cal</t><adjective><extensible value="false"/></adjective></w>
<w><t>Tal-mud-ism</t></w>
-<w><t>Tal-mud-ist</t></w>
+<w><t>Tal-mud-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tal-on</t></w>
<w><t>tal-oned</t></w>
<w><t>ta-loo-ka</t></w>
@@ -164875,7 +164880,7 @@
<phrase><t>treas-ur-y cer-tif-i-cate</t></phrase>
<phrase><t>treas-ur-y note</t></phrase>
<w><t>treat</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
-<w><t>trea-tise</t></w>
+<w><t>trea-tise</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>treat-ment</t></w>
<w><t>trea-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>trea-ty-less</t></w>
@@ -183129,7 +183134,7 @@
<w><t>WCTU</t></w>
<w><t>we</t></w>
<w><t>weak</t><adjective><extensible/></adjective></w>
-<w><t>weak-en</t></w>
+<w><t>weak-en</t><verb><regular-root/></verb></w>
<w><t>weak-en-er</t></w>
<phrase><t>weak-er sex</t></phrase>
<w><t>weak-fish</t></w>
@@ -185713,7 +185718,7 @@
<phrase><t>Worces-ter chi-na</t></phrase>
<phrase><t>Worces-ter sauce</t></phrase>
<w><t>Worces-ter-shire</t></w>
-<w><t>word</t></w>
+<w><t>word</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Word</t></w>
<w><t>word-age</t></w>
<phrase><t>word as-so-ci-a-tion</t></phrase>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2022-08-30 11:50:20 UTC (rev 12724)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2022-08-30 13:06:58 UTC (rev 12725)
@@ -16,6 +16,7 @@
<w><t>France</t></w>
<w><t>jér-é-mi-ade</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>la</t></w>
+<w><t>lit-er-a-teur</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>masse</t></w>
<w><t>route</t></w>
<w><t>voy-age</t></w>
Added: trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml (rev 0)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml 2022-08-30 13:06:58 UTC (rev 12725)
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE axsl-dictionary
+ PUBLIC "-//aXSL//DTD Dictionary V0.1//EN"
+ "http://www.axsl.org/dtds/0.1/en/axsl-dictionary.dtd">
+
+<axsl-dictionary language="heb" script="Latn" hard-hyphen-char="="
+ soft-hyphen-char="-">
+
+<!--
+-->
+
+<w><t>béth</t><noun></noun></w>
+</axsl-dictionary>
Property changes on: trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-08-30 11:50:20 UTC (rev 12724)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-08-30 13:06:58 UTC (rev 12725)
@@ -10,6 +10,7 @@
<!--
-->
+<w><t>ac-tu</t></w>
<w><t>ad</t></w>
<w><t>ann-um</t></w>
<w><t>a-pel-la</t></w>
@@ -43,8 +44,10 @@
<w><t>prop-a-gan-da</t></w>
<w><t>qui</t></w>
<w><t>sac-rae</t></w>
+<w><t>sac-ris</t></w>
<w><t>sem-per</t></w>
<w><t>sig-no</t></w>
+<w><t>trans-eunte</t></w>
<w><t>vin-ces</t></w>
<w><t>vive</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-08-30 11:50:20 UTC (rev 12724)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-08-30 13:06:58 UTC (rev 12725)
@@ -199,6 +199,18 @@
</derivative-pattern>
</derivative-pattern-list>
+ <derivative-pattern-list id="fre-Latn-derivative-patterns">
+ <derivative-pattern desc="ends with /-s/">
+ <match>^([a-zA-Z\-]+)s$</match>
+ <replace>$1</replace>
+ <derivative-rule>
+ <noun><pluralizable/></noun>
+ <derivative-type type="plural"/>
+ </derivative-rule>
+ </derivative-pattern>
+ </derivative-pattern-list>
+
+
<derivative-factory-list id="eng-Latn-derivatives">
<derivative-factory class="org.foray.orthography.wrapper.LatinPlural1WordFactory"/>
<derivative-factory class="org.foray.orthography.wrapper.LatinPlural2WordFactory"/>
@@ -252,6 +264,14 @@
</unparsed-dictionary>
</dictionary-resource>
+ <dictionary-resource id="dictionary-hebrew-translit-latin">
+ <unparsed-dictionary>
+ <dictionary-element>
+ <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/heb-Latn-ZZZ.dict.xml</resource-location>
+ </dictionary-element>
+ </unparsed-dictionary>
+ </dictionary-resource>
+
<hyphenation-patterns-resource id="hyph-patterns-eng">
<parsed-resource>
<resource-location type="classpath">/resources/org/foray/orthography/hyphPatterns/eng.jbso</resource-location>
@@ -294,6 +314,7 @@
<configuration>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <derivative-rules reference="fre-Latn-derivative-patterns"/>
<dictionary reference="dictionary-french"/>
<orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
@@ -304,4 +325,10 @@
<orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
+ <configuration>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <dictionary reference="dictionary-hebrew-translit-latin"/>
+ <orthography language-iso-3char="heb" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
+ </configuration>
+
</axsl-orthography-config>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-30 11:50:22
|
Revision: 12724
http://sourceforge.net/p/foray/code/12724
Author: victormote
Date: 2022-08-30 11:50:20 +0000 (Tue, 30 Aug 2022)
Log Message:
-----------
Only remove initial trailing punctuation if it is actually followed by a word break.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-08-30 11:23:09 UTC (rev 12723)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-08-30 11:50:20 UTC (rev 12724)
@@ -74,11 +74,11 @@
/** The punctuation characters which may, depending on context, be treated as intraword punctuation. */
private static final String POSSIBLE_INTRAWORD_PUNCTUATION = new String(new char[] {
- Basic_Latin_Block.APOSTROPHE,
- General_Punctuation_Block.RIGHT_SINGLE_QUOTATION_MARK,
+ Basic_Latin_Block.APOSTROPHE, //English example: 'Tis the season
+ General_Punctuation_Block.RIGHT_SINGLE_QUOTATION_MARK, //English example: ’Tis the season
Basic_Latin_Block.LEFT_PARENTHESIS, //English example of alternate spelling:
Basic_Latin_Block.RIGHT_PARENTHESIS, // pa(e)leography
- Basic_Latin_Block.FULL_STOP, //English example: "Section 8.16"
+ Basic_Latin_Block.FULL_STOP, //English example: Section 8.16
});
/** The punctuation characters which, when they immediately follow a word, can be separated from that word during
@@ -383,6 +383,21 @@
}
/**
+ * Indicates whether a given Unicode code point is a word break character.
+ * @param c The codepoint to be tested.
+ * @return True if and only if {@code c} is a word break character.
+ */
+ public static boolean isWordBreakChar(final int c) {
+ switch(c) {
+ case ' ': return true;
+ case '\r': return true;
+ case '\n': return true;
+ case '\t': return true;
+ }
+ return false;
+ }
+
+ /**
* Finds the index of the first letter in a character sequence.
* @param chars The character sequence to be tested.
* @return The index to the first letter in {@code chars}, or -1 if no letter is found.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-08-30 11:23:09 UTC (rev 12723)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-08-30 11:50:20 UTC (rev 12724)
@@ -468,27 +468,12 @@
}
/**
- * Indicates whether a given Unicode code point is a break character.
- * @param c The codepoint to be tested.
- * @return True if and only if {@code c} is a break character.
- */
- public boolean isBreakChar(final int c) {
- switch(c) {
- case ' ': return true;
- case '\r': return true;
- case '\n': return true;
- case '\t': return true;
- }
- return false;
- }
-
- /**
* Computes the word-breaking type of a char.
* @param c The char being tested.
* @return The word-breaking type of {@code c}.
*/
public CharType computeCharType(final int c) {
- if (isBreakChar(c)) {
+ if (CharacterUtils.isWordBreakChar(c)) {
return CharType.BREAK_CHAR;
}
if (isWordChar(c)) {
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-30 11:23:09 UTC (rev 12723)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-30 11:50:20 UTC (rev 12724)
@@ -86,15 +86,6 @@
* handle spell-checking, and use XSLT stylesheets to transform to that schema.
* This class would parse only the new DTD.</p>
*
- * <p>TODO: Leading-attached and trailing-attached punctuation can be separated from their words by being processed at
- * different times.
- * For example, assuming a base language of "eng", consider</p>
- * <pre>(<ForeignPhrase xml:lang="fre">en route</ForeignPhrase> from ...</pre>.
- * <p>In this case, the opening parenthesis will be treated as a word because it is in a different language than the
- * French phrase following.
- * One possible solution is to leave some tokens in the buffer so that they can be evaluated with subsequent tokens
- * before assigning them word/non-word status.</p>
- *
* <p>TODO: Handle multi-word dictionary items better.
* In the dictionary, we need to distinguish between phrases that are the concatenation of 1) two or more valid words
* in their own right, and 2) two or more words that are not valid words.
@@ -357,8 +348,9 @@
}
/* Remove any leading chars in the text buffer that are actually trailing punctuation. */
- while (textAccumulator.length() > 0
- && CharacterUtils.isAttachedTrailingPunctuation(textAccumulator.charAt(0))) {
+ while (textAccumulator.length() > 1
+ && CharacterUtils.isAttachedTrailingPunctuation(textAccumulator.charAt(0))
+ && CharacterUtils.isWordBreakChar(textAccumulator.charAt(1))) {
textAccumulator.deleteCharAt(0);
}
@@ -440,8 +432,8 @@
}
/**
- * Finds the bottom-most writing system specified in the element stack.
- * @return The bottom-most writing system specified in the element stack.
+ * Finds the nearest writing system specified in the element stack (i.e. the top-most one in the stack).
+ * @return The nearest writing system specified in the element stack.
*/
private WritingSystem4a getCurrentWritingSystem() {
for (int index = this.elementStack.size() - 1; index > -1; index --) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-30 11:23:12
|
Revision: 12723
http://sourceforge.net/p/foray/code/12723
Author: victormote
Date: 2022-08-30 11:23:09 +0000 (Tue, 30 Aug 2022)
Log Message:
-----------
Remove attached leading and trailing punctuation that is in the buffer before tokenizing the text.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
trunk/foray/foray-xml/src/main/java/org/foray/xml/SaxParser.java
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-29 22:03:57 UTC (rev 12722)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-30 11:23:09 UTC (rev 12723)
@@ -33,6 +33,7 @@
<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
<w><t>speak-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>ten-our</t></w>
<w><t>un-lade</t><verb><regular-root/></verb></w>
<w><t>walk-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>wip-eth</t><verb><regular-root value="false"/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-29 22:03:57 UTC (rev 12722)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-30 11:23:09 UTC (rev 12723)
@@ -11411,7 +11411,7 @@
<w><t>au-then-ti-ca-tor</t></w>
<w><t>au-then-tic-i-ty</t></w>
<w><t>au-thi-gen-ic</t></w>
-<w><t>au-thor</t></w>
+<w><t>au-thor</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>auth-or-ess</t></w>
<w><t>au-thor-ess</t></w>
<w><t>au-tho-ri-al</t></w>
@@ -13215,7 +13215,7 @@
<w><t>bap-tise</t></w>
<w><t>bap-tised</t></w>
<w><t>bap-tis-ing</t></w>
-<w><t>bap-tism</t></w>
+<w><t>bap-tism</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bap-tis-mal</t></w>
<w><t>bap-tis-mal-ly</t></w>
<phrase><t>bap-tism of fire</t></phrase>
@@ -32652,7 +32652,7 @@
<w><t>con-trac-ture</t></w>
<w><t>con-trac-tured</t></w>
<w><t>con-tra-dance</t></w>
-<w><t>con-tra-dict</t></w>
+<w><t>con-tra-dict</t><verb><regular-root/></verb></w>
<w><t>con-tra-dict-a-ble</t></w>
<w><t>con-tra-dict-er</t></w>
<w><t>con-tra-dic-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -33781,7 +33781,7 @@
<w><t>cor-ru-gat-ing</t></w>
<w><t>cor-ru-ga-tion</t></w>
<w><t>cor-ru-ga-tor</t></w>
-<w><t>cor-rupt</t></w>
+<w><t>cor-rupt</t><verb><regular-root/></verb></w>
<w><t>cor-rupt-ed-ly</t></w>
<w><t>cor-rupt-ed-ness</t></w>
<w><t>cor-rupt-er</t></w>
@@ -52946,7 +52946,7 @@
<w><t>ex-press-age</t></w>
<w><t>ex-press-er</t></w>
<w><t>ex-press-i-ble</t></w>
-<w><t>ex-pres-sion</t></w>
+<w><t>ex-pres-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-pres-sion-al</t></w>
<w><t>ex-pres-sion-ism</t></w>
<w><t>Ex-pres-sion-ism</t></w>
@@ -79577,6 +79577,7 @@
<w><t>Ire-land-er</t></w>
<w><t>ire-less</t></w>
<w><t>I-re-na</t></w>
+<w><t>Ire-nae-us</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>I-re-ne</t></w>
<w><t>I-rène</t></w>
<w><t>i-ren-ic</t></w>
@@ -81910,7 +81911,7 @@
<w><t>Ju-de-an</t></w>
<w><t>Ju-dette</t></w>
<w><t>Ju-dez-mo</t></w>
-<w><t>judge</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>judge</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>judge-a-ble</t></w>
<phrase><t>judge ad-vo-cate</t></phrase>
<phrase><t>judge ad-vo-cate gen-er-al</t></phrase>
@@ -92508,6 +92509,7 @@
<w><t>mart-net</t></w>
<w><t>mar-tyr</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mar-tyr-dom</t></w>
+<w><t>mar-tyred</t><adjective><extensible value="false"/></adjective></w>
<w><t>mar-tyr-i-a</t></w>
<w><t>mar-tyr-ies</t></w>
<w><t>mar-tyr-ise</t></w>
@@ -135520,7 +135522,7 @@
<w><t>re-lat-a-bil-i-ty</t></w>
<w><t>re-lat-a-ble</t></w>
<w><t>re-latch</t></w>
-<w><t>re-late</t></w>
+<w><t>re-late</t><verb><regular-root/></verb></w>
<w><t>re-lat-ed</t></w>
<w><t>re-lat-ed-ness</t></w>
<w><t>re-lat-er</t></w>
@@ -157859,7 +157861,7 @@
<w><t>sup-port-less</t></w>
<w><t>sup-port-less-ly</t></w>
<w><t>sup-pos-al</t></w>
-<w><t>sup-pose</t></w>
+<w><t>sup-pose</t><verb><regular-root/></verb></w>
<w><t>sup-posed</t></w>
<w><t>sup-pos-ed-ly</t></w>
<w><t>sup-pos-ing</t></w>
@@ -161182,7 +161184,7 @@
<phrase><t>ter-tiar-y col-lege</t></phrase>
<phrase><t>ter-ti-um quid</t></phrase>
<w><t>ter-ti-us</t></w>
-<w><t>Ter-tul-li-an</t></w>
+<w><t>Ter-tul-li-an</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>Te-ruel</t></w>
<w><t>ter-va-lence</t></w>
<w><t>ter-va-len-cy</t></w>
@@ -163008,6 +163010,7 @@
<w><t>Tir-than-ka-ra</t></w>
<w><t>Tir-u-chi-ra-pal-li</t></w>
<w><t>Ti-ru-nel-vel-i</t></w>
+<w><t>’Tis</t><contraction referenced-word="It is"/></w>
<w><t>Ti-sa</t></w>
<w><t>Ti-sam-e-nus</t></w>
<w><t>ti-sane</t></w>
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-29 22:03:57 UTC (rev 12722)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-30 11:23:09 UTC (rev 12723)
@@ -29,6 +29,7 @@
package org.foray.orthography.util;
import org.foray.common.i18n.WritingSystem4a;
+import org.foray.common.primitive.CharacterUtils;
import org.foray.common.primitive.ObjectUtils;
import org.foray.common.primitive.StringUtils;
import org.foray.common.primitive.XmlUtils;
@@ -347,6 +348,20 @@
final Orthography4a orthography = writingSystem == null ? null : this.server.getOrthography(writingSystem);
final Lexer lexer = orthography == null ? new LexerJavaBreakIterator(writingSystem) : orthography.getLexer();
+ final StringBuilder textAccumulator = getTextAccumulator();
+
+ /* Remove any trailing chars in the text buffer that are actually leading punctuation. */
+ while (textAccumulator.length() > 0
+ && CharacterUtils.isAttachedLeadingPunctuation(textAccumulator.charAt(textAccumulator.length() - 1))) {
+ textAccumulator.deleteCharAt(textAccumulator.length() - 1);
+ }
+
+ /* Remove any leading chars in the text buffer that are actually trailing punctuation. */
+ while (textAccumulator.length() > 0
+ && CharacterUtils.isAttachedTrailingPunctuation(textAccumulator.charAt(0))) {
+ textAccumulator.deleteCharAt(0);
+ }
+
final List<CharSequence> words = lexer.tokenize(getAndClearText());
if (words == null) {
@@ -419,7 +434,26 @@
return writingSystem;
}
+ @Override
+ public void reset() {
+ throw new UnsupportedOperationException();
+ }
+
/**
+ * Finds the bottom-most writing system specified in the element stack.
+ * @return The bottom-most writing system specified in the element stack.
+ */
+ private WritingSystem4a getCurrentWritingSystem() {
+ for (int index = this.elementStack.size() - 1; index > -1; index --) {
+ final Element element = this.elementStack.get(index);
+ if (element.writingSystem != null) {
+ return element.writingSystem;
+ }
+ }
+ return null;
+ }
+
+ /**
* Returns the command-line options for the {@link #main(String[])} method.
* @return Command-line options.
*/
@@ -501,23 +535,4 @@
}
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Finds the top-most writing system specified in the element stack.
- * @return The top-most writing system specified in the element stack.
- */
- private WritingSystem4a getCurrentWritingSystem() {
- for (int index = this.elementStack.size() - 1; index > -1; index --) {
- final Element element = this.elementStack.get(index);
- if (element.writingSystem != null) {
- return element.writingSystem;
- }
- }
- return null;
- }
-
}
Modified: trunk/foray/foray-xml/src/main/java/org/foray/xml/SaxParser.java
===================================================================
--- trunk/foray/foray-xml/src/main/java/org/foray/xml/SaxParser.java 2022-08-29 22:03:57 UTC (rev 12722)
+++ trunk/foray/foray-xml/src/main/java/org/foray/xml/SaxParser.java 2022-08-30 11:23:09 UTC (rev 12723)
@@ -324,6 +324,16 @@
this.textAccumulator.append(chars);
}
+ /**
+ * Returns the {@link StringBuilder} in which text is being accumulated.
+ * This should not ordinarily be needed by subclasses, but is exposed here for cases where it is more efficient to
+ * operate directly on the builder than with immutable strings.
+ * @return The text accumulator.
+ */
+ protected StringBuilder getTextAccumulator() {
+ return this.textAccumulator;
+ }
+
@Override
public void error(final SAXParseException ex) {
this.logger.error(ex.getMessage());
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-29 22:04:05
|
Revision: 12722
http://sourceforge.net/p/foray/code/12722
Author: victormote
Date: 2022-08-29 22:03:57 +0000 (Mon, 29 Aug 2022)
Log Message:
-----------
Handle punctuation inside of words better.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-08-29 21:08:57 UTC (rev 12721)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-08-29 22:03:57 UTC (rev 12722)
@@ -76,6 +76,8 @@
private static final String POSSIBLE_INTRAWORD_PUNCTUATION = new String(new char[] {
Basic_Latin_Block.APOSTROPHE,
General_Punctuation_Block.RIGHT_SINGLE_QUOTATION_MARK,
+ Basic_Latin_Block.LEFT_PARENTHESIS, //English example of alternate spelling:
+ Basic_Latin_Block.RIGHT_PARENTHESIS, // pa(e)leography
Basic_Latin_Block.FULL_STOP, //English example: "Section 8.16"
});
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-08-29 21:08:57 UTC (rev 12721)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-08-29 22:03:57 UTC (rev 12722)
@@ -105,6 +105,9 @@
* parenthesis. */
ATTACHED_TRAILING_PUNCTUATION,
+ /** Character is either an attached leading punctuation mark or intraword punctuation. */
+ ATTACHED_LEADING_OR_INTRAWORD_PUNCTUATION,
+
/** Character is either an attached trailing punctuation mark or intraword punctuation. */
ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION,
@@ -254,6 +257,32 @@
}
break;
}
+ case ATTACHED_LEADING_OR_INTRAWORD_PUNCTUATION: {
+ switch (nextBreakType) {
+ case WORD_CHAR: {
+ switch (previousBreakType) {
+ case WORD_CHAR: {
+ /* This also is part of the word. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ break;
+ }
+ default: {
+ breakTypes[breakIndex] = CharType.ATTACHED_LEADING_PUNCTUATION;
+ break;
+ }
+ }
+ break;
+ }
+ case ATTACHED_LEADING_PUNCTUATION: {
+ /* This is additional leading punctuation. */
+ breakTypes[breakIndex] = CharType.ATTACHED_LEADING_PUNCTUATION;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+ }
default: {
break;
}
@@ -466,7 +495,11 @@
return CharType.WORD_CHAR;
}
if (CharacterUtils.isAttachedLeadingPunctuation(c)) {
- return CharType.ATTACHED_LEADING_PUNCTUATION;
+ if (CharacterUtils.isPossibleIntrawordPunctuation(c)) {
+ return CharType.ATTACHED_LEADING_OR_INTRAWORD_PUNCTUATION;
+ } else {
+ return CharType.ATTACHED_LEADING_PUNCTUATION;
+ }
}
if (CharacterUtils.isAttachedTrailingPunctuation(c)) {
if (CharacterUtils.isPossibleIntrawordPunctuation(c)) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-29 21:09:02
|
Revision: 12721
http://sourceforge.net/p/foray/code/12721
Author: victormote
Date: 2022-08-29 21:08:57 +0000 (Mon, 29 Aug 2022)
Log Message:
-----------
Clear text cache for (incomplete) new elements so that subsequent elements are not affected.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2022-08-29 13:15:03 UTC (rev 12720)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2022-08-29 21:08:57 UTC (rev 12721)
@@ -602,7 +602,11 @@
case "orthography": {
return;
}
+ case "explicit-token": {
+ /* TODO: Complete this. For now, clear the text. */
+ getAndClearText();
}
+ }
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-29 13:15:07
|
Revision: 12720
http://sourceforge.net/p/foray/code/12720
Author: victormote
Date: 2022-08-29 13:15:03 +0000 (Mon, 29 Aug 2022)
Log Message:
-----------
Orthography data improvements.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-28 18:07:05 UTC (rev 12719)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-29 13:15:03 UTC (rev 12720)
@@ -7,10 +7,13 @@
<axsl-dictionary language="eng" script="Latn" hard-hyphen-char="="
soft-hyphen-char="-">
+<w><t>ac-o-lyth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>arch-bish-op-rick</t><noun/></w>
<w><t>be-hoof</t><noun/></w>
<w><t>Car-tha-gen-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun><comment>Carthaginian.</comment></w>
<w><t>ceil</t><verb><regular-root/></verb></w>
<w><t>ceil-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>eat-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>ex-ult-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>go-eth</t><verb/></w>
<w><t>hum-bleth</t><verb><regular-root value="false"/></verb></w>
@@ -32,5 +35,6 @@
<w><t>speak-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>un-lade</t><verb><regular-root/></verb></w>
<w><t>walk-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>wip-eth</t><verb><regular-root value="false"/></verb></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-28 18:07:05 UTC (rev 12719)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-29 13:15:03 UTC (rev 12720)
@@ -1333,7 +1333,7 @@
<w><t>a-coe-naes-the-sia</t></w>
<w><t>Ac-ol</t></w>
<w><t>a-cold</t></w>
-<w><t>ac-o-lyte</t></w>
+<w><t>ac-o-lyte</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>A-con-ca-gua</t></w>
<w><t>ac-o-nite</t></w>
<w><t>ac-o-nit-ic</t></w>
@@ -1742,7 +1742,7 @@
<phrase><t>Ad-di-son’s dis-ease</t></phrase>
<w><t>ad-dit-a-ment</t></w>
<w><t>ad-dit-a-men-ta-ry</t></w>
-<w><t>ad-di-tion</t></w>
+<w><t>ad-di-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ad-di-tion-al</t></w>
<w><t>ad-di-tion-al-ly</t></w>
<w><t>ad-di-tive</t></w>
@@ -8480,7 +8480,7 @@
<w><t>ap-pe-tiz-ing-ly</t></w>
<w><t>Ap-pia</t></w>
<phrase><t>Ap-pi-an Way</t></phrase>
-<w><t>ap-plaud</t></w>
+<w><t>ap-plaud</t><verb><regular-root/></verb></w>
<w><t>ap-plaud-a-ble</t></w>
<w><t>ap-plaud-a-bly</t></w>
<w><t>ap-plaud-er</t></w>
@@ -9277,7 +9277,7 @@
<w><t>ar-got-ic</t></w>
<w><t>Ar-go-vie</t></w>
<w><t>ar-gu-a-ble</t></w>
-<w><t>ar-gue</t></w>
+<w><t>ar-gue</t><verb><regular-root/></verb></w>
<w><t>ar-gued</t></w>
<w><t>Ar-güe-das</t></w>
<w><t>ar-gu-er</t></w>
@@ -10291,12 +10291,12 @@
<w><t>as-sert-ed-ly</t></w>
<w><t>as-sert-er</t></w>
<w><t>as-sert-i-ble</t></w>
-<w><t>as-ser-tion</t></w>
+<w><t>as-ser-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>as-ser-tion-al</t></w>
<w><t>as-ser-tive</t></w>
<w><t>as-ser-tive-ly</t></w>
<w><t>as-ser-tive-ness</t></w>
-<w><t>as-ser-tor</t></w>
+<w><t>as-ser-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>as-ser-to-ri-ly</t></w>
<w><t>as-ser-to-ry</t></w>
<w><t>as-ses</t></w>
@@ -13726,7 +13726,7 @@
<w><t>bas-cin-et</t></w>
<w><t>bas-ci-net</t></w>
<w><t>bas-cule</t></w>
-<w><t>base</t></w>
+<w><t>base</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>base-ball</t></w>
<w><t>base-board</t></w>
<w><t>base-born</t></w>
@@ -16609,7 +16609,7 @@
<w><t>bi-sex-u-al-i-ty</t></w>
<w><t>bi-sex-u-al-ly</t></w>
<w><t>bish</t></w>
-<w><t>bish-op</t></w>
+<w><t>bish-op</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bish-op</t></w>
<phrase><t>Bish-op Auck-land</t></phrase>
<w><t>bish-op-bird</t></w>
@@ -18908,6 +18908,7 @@
<w><t>Bo-zen</t></w>
<w><t>bo-zo</t></w>
<w><t>Boz-za-ris</t></w>
+<w><t>Bp</t><abbrev referenced-word="Bishop"/></w>
<w><t>BPC</t></w>
<w><t>bpi</t></w>
<w><t>bpt</t></w>
@@ -22518,7 +22519,7 @@
<w><t>can-died</t></w>
<w><t>Can-di-ot</t></w>
<w><t>Can-di-ote</t></w>
-<w><t>can-dle</t></w>
+<w><t>can-dle</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>can-dle-beam</t></w>
<w><t>can-dle-ber-ry</t></w>
<w><t>can-dle-fish</t></w>
@@ -24231,7 +24232,7 @@
<w><t>cat-e-chol</t></w>
<w><t>cat-e-chol-a-mine</t></w>
<w><t>cat-e-chu</t></w>
-<w><t>cat-e-chu-men</t></w>
+<w><t>cat-e-chu-men</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cat-e-chu-me-nal</t></w>
<w><t>cat-e-chu-me-nate</t></w>
<w><t>cat-e-chu-men-i-cal</t></w>
@@ -28152,7 +28153,7 @@
<w><t>cit-a-del</t></w>
<w><t>ci-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ci-ta-to-ry</t></w>
-<w><t>cite</t></w>
+<w><t>cite</t><verb><regular-root/></verb></w>
<w><t>cite-a-ble</t></w>
<w><t>cit-ed</t></w>
<w><t>cit-er</t></w>
@@ -31469,7 +31470,7 @@
<w><t>con-cu-pis-cence</t></w>
<w><t>con-cu-pis-cent</t></w>
<w><t>con-cu-pis-ci-ble</t></w>
-<w><t>con-cur</t></w>
+<w><t>con-cur</t><verb><regular-root/></verb></w>
<w><t>con-curred</t></w>
<w><t>con-cur-rence</t></w>
<w><t>concur-ren-cy</t></w>
@@ -31713,7 +31714,7 @@
<w><t>con-firm-a-ble</t></w>
<w><t>con-firm-and</t></w>
<w><t>con-fir-mand</t></w>
-<w><t>con-fir-ma-tion</t></w>
+<w><t>con-fir-ma-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-firm-a-tive</t></w>
<w><t>con-firm-a-to-ry</t></w>
<w><t>con-firmed</t></w>
@@ -31877,7 +31878,7 @@
<w><t>con-gre-gat-ed</t></w>
<w><t>con-gre-gat-ing</t></w>
<w><t>con-gre-ga-tion</t><noun><pluralizable/></noun></w>
-<w><t>con-gre-ga-tion-al</t></w>
+<w><t>con-gre-ga-tion-al</t><adjective><extensible value="false"/></adjective></w>
<phrase><t>Con-gre-ga-tion-al Church</t></phrase>
<w><t>Con-gre-ga-tion-al-ism</t></w>
<w><t>con-gre-ga-tion-al-ism</t><noun><convertible-to-possessive/></noun></w>
@@ -33790,7 +33791,7 @@
<w><t>cor-rup-ti-ble</t></w>
<w><t>cor-rupt-i-ble-ness</t></w>
<w><t>cor-rupt-i-bly</t></w>
-<w><t>cor-rup-tion</t></w>
+<w><t>cor-rup-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cor-rup-tion-ist</t></w>
<w><t>cor-rup-tive</t></w>
<w><t>cor-rup-tive-ly</t></w>
@@ -34180,7 +34181,7 @@
<w><t>coul-ter</t></w>
<w><t>cou-ma-rin</t></w>
<w><t>cou-ma-rone</t></w>
-<w><t>coun-cil</t></w>
+<w><t>coun-cil</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>coun-cil-lor</t></w>
<w><t>coun-cil-lor-ship</t></w>
<w><t>coun-cil-man</t></w>
@@ -34475,7 +34476,7 @@
<w><t>court=mar-tialled</t></w>
<w><t>court=mar-tial-ling</t></w>
<w><t>court=mar-tials</t></w>
-<w><t>Court-ney</t></w>
+<w><t>Court-ney</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Court of Ap-peal</t></phrase>
<phrase><t>Court of Ex-cheq-uer</t></phrase>
<phrase><t>court of hon-or</t></phrase>
@@ -37975,7 +37976,7 @@
<w><t>de-a-cet-y-lat-ed</t></w>
<w><t>de-a-cet-y-lat-ing</t></w>
<w><t>de-a-cet-y-la-tion</t></w>
-<w><t>dea-con</t></w>
+<w><t>dea-con</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dea-con-ess</t></w>
<w><t>dea-con-ry</t></w>
<w><t>dea-con-ship</t></w>
@@ -40047,7 +40048,7 @@
<w><t>de-part-men-tal-iz-ing</t></w>
<w><t>de-part-men-tal-ly</t></w>
<phrase><t>de-part-ment store</t></phrase>
-<w><t>de-par-ture</t></w>
+<w><t>de-par-ture</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-pas-ture</t></w>
<w><t>de-paup-er-ate</t></w>
<w><t>de-pau-per-a-tion</t></w>
@@ -44058,7 +44059,7 @@
<w><t>doc-tri-nal</t></w>
<w><t>doc-tri-nal-i-ty</t></w>
<w><t>doc-tri-nal-ly</t></w>
-<w><t>doc-trine</t></w>
+<w><t>doc-trine</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>doc-trine of de-scent</t></phrase>
<w><t>doc-u-ment</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>doc-u-men-tal</t></w>
@@ -46499,7 +46500,7 @@
<w><t>ear=mind-ed</t></w>
<w><t>ear-mind-ed-ness</t></w>
<w><t>ear-muff</t></w>
-<w><t>earn</t></w>
+<w><t>earn</t><verb><regular-root/></verb></w>
<phrase><t>earned in-come</t></phrase>
<w><t>earn-er</t></w>
<w><t>ear-nest</t></w>
@@ -49483,7 +49484,7 @@
<w><t>e-nol-o-gy</t></w>
<w><t>E-no-ne</t></w>
<w><t>e-norm</t></w>
-<w><t>e-nor-mi-ty</t></w>
+<w><t>e-nor-mi-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-nor-mous</t></w>
<w><t>e-nor-mous-ly</t></w>
<w><t>e-nor-mous-ness</t></w>
@@ -53828,7 +53829,7 @@
<w><t>fal-sies</t></w>
<w><t>fal-si-fi-a-bil-i-ty</t></w>
<w><t>fal-si-fi-a-ble</t></w>
-<w><t>fal-si-fi-ca-tion</t></w>
+<w><t>fal-si-fi-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fal-si-fied</t></w>
<w><t>fal-si-fi-er</t></w>
<w><t>fal-si-fy</t></w>
@@ -54441,7 +54442,7 @@
<w><t>fea-si-bil-i-ty</t></w>
<w><t>fea-si-ble</t></w>
<w><t>fea-si-bly</t></w>
-<w><t>feast</t></w>
+<w><t>feast</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>feast-er</t></w>
<w><t>feast-ful</t></w>
<w><t>feast-ful-ly</t></w>
@@ -54998,7 +54999,7 @@
<w><t>fes-ti-nate-ly</t></w>
<w><t>fes-ti-nat-ing</t></w>
<w><t>fes-ti-na-tion</t></w>
-<w><t>fes-ti-val</t></w>
+<w><t>fes-ti-val</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fes-tive</t></w>
<w><t>fes-tive-ly</t></w>
<w><t>fes-tive-ness</t></w>
@@ -70009,7 +70010,7 @@
<w><t>his-to-phys-i-o-log-i-cal</t></w>
<w><t>his-to-phys-i-ol-o-gy</t></w>
<w><t>his-to-plas-mo-sis</t></w>
-<w><t>his-to-ri-an</t></w>
+<w><t>his-to-ri-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>his-to-ri-at-ed</t></w>
<w><t>his-tor-ic</t></w>
<w><t>his-tor-i-cal</t></w>
@@ -74841,7 +74842,7 @@
<w><t>im-pos-ing-ly</t></w>
<w><t>im-pos-ing-ness</t></w>
<phrase><t>im-pos-ing stone</t></phrase>
-<w><t>im-po-si-tion</t></w>
+<w><t>im-po-si-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-pos-si-bil-i-ty</t></w>
<w><t>im-pos-si-ble</t></w>
<w><t>im-pos-si-ble-ness</t></w>
@@ -75791,7 +75792,7 @@
<w><t>In-de-pend-en-cy</t></w>
<w><t>in-de-pend-en-cy</t></w>
<w><t>in-de-pend-ent</t></w>
-<w><t>In-de-pend-ent</t></w>
+<w><t>In-de-pend-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>in-de-pend-ent clause</t></phrase>
<w><t>in-de-pend-ent-ly</t></w>
<phrase><t>in-de-pend-ent school</t></phrase>
@@ -76855,7 +76856,7 @@
<w><t>in-iq-ui-tous</t></w>
<w><t>in-iq-ui-tous-ly</t></w>
<w><t>in-iq-ui-tous-ness</t></w>
-<w><t>in-iq-ui-ty</t></w>
+<w><t>in-iq-ui-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>init</t></w>
<w><t>in-i-tial</t></w>
<w><t>in-i-tialed</t></w>
@@ -79243,7 +79244,7 @@
<w><t>in-va-sion</t></w>
<w><t>in-va-sive</t></w>
<w><t>in-vect-ed</t></w>
-<w><t>in-vec-tive</t></w>
+<w><t>in-vec-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-vec-tive-ly</t></w>
<w><t>in-vec-tive-ness</t></w>
<w><t>in-veigh</t></w>
@@ -80388,7 +80389,7 @@
<phrase><t>I-tal-ian So-ma-li-land</t></phrase>
<phrase><t>I-tal-ian son-net</t></phrase>
<phrase><t>I-tal-ian ver-mouth</t></phrase>
-<w><t>i-tal-ic</t></w>
+<w><t>i-tal-ic</t><noun><pluralizable/></noun><adjective><extensible value="false"/></adjective></w>
<w><t>I-tal-ic</t></w>
<w><t>i-tal-i-cise</t></w>
<w><t>I-tal-i-cism</t></w>
@@ -80860,7 +80861,7 @@
<w><t>Jan-is-sar-ies</t></w>
<w><t>Jan-is-sar-y</t></w>
<w><t>jan-is-sar-y</t></w>
-<w><t>jan-i-tor</t></w>
+<w><t>jan-i-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>jan-i-to-ri-al</t></w>
<w><t>jan-i-tor-ship</t></w>
<w><t>jan-i-tress</t></w>
@@ -96566,7 +96567,7 @@
<w><t>mis-re-port</t></w>
<w><t>mis-re-port-er</t></w>
<w><t>mis-rep-re-sent</t></w>
-<w><t>mis-rep-re-sen-ta-tion</t></w>
+<w><t>mis-rep-re-sen-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mis-rep-re-sen-ta-tive</t></w>
<w><t>mis-rep-re-sent-er</t></w>
<w><t>mis-re-print</t></w>
@@ -101509,7 +101510,7 @@
<w><t>nig-gler</t></w>
<w><t>nig-gling</t></w>
<w><t>nigh</t></w>
-<w><t>night</t></w>
+<w><t>night</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>night blind-ness</t></phrase>
<phrase><t>night=bloom-ing ce-re-us</t></phrase>
<w><t>night-cap</t></w>
@@ -122778,6 +122779,7 @@
<w><t>post-pneu-mon-ic</t></w>
<w><t>post-pone</t></w>
<w><t>post-poned</t></w>
+<w><t>post-pone-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>post-pon-ing</t></w>
<w><t>post-po-si-tion</t></w>
<w><t>post-po-si-tion-al</t></w>
@@ -124939,7 +124941,7 @@
<w><t>pre-fig-ur-a-tive</t></w>
<w><t>pre-fig-ur-a-tive-ly</t></w>
<w><t>pre-fig-ur-a-tive-ness</t></w>
-<w><t>pre-fig-ure</t></w>
+<w><t>pre-fig-ure</t><verb><regular-root/></verb></w>
<w><t>pre-fig-ure-ment</t></w>
<w><t>pre-fill-er</t></w>
<w><t>pre-fi-nance</t></w>
@@ -133523,6 +133525,7 @@
<w><t>reck</t></w>
<w><t>reck-less</t><adjective><extensible value="false"/></adjective></w>
<w><t>reck-less-ly</t><adverb/></w>
+<w><t>reck-less-ness</t><noun/></w>
<w><t>Reck-ling-hau-sen</t></w>
<w><t>reck-on</t></w>
<w><t>reck-on-a-ble</t></w>
@@ -137026,7 +137029,7 @@
<w><t>re-spond</t><verb><regular-root/></verb></w>
<w><t>re-spond-ence</t></w>
<w><t>re-spond-en-cy</t></w>
-<w><t>re-spond-ent</t></w>
+<w><t>re-spond-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Re-spon-sa</t></w>
<w><t>re-sponse</t></w>
<w><t>re-sponse-less</t></w>
@@ -142492,7 +142495,7 @@
<w><t>scoff-law</t></w>
<w><t>Sco-field</t></w>
<w><t>scoke</t></w>
-<w><t>scold</t></w>
+<w><t>scold</t><verb><regular-root/></verb></w>
<w><t>scold-a-ble</t></w>
<w><t>scold-er</t></w>
<w><t>scold-ing-ly</t></w>
@@ -145318,7 +145321,7 @@
<w><t>sen-sa-tion-ist</t></w>
<w><t>sen-sa-tion-is-tic</t></w>
<w><t>sen-sa-tion-less</t></w>
-<w><t>sense</t></w>
+<w><t>sense</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>sense da-tum</t></phrase>
<w><t>sense-less</t></w>
<w><t>sense-less-ly</t></w>
@@ -153366,7 +153369,7 @@
<phrase><t>stick in-sect</t></phrase>
<w><t>stick-it</t></w>
<w><t>stick-jaw</t></w>
-<w><t>stick-le</t></w>
+<w><t>stick-le</t><verb><regular-root/></verb></w>
<w><t>stick-le-back</t></w>
<w><t>stick-ler</t></w>
<w><t>stick-less</t></w>
@@ -154191,7 +154194,7 @@
<w><t>strig-il</t></w>
<w><t>strig-il-ate</t></w>
<w><t>stri-gose</t></w>
-<w><t>strike</t></w>
+<w><t>strike</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root value="false"/></verb></w>
<w><t>strike-board</t></w>
<w><t>strike-bound</t></w>
<w><t>strike-break-er</t></w>
@@ -154198,6 +154201,7 @@
<w><t>strike-break-ing</t></w>
<w><t>strike-o-ver</t></w>
<w><t>strik-er</t></w>
+<w><t>strikes</t><verb><regular-root value="false"/><vf><singular/></vf></verb></w>
<w><t>strik-ing</t></w>
<phrase><t>strik-ing cir-cle</t></phrase>
<w><t>Stri-mon</t></w>
@@ -174261,9 +174265,10 @@
<w><t>un-mis-led</t></w>
<w><t>un-miss-a-ble</t></w>
<w><t>un-missed</t></w>
-<w><t>un-mis-tak-a-ble</t></w>
-<w><t>un-mis-tak-a-bly</t></w>
-<w><t>un-mis-take-a-ble</t></w>
+<w><t>un-mis-tak-a-ble</t><adjective><extensible value="false"/></adjective></w>
+<w><t>un-mis-tak-a-bly</t><adverb/></w>
+<w><t>un-mis-take-a-ble</t><adjective><extensible value="false"/></adjective></w>
+<w><t>un-mis-take-a-bly</t><adverb/></w>
<w><t>un-mis-tak-en</t></w>
<w><t>un-mis-tak-ing</t></w>
<w><t>un-mis-trust-ed</t></w>
@@ -183068,7 +183073,7 @@
<w><t>wa-waved</t></w>
<w><t>wa-wav-ing</t></w>
<w><t>wawl</t></w>
-<w><t>wax</t></w>
+<w><t>wax</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>wax-ber-ry</t></w>
<w><t>wax-bill</t></w>
<w><t>wax-en</t></w>
@@ -185987,7 +185992,7 @@
<w><t>wrist-watch</t></w>
<w><t>writ</t></w>
<w><t>write</t><verb><regular-root value="false"/></verb></w>
-<w><t>writ-er</t></w>
+<w><t>writ-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>writ-er’s cramp</t></phrase>
<w><t>writes</t><verb/></w>
<w><t>writhe</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-08-28 18:07:05 UTC (rev 12719)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-08-29 13:15:03 UTC (rev 12720)
@@ -12,21 +12,28 @@
<w><t>ad</t></w>
<w><t>ann-um</t></w>
+<w><t>a-pel-la</t></w>
<w><t>ar-gu-ment-um</t></w>
<w><t>bel-li</t></w>
+<w><t>cap-tan-dum</t></w>
<w><t>ca-sus</t></w>
+<w><t>cre-dat</t></w>
<w><t>de</t></w>
+<w><t>e-go</t></w>
<w><t>et</t></w>
<w><t>fide</t></w>
<w><t>fit</t></w>
<w><t>hoc</t></w>
<w><t>ho-mi-nem</t></w>
+<w><t>hy-dro-ma-ni-a</t></w>
<w><t>i.e</t><abbrev referenced-word="id est"/><comment>Until "i.e." can be handled properly.</comment></w>
<w><t>in</t></w>
<w><t>in-fi-del-i-um</t></w>
+<w><t>ju-dae-us</t></w>
<w><t>lo-co</t></w>
<w><t>nas-ci-tur</t></w>
<w><t>non</t></w>
+<w><t>or-i-gin-es</t></w>
<w><t>pa-ra-tus</t></w>
<w><t>pa-rent-is</t></w>
<w><t>part-i-bus</t></w>
@@ -34,7 +41,9 @@
<w><t>pe-ti-tio</t></w>
<w><t>prin-ci-pii</t></w>
<w><t>prop-a-gan-da</t></w>
-<w><t>qui</t></w><w><t>sem-per</t></w>
+<w><t>qui</t></w>
+<w><t>sac-rae</t></w>
+<w><t>sem-per</t></w>
<w><t>sig-no</t></w>
<w><t>vin-ces</t></w>
<w><t>vive</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-08-28 18:07:05 UTC (rev 12719)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-08-29 13:15:03 UTC (rev 12720)
@@ -25,6 +25,7 @@
<match desc="A single capital letter, such as a person's initial">^[A-Z]$</match>
<match desc="Contracted year">^’[0-9]+$</match>
<match desc="Two dimensions">^[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?×[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?$</match>
+ <match desc="Standalone symbols">^[§]$</match>
</match-rule-list>
<derivative-pattern-list id="eng-Latn-derivative-patterns">
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-28 18:07:09
|
Revision: 12719
http://sourceforge.net/p/foray/code/12719
Author: victormote
Date: 2022-08-28 18:07:05 +0000 (Sun, 28 Aug 2022)
Log Message:
-----------
Improvements to spell-checking and related dictionaries.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-28 16:03:54 UTC (rev 12718)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-28 18:07:05 UTC (rev 12719)
@@ -26,6 +26,7 @@
<w><t>per-suad-est</t><verb><regular-root value="false"/></verb></w>
<w><t>pre-ëm-i-nence</t></w>
<w><t>pre-ëm-i-nent-ly</t><adverb/></w>
+<w><t>re-prov-eth</t></w>
<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
<w><t>speak-eth</t><verb><regular-root value="false"/></verb></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-28 16:03:54 UTC (rev 12718)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-28 18:07:05 UTC (rev 12719)
@@ -54,6 +54,7 @@
-->
<w><t>&amp;</t></w>
+<w><t>&amp;c</t><abbrev referenced-word="etc., et cetera"/></w>
<w><t>a</t></w>
<w><t>a-a</t></w>
<w><t>aaaaaaaaaa</t><noun><convertible-to-possessive/></noun><comment>Pseudo-word used as a replacement for omitted word elements.</comment></w>
@@ -1002,7 +1003,7 @@
<w><t>ac-cus-a-bly</t></w>
<w><t>ac-cus-al</t></w>
<w><t>ac-cus-ant</t></w>
-<w><t>ac-cu-sa-tion</t></w>
+<w><t>ac-cu-sa-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-cu-sa-ti-val</t></w>
<w><t>ac-cu-sa-tive</t></w>
<w><t>ac-cu-sa-tive-ly</t></w>
@@ -2895,7 +2896,7 @@
<w><t>A-gen</t></w>
<w><t>A-ge-na</t></w>
<w><t>A-ge-nais</t></w>
-<w><t>a-gen-cy</t></w>
+<w><t>a-gen-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-gen-da</t></w>
<w><t>a-gen-dum</t></w>
<w><t>a-ge-ne-sia</t></w>
@@ -5018,7 +5019,7 @@
<phrase><t>Am-en-ho-tep IV</t></phrase>
<phrase><t>A-men-hot-pe III</t></phrase>
<phrase><t>Am-en-hot-pe IV</t></phrase>
-<w><t>a-men-i-ty</t></w>
+<w><t>a-men-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-men-or-rhe-a</t></w>
<w><t>a-men-or-rhe-al</t></w>
<w><t>a-men-or-rhe-ic</t></w>
@@ -8517,7 +8518,7 @@
<w><t>ap-pli-ca-ble-ness</t></w>
<w><t>ap-pli-ca-bly</t></w>
<w><t>ap-pli-cant</t></w>
-<w><t>ap-pli-ca-tion</t></w>
+<w><t>ap-pli-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ap-plic-a-tive</t></w>
<w><t>ap-pli-ca-tive</t></w>
<w><t>ap-pli-ca-tive-ly</t></w>
@@ -14051,7 +14052,7 @@
<w><t>bat-ti-est</t></w>
<w><t>bat-tik</t></w>
<w><t>bat-ting</t></w>
-<w><t>bat-tle</t></w>
+<w><t>bat-tle</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>bat-tle=ax</t></w>
<w><t>bat-tle=axe</t></w>
<phrase><t>bat-tle cruis-er</t></phrase>
@@ -17543,7 +17544,7 @@
<w><t>blu-ish-ness</t></w>
<w><t>Blum</t></w>
<w><t>Blu-ma</t></w>
-<w><t>blun-der</t></w>
+<w><t>blun-der</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>blun-der-buss</t></w>
<w><t>blun-der-er</t></w>
<w><t>blun-der-ful</t></w>
@@ -24590,7 +24591,7 @@
<w><t>Cav-en-dish</t></w>
<w><t>cav-en-dish</t></w>
<w><t>cav-er</t></w>
-<w><t>cav-ern</t></w>
+<w><t>cav-ern</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cav-ern-ous</t></w>
<w><t>cav-ern-ous-ly</t></w>
<w><t>cav-es-son</t></w>
@@ -28149,7 +28150,7 @@
<w><t>cit</t></w>
<w><t>cit-a-ble</t></w>
<w><t>cit-a-del</t></w>
-<w><t>ci-ta-tion</t></w>
+<w><t>ci-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ci-ta-to-ry</t></w>
<w><t>cite</t></w>
<w><t>cite-a-ble</t></w>
@@ -30283,8 +30284,8 @@
<phrase><t>col-our phase</t></phrase>
<w><t>col-pi-tis</t></w>
<w><t>Col-po-da</t></w>
-<w><t>col-por-tage</t></w>
-<w><t>col-por-teur</t></w>
+<w><t>col-por-tage</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>col-por-teur</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>col-pot-o-my</t></w>
<w><t>colt</t></w>
<w><t>Colt</t></w>
@@ -30963,7 +30964,7 @@
<w><t>com-pet-i-to-ry</t></w>
<w><t>Com-pi</t></w>
<w><t>Com-piègne</t></w>
-<w><t>com-pi-la-tion</t></w>
+<w><t>com-pi-la-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>com-pi-la-tion film</t></phrase>
<w><t>com-pil-a-to-ry</t></w>
<w><t>com-pile</t></w>
@@ -31352,7 +31353,7 @@
<phrase><t>con-cert o-ver-ture</t></phrase>
<phrase><t>con-cert pitch</t></phrase>
<w><t>con-ces-si-ble</t></w>
-<w><t>con-ces-sion</t></w>
+<w><t>con-ces-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-ces-sion-aire</t></w>
<w><t>con-ces-sion-ar-ies</t></w>
<w><t>con-ces-sion-ar-y</t></w>
@@ -37011,7 +37012,7 @@
<w><t>cy-press</t></w>
<phrase><t>cy-press pine</t></phrase>
<phrase><t>cy-press vine</t></phrase>
-<w><t>Cyp-ri-an</t></w>
+<w><t>Cyp-ri-an</t><noun><singular/><convertible-to-possessive/></noun></w>
<w><t>cy-pri-nid</t></w>
<w><t>cy-prin-o-dont</t></w>
<w><t>cyp-ri-noid</t></w>
@@ -38500,7 +38501,7 @@
<w><t>de-clar-a-tive-ly</t></w>
<w><t>de-clar-a-tor</t></w>
<w><t>de-clar-a-to-ry</t></w>
-<w><t>de-clare</t></w>
+<w><t>de-clare</t><verb><regular-root/></verb></w>
<w><t>de-clared</t></w>
<w><t>de-clar-ed-ly</t></w>
<w><t>de-clar-er</t></w>
@@ -39864,7 +39865,7 @@
<w><t>de-nom-i-nate</t></w>
<w><t>de-nom-i-nat-ed</t></w>
<w><t>de-nom-i-nat-ing</t></w>
-<w><t>de-nom-i-na-tion</t></w>
+<w><t>de-nom-i-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-nom-i-na-tion-al</t></w>
<w><t>de-nom-i-na-tion-al-ism</t></w>
<w><t>de-nom-i-na-tion-al-ist</t></w>
@@ -39976,7 +39977,7 @@
<w><t>de-nun-ci-ate</t></w>
<w><t>de-nun-ci-at-ed</t></w>
<w><t>de-nun-ci-at-ing</t></w>
-<w><t>de-nun-ci-a-tion</t></w>
+<w><t>de-nun-ci-a-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>de-nun-ci-a-tive</t></w>
<w><t>de-nun-ci-a-tive-ly</t></w>
<w><t>de-nun-ci-a-tor</t></w>
@@ -48658,7 +48659,8 @@
<w><t>em-pha-size</t></w>
<w><t>em-pha-sized</t></w>
<w><t>em-pha-siz-ing</t></w>
-<w><t>em-phat-ic</t></w>
+<w><t>em-phat-ic</t><adjective/></w>
+<w><t>em-phat-i-cal-ly</t><adverb/></w>
<w><t>em-phat-i-cal-ness</t></w>
<w><t>em-phy-se-ma</t></w>
<w><t>em-phy-sem-a-tous</t></w>
@@ -53810,7 +53812,7 @@
<w><t>false=heart-ed</t></w>
<w><t>false=heart-ed-ly</t></w>
<w><t>false=heart-ed-ness</t></w>
-<w><t>false-hood</t></w>
+<w><t>false-hood</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>false im-pris-on-ment</t></phrase>
<w><t>false-ly</t></w>
<w><t>false-ness</t></w>
@@ -79090,7 +79092,7 @@
<phrase><t>in-trin-sic sem-i-con-duc-tor</t></phrase>
<w><t>intro</t></w>
<w><t>in-tro</t></w>
-<w><t>in-tro-duce</t></w>
+<w><t>in-tro-duce</t><verb><regular-root/></verb></w>
<w><t>in-tro-duced</t></w>
<w><t>in-tro-duc-er</t></w>
<w><t>in-tro-duc-i-ble</t></w>
@@ -93338,7 +93340,7 @@
<w><t>me-an-drous</t></w>
<w><t>mean-ie</t></w>
<w><t>mean-ies</t></w>
-<w><t>mean-ing</t></w>
+<w><t>mean-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mean-ing-ful</t></w>
<w><t>mean-ing-ful-ly</t></w>
<w><t>mean-ing-ful-ness</t></w>
@@ -96165,7 +96167,7 @@
<w><t>mis-charge</t></w>
<w><t>mis-charged</t></w>
<w><t>mis-charg-ing</t></w>
-<w><t>mis-chief</t></w>
+<w><t>mis-chief</t><noun><pluralizable/></noun></w>
<w><t>mis-chief=mak-er</t></w>
<w><t>mis-chief=mak-ing</t></w>
<w><t>mis-chie-vous</t></w>
@@ -111350,7 +111352,7 @@
<w><t>out-race</t></w>
<w><t>out-raced</t></w>
<w><t>out-rac-ing</t></w>
-<w><t>out-rage</t></w>
+<w><t>out-rage</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>out-raged</t></w>
<w><t>out-ra-geous</t></w>
<w><t>out-ra-geous-ly</t></w>
@@ -115439,7 +115441,7 @@
<w><t>par-die</t></w>
<w><t>pard-ine</t></w>
<w><t>pard-ner</t></w>
-<w><t>par-don</t></w>
+<w><t>par-don</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>par-don-a-ble</t></w>
<w><t>par-don-a-ble-ness</t></w>
<w><t>par-don-a-bly</t></w>
@@ -116642,7 +116644,7 @@
<w><t>pe-cu-liar-ise</t></w>
<w><t>pe-cu-liar-ised</t></w>
<w><t>pe-cu-liar-is-ing</t></w>
-<w><t>pe-cu-li-ar-i-ty</t></w>
+<w><t>pe-cu-li-ar-i-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pe-cu-liar-ize</t></w>
<w><t>pe-cu-liar-ized</t></w>
<w><t>pe-cu-liar-iz-ing</t></w>
@@ -125309,8 +125311,8 @@
<w><t>pre-judge-ment</t></w>
<w><t>pre-judg-er</t></w>
<w><t>pre-judg-ment</t></w>
-<w><t>prej-u-dice</t></w>
-<w><t>prej-u-diced</t></w>
+<w><t>prej-u-dice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>prej-u-diced</t><adjective><extensible value="false"/></adjective></w>
<w><t>prej-u-diced-ly</t></w>
<w><t>prej-u-dice-less</t></w>
<w><t>pre-ju-di-ci-a-ble</t></w>
@@ -128216,7 +128218,7 @@
<w><t>pros-e-cu-tor</t></w>
<w><t>prose-like</t></w>
<w><t>pros-e-lyt-adj</t></w>
-<w><t>pros-e-lyte</t></w>
+<w><t>pros-e-lyte</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>pros-e-lyt-er</t></w>
<w><t>pros-e-lyt-i-cal</t></w>
<w><t>pros-e-lyt-ise</t></w>
@@ -137435,7 +137437,7 @@
<w><t>re-traced</t></w>
<w><t>re-trac-ing</t></w>
<w><t>re-track</t></w>
-<w><t>re-tract</t></w>
+<w><t>re-tract</t><verb><regular-root/></verb></w>
<w><t>re-tract-a-bil-i-ty</t></w>
<w><t>re-tract-a-ble</t></w>
<w><t>re-trac-ta-tion</t></w>
@@ -150296,7 +150298,7 @@
<w><t>so-phis-ti-cat-ed</t></w>
<w><t>so-phis-ti-cat-ing</t></w>
<w><t>so-phis-ti-ca-tion</t></w>
-<w><t>soph-ist-ry</t></w>
+<w><t>soph-ist-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Soph-o-cle-an</t></w>
<w><t>Soph-o-cles</t></w>
<w><t>soph-o-more</t></w>
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-28 16:03:54 UTC (rev 12718)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-28 18:07:05 UTC (rev 12719)
@@ -174,7 +174,7 @@
/** The list of elements whose ending tag should never be straddled by a word. */
private List<String> elementTerminalList = Arrays.asList(new String[] {"Para", "Sidenote", "Signature", "EmDash",
"PoetryLine", "Title", "Subtitle", "Dateline", "TitlePara", "TitleAbbrev", "LineBreak", "CopyrightDate",
- "Addressee", "EnDash", "InlineContent", "Caption", "Head1", "Head2"});
+ "Addressee", "EnDash", "InlineContent", "Caption", "Head1", "Head2", "SubtitleAbbrev"});
/** The list of elements whose starting tag should never be straddled by a word. */
private List<String> elementStartList = Arrays.asList(new String[] {"Footnote", "Sidenote"});
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-28 16:03:58
|
Revision: 12718
http://sourceforge.net/p/foray/code/12718
Author: victormote
Date: 2022-08-28 16:03:54 +0000 (Sun, 28 Aug 2022)
Log Message:
-----------
Add orthography configuration for Ancient Greek transliterated to Latin text.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2022-08-27 20:22:24 UTC (rev 12717)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2022-08-28 16:03:54 UTC (rev 12718)
@@ -169,8 +169,7 @@
* By "well-formed" is meant a language string that conforms to BCP 47.
* @param langtag The standard language string to be parsed, which may also have country and script components.
* @return The writing system for the given parameters.
- * @see <a href="https://tools.ietf.org/search/bcp47">BCP (Best Current Practice) 47, Tags for Identifying
- * Languages</a>
+ * @see <a href="https://www.w3.org/International/articles/language-tags/">W3C Language tags in HTML and XML</a>
*/
public static WritingSystem4a find(final String langtag) {
if (langtag == null
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-08-27 20:22:24 UTC (rev 12717)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-08-28 16:03:54 UTC (rev 12718)
@@ -243,6 +243,14 @@
</unparsed-dictionary>
</dictionary-resource>
+ <dictionary-resource id="dictionary-greek-translit-latin">
+ <unparsed-dictionary>
+ <dictionary-element>
+ <resource-location type="url">file:///C:/vic/foray/trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml</resource-location>
+ </dictionary-element>
+ </unparsed-dictionary>
+ </dictionary-resource>
+
<hyphenation-patterns-resource id="hyph-patterns-eng">
<parsed-resource>
<resource-location type="classpath">/resources/org/foray/orthography/hyphPatterns/eng.jbso</resource-location>
@@ -289,4 +297,10 @@
<orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
+ <configuration>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
+ <dictionary reference="dictionary-greek-translit-latin"/>
+ <orthography language-iso-3char="grc" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
+ </configuration>
+
</axsl-orthography-config>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-27 20:22:27
|
Revision: 12717
http://sourceforge.net/p/foray/code/12717
Author: victormote
Date: 2022-08-27 20:22:24 +0000 (Sat, 27 Aug 2022)
Log Message:
-----------
Conform to aXSL change: Add method to retrieve a default script from a Language.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java
trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java 2022-08-27 12:50:46 UTC (rev 12716)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/Language4a.java 2022-08-27 20:22:24 UTC (rev 12717)
@@ -29,6 +29,7 @@
package org.foray.common.i18n;
+import org.axsl.i18n.Country;
import org.axsl.i18n.Language;
import java.util.HashMap;
@@ -247,12 +248,12 @@
Language4a.register(new Language4a("cus", null, "Cushitic(Other)", "couchitiques, autres langues", null));
Language4a.register(new Language4a("wel", "cy", "Welsh", "gallois", null));
Language4a.register(new Language4a("dak", null, "Dakota", "dakota", null));
- Language4a.register(new Language4a("dan", "da", "Danish", "danois", null));
+ Language4a.register(new Language4a("dan", "da", "Danish", "danois", Script4a.LATIN));
Language4a.register(new Language4a("dar", null, "Dargwa", "dargwa", null));
Language4a.register(new Language4a("day", null, "Dayak", "dayak", null));
Language4a.register(new Language4a("del", null, "Delaware", "delaware", null));
Language4a.register(new Language4a("den", null, "Slave(Athapascan)", "esclave(athapascan)", null));
- Language4a.register(new Language4a("ger", "de", "German", "allemand", null));
+ Language4a.register(new Language4a("ger", "de", "German", "allemand", Script4a.LATIN));
Language4a.register(new Language4a("dgr", null, "Dogrib", "dogrib", null));
Language4a.register(new Language4a("din", null, "Dinka", "dinka", null));
Language4a.register(new Language4a("div", "dv", "Divehi; Dhivehi; Maldivian", " maldivien", null));
@@ -319,7 +320,7 @@
Language4a.register(new Language4a("grb", null, "Grebo", "grebo", null));
Language4a.register(new Language4a("grc", null, "Greek, Ancient(to 1453)", "grec ancien(jusqu'à 1453)", null));
Language4a.register(new Language4a("grn", "gn", "Guarani", "guarani", null));
- Language4a.register(new Language4a("gsw", null, "Alemanic; Swiss German", " alémanique", null));
+ Language4a.register(new Language4a("gsw", null, "Alemanic; Swiss German", " alémanique", Script4a.LATIN));
Language4a.register(new Language4a("guj", "gu", "Gujarati", "goudjrati", null));
Language4a.register(new Language4a("gwi", null, "Gwich´in", "gwich´in", null));
Language4a.register(new Language4a("hai", null, "Haida", "haida", null));
@@ -480,7 +481,7 @@
"nob", "nb", "Norwegian Bokmål; Bokmål, Norwegian", "norvégien bokmål; bokmål, norvégien", null));
Language4a.register(new Language4a("nog", null, "Nogai", "nogaï; nogay", null));
Language4a.register(new Language4a("non", null, "Norse, Old", "norrois, vieux", null));
- Language4a.register(new Language4a("nor", "no", "Norwegian", "norvégien", null));
+ Language4a.register(new Language4a("nor", "no", "Norwegian", "norvégien", Script4a.LATIN));
Language4a.register(new Language4a("nqo", null, "N'ko", "n'ko", null));
Language4a.register(new Language4a("nso", null, "Northern Sotho, Pedi; Sepedi", "sotho du Nord; pedi; sepedi",
null));
@@ -529,7 +530,7 @@
Language4a.register(new Language4a("roa", null, "Romance(Other)", "romanes, autres langues", null));
Language4a.register(new Language4a("roh", "rm", "Raeto-Romance", "rhéto-roman", null));
Language4a.register(new Language4a("rom", null, "Romany", "tsigane", null));
- Language4a.register(new Language4a("rum", "ro", "Romanian", "roumain", null));
+ Language4a.register(new Language4a("rum", "ro", "Romanian", "roumain", Script4a.LATIN));
Language4a.register(new Language4a("run", "rn", "Rundi", "rundi", null));
Language4a.register(new Language4a(
"rup", null, "Aromanian; Arumanian; Macedo-Romanian", "aroumain; macédo-roumain", null));
@@ -586,7 +587,7 @@
Language4a.register(new Language4a("sus", null, "Susu", "soussou", null));
Language4a.register(new Language4a("sux", null, "Sumerian", "sumérien", null));
Language4a.register(new Language4a("swa", "sw", "Swahili", "swahili", null));
- Language4a.register(new Language4a("swe", "sv", "Swedish", "suédois", null));
+ Language4a.register(new Language4a("swe", "sv", "Swedish", "suédois", Script4a.LATIN));
Language4a.register(new Language4a("syr", null, "Syriac", "syriaque", null));
Language4a.register(new Language4a("tah", "ty", "Tahitian", "tahitien", null));
Language4a.register(new Language4a("tai", null, "Tai(Other)", "thaïes, autres langues", null));
@@ -785,11 +786,9 @@
return this.frenchName;
}
- /**
- * Returns the default script to be used for this language.
- * @return The default script to be used for this language.
- */
- public Script4a getDefaultScript() {
+ @Override
+ public Script4a getDefaultScript(final Country country) {
+ /* The country code is not considered, but will be if needed in the future. */
return this.defaultScript;
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2022-08-27 12:50:46 UTC (rev 12716)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/i18n/WritingSystem4a.java 2022-08-27 20:22:24 UTC (rev 12717)
@@ -126,9 +126,9 @@
*/
public static WritingSystem4a find(final Language4a language, final Script4a script, final Country4a country) {
final Language4a languageToUse = language == null ? Language4a.UNDETERMINED : language;
- Script4a scriptToUse = script == null ? languageToUse.getDefaultScript() : script;
+ final Country4a countryToUse = country == null ? Country4a.UNDETERMINED : country;
+ Script4a scriptToUse = script == null ? languageToUse.getDefaultScript(countryToUse) : script;
scriptToUse = scriptToUse == null ? Script4a.UNDETERMINED : scriptToUse;
- final Country4a countryToUse = country == null ? Country4a.UNDETERMINED : country;
Map<Language4a, Map<Country4a, WritingSystem4a>> scriptMap = REGISTRATION_MAP.get(scriptToUse);
if (scriptMap == null) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-27 12:50:50
|
Revision: 12715
http://sourceforge.net/p/foray/code/12715
Author: victormote
Date: 2022-08-27 12:50:21 +0000 (Sat, 27 Aug 2022)
Log Message:
-----------
Handle specific case, temporary fix. Element-specific logic like this needs to go into an XSLT stylesheet or an external configuration.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-27 12:47:38 UTC (rev 12714)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-08-27 12:50:21 UTC (rev 12715)
@@ -272,6 +272,10 @@
appendText(textElementValue);
return;
}
+ if ("Roman".equals(localName)) {
+ /* This is not accurate, but should serve the purposes of a spell-checker. */
+ appendText("III");
+ }
final WritingSystem4a oldWritingSystem = getCurrentWritingSystem();
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-27 12:50:49
|
Revision: 12716
http://sourceforge.net/p/foray/code/12716
Author: victormote
Date: 2022-08-27 12:50:46 +0000 (Sat, 27 Aug 2022)
Log Message:
-----------
Handle new orthography elements.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2022-08-27 12:50:21 UTC (rev 12715)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/OrthographyParser.java 2022-08-27 12:50:46 UTC (rev 12716)
@@ -415,6 +415,9 @@
case "convertible-to-possessive": return;
case "possessive": return;
case "extensible": return;
+ case "explicit-token-list": return;
+ case "explicit-token": return;
+ case "explicit-tokens": return;
default: {
/* Make sure user knows about unknown tag. */
errorMessage("Unknown tag in orthography configuration: {}", localName);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-27 12:47:42
|
Revision: 12714
http://sourceforge.net/p/foray/code/12714
Author: victormote
Date: 2022-08-27 12:47:38 +0000 (Sat, 27 Aug 2022)
Log Message:
-----------
Improvements to dictionaries, addition of one for Greek words transliterated to Latin script.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
Added Paths:
-----------
trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-27 02:27:45 UTC (rev 12713)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-08-27 12:47:38 UTC (rev 12714)
@@ -12,6 +12,7 @@
<w><t>ceil</t><verb><regular-root/></verb></w>
<w><t>ceil-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-ult-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>go-eth</t><verb/></w>
<w><t>hum-bleth</t><verb><regular-root value="false"/></verb></w>
<w><t>Jno</t><abbrev referenced-word="John"/></w>
<w><t>Kal-a-bar</t><comment>Calabar</comment></w>
@@ -20,6 +21,7 @@
<w><t>liv-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>lo</t><interjection/><comment>Imperative of "look".</comment></w>
<w><t>lov-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>mim-ick-ry</t><noun><pluralizable/></noun></w>
<w><t>oth-er-ways</t><adjective/><adverb/></w>
<w><t>per-suad-est</t><verb><regular-root value="false"/></verb></w>
<w><t>pre-ëm-i-nence</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-27 02:27:45 UTC (rev 12713)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-08-27 12:47:38 UTC (rev 12714)
@@ -255,7 +255,7 @@
<w><t>Ab-ga-tha</t></w>
<w><t>ab-hen-ry</t></w>
<w><t>ab-hom-i-na-ble</t></w>
-<w><t>ab-hor</t></w>
+<w><t>ab-hor</t><verb><regular-root/></verb></w>
<w><t>ab-horred</t></w>
<w><t>ab-hor-rence</t></w>
<w><t>ab-hor-rent</t></w>
@@ -423,7 +423,7 @@
<w><t>a-bom-i-nate</t></w>
<w><t>a-bom-i-nat-ed</t></w>
<w><t>a-bom-i-nat-ing</t></w>
-<w><t>a-bom-i-na-tion</t></w>
+<w><t>a-bom-i-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-bom-i-na-tor</t></w>
<w><t>a-bon-dance</t></w>
<w><t>a-boon</t></w>
@@ -799,7 +799,7 @@
<w><t>ac-cen-tu-at-ing</t></w>
<w><t>ac-cen-tu-a-tion</t></w>
<w><t>ac-cen-tu-a-tor</t></w>
-<w><t>ac-cept</t></w>
+<w><t>ac-cept</t><verb><regular-root/></verb></w>
<w><t>ac-cept-a-bil-i-ty</t></w>
<w><t>ac-cept-a-ble</t></w>
<w><t>ac-cept-a-ble-ness</t></w>
@@ -820,7 +820,7 @@
<w><t>ac-ces-si-bil-i-ty</t></w>
<w><t>ac-ces-si-ble</t></w>
<w><t>ac-ces-si-bly</t></w>
-<w><t>ac-ces-sion</t></w>
+<w><t>ac-ces-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-ces-sion-al</t></w>
<phrase><t>ac-ces-sion num-ber</t></phrase>
<w><t>ac-ces-so-ri-al</t></w>
@@ -4862,7 +4862,7 @@
<w><t>am-ba-rel-la</t></w>
<w><t>am-ba-ri</t></w>
<w><t>am-ba-ry</t></w>
-<w><t>am-bas-sa-dor</t></w>
+<w><t>am-bas-sa-dor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>am-bas-sa-dor=at=large</t></w>
<w><t>am-bas-sa-do-ri-al</t></w>
<w><t>am-bas-sa-do-ri-al-ly</t></w>
@@ -8530,7 +8530,7 @@
<w><t>ap-pli-qué</t></w>
<w><t>ap-pli-quéd</t></w>
<w><t>ap-pli-qué-ing</t></w>
-<w><t>ap-ply</t></w>
+<w><t>ap-ply</t><verb><regular-root/></verb></w>
<w><t>ap-ply-ing</t></w>
<w><t>ap-pog-gia-tu-ra</t></w>
<w><t>ap-pog-gia-tu-ras</t></w>
@@ -10416,7 +10416,7 @@
<w><t>as-sum-ing</t></w>
<w><t>as-sum-ing-ly</t></w>
<w><t>as-sump-sit</t></w>
-<w><t>as-sump-tion</t></w>
+<w><t>as-sump-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>As-sump-tion</t></w>
<w><t>As-sump-tion-ist</t></w>
<w><t>as-sump-tive</t></w>
@@ -12658,8 +12658,7 @@
<w><t>Ba-ke-lite</t></w>
<w><t>bake-meat</t></w>
<w><t>bak-er</t></w>
-<w><t>Bak-er</t></w>
-<w><t>Ba-ker</t></w>
+<w><t>Bak-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bak-er-ies</t></w>
<w><t>ba-ker-like</t></w>
<phrase><t>bak-er’s doz-en</t></phrase>
@@ -13219,7 +13218,7 @@
<w><t>bap-tis-mal</t></w>
<w><t>bap-tis-mal-ly</t></w>
<phrase><t>bap-tism of fire</t></phrase>
-<w><t>Bap-tist</t></w>
+<w><t>Bap-tist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bap-tis-ta</t></w>
<w><t>Bap-tiste</t></w>
<w><t>bap-tis-ter-ies</t></w>
@@ -14457,7 +14456,7 @@
<w><t>Bec-que-rel</t></w>
<w><t>be-crawl</t></w>
<w><t>be-crip-ple</t></w>
-<w><t>bed</t></w>
+<w><t>bed</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>BEd</t></w>
<w><t>be-dab-ble</t></w>
<phrase><t>bed and break-fast</t></phrase>
@@ -14861,7 +14860,7 @@
<w><t>be-liev-a-bly</t></w>
<w><t>be-lieve</t><verb><regular-root/></verb></w>
<w><t>be-lieved</t></w>
-<w><t>be-liev-er</t></w>
+<w><t>be-liev-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>be-liev-ing-ly</t></w>
<w><t>be-like</t></w>
<w><t>Be-lin-da</t></w>
@@ -15611,7 +15610,7 @@
<w><t>bé-ton</t></w>
<w><t>bet-o-ny</t></w>
<w><t>be-took</t></w>
-<w><t>be-tray</t></w>
+<w><t>be-tray</t><verb><regular-root/></verb></w>
<w><t>be-tray-al</t></w>
<w><t>be-tray-er</t></w>
<w><t>be-troth</t></w>
@@ -15810,7 +15809,7 @@
<w><t>bi=bi-va-lent</t></w>
<w><t>bibl</t></w>
<w><t>Bibl</t></w>
-<w><t>Bi-ble</t></w>
+<w><t>Bi-ble</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bi-ble=bash-er</t></w>
<phrase><t>Bi-ble Belt</t></phrase>
<phrase><t>Bi-ble pa-per</t></phrase>
@@ -17379,7 +17378,7 @@
<w><t>blos-som</t></w>
<w><t>blos-som-less</t></w>
<w><t>blos-som-y</t></w>
-<w><t>blot</t></w>
+<w><t>blot</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>blotch</t></w>
<w><t>blotch-i-er</t></w>
<w><t>blotch-i-est</t></w>
@@ -29208,6 +29207,7 @@
<w><t>C=note</t></w>
<w><t>CNS</t></w>
<w><t>Cnut</t></w>
+<w><t>co</t><abbrev referenced-word="company"/></w>
<w><t>co-ac-er-vate</t></w>
<w><t>co-ac-er-va-tion</t></w>
<w><t>coach</t></w>
@@ -30459,7 +30459,7 @@
<w><t>Co-me-ni-us</t></w>
<w><t>come=on</t></w>
<phrase><t>come o-ver</t></phrase>
-<w><t>com-er</t></w>
+<w><t>com-er</t><noun><pluralizable/></noun></w>
<w><t>co-mes</t></w>
<w><t>co-mes-ti-ble</t></w>
<w><t>com-et</t></w>
@@ -30771,7 +30771,7 @@
<w><t>com-mu-ni-ca-tor</t></w>
<w><t>com-mu-ni-ca-to-ry</t></w>
<w><t>com-mun-ing</t></w>
-<w><t>com-mun-ion</t></w>
+<w><t>com-mun-ion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Com-mun-ion</t></w>
<w><t>com-mun-ion-a-ble</t></w>
<w><t>com-mun-ion-al</t></w>
@@ -30793,7 +30793,7 @@
<phrase><t>Com-mun-ist Par-ty</t></phrase>
<w><t>com-mu-ni-tal</t></w>
<w><t>com-mu-ni-tar-i-an</t></w>
-<w><t>com-mu-ni-ty</t></w>
+<w><t>com-mu-ni-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>com-mu-ni-ty cen-tre</t></phrase>
<phrase><t>com-mu-ni-ty chest</t></phrase>
<phrase><t>com-mu-ni-ty col-lege</t></phrase>
@@ -31129,7 +31129,7 @@
<w><t>com-pra-dor</t></w>
<w><t>com-pra-dore</t></w>
<w><t>com-pre-ca-tion</t></w>
-<w><t>com-pre-hend</t></w>
+<w><t>com-pre-hend</t><verb><regular-root/></verb></w>
<w><t>com-pre-hend-er</t></w>
<w><t>com-pre-hend-i-ble</t></w>
<w><t>com-pre-hend-ing-ly</t></w>
@@ -32181,7 +32181,7 @@
<w><t>con-sid-ered</t></w>
<w><t>con-sid-er-er</t></w>
<w><t>con-sid-er-ing</t></w>
-<w><t>con-sign</t></w>
+<w><t>con-sign</t><verb><regular-root/></verb></w>
<w><t>con-sign-a-ble</t></w>
<w><t>con-sig-na-tion</t></w>
<w><t>con-sign-ee</t></w>
@@ -32283,7 +32283,7 @@
<w><t>con-stant-an</t></w>
<w><t>Con-stan-tia</t></w>
<w><t>Con-stan-tin</t></w>
-<w><t>Con-stan-tine</t></w>
+<w><t>Con-stan-tine</t><noun><convertible-to-possessive/></noun></w>
<phrase><t>Con-stan-tine I</t></phrase>
<phrase><t>Con-stan-tine II</t></phrase>
<phrase><t>Con-stan-tine XI</t></phrase>
@@ -32308,7 +32308,7 @@
<w><t>con-stit-u-en-cy</t></w>
<w><t>con-stit-u-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-stit-u-ent-ly</t></w>
-<w><t>con-sti-tute</t></w>
+<w><t>con-sti-tute</t><verb><regular-root/></verb></w>
<w><t>con-sti-tut-ed</t></w>
<w><t>con-sti-tut-er</t></w>
<w><t>con-sti-tut-ing</t></w>
@@ -32653,7 +32653,7 @@
<w><t>con-tra-dict</t></w>
<w><t>con-tra-dict-a-ble</t></w>
<w><t>con-tra-dict-er</t></w>
-<w><t>con-tra-dic-tion</t></w>
+<w><t>con-tra-dic-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-tra-dic-tious</t></w>
<w><t>con-tra-dic-tious-ly</t></w>
<w><t>con-tra-dic-tious-ness</t></w>
@@ -41615,7 +41615,7 @@
<w><t>dic-ta</t></w>
<w><t>dic-ta-graph</t></w>
<w><t>Dic-ta-phone</t></w>
-<w><t>dic-tate</t></w>
+<w><t>dic-tate</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>dic-tat-ing-ly</t></w>
<w><t>dic-ta-tion</t></w>
<w><t>dic-ta-tion-al</t></w>
@@ -42208,7 +42208,7 @@
<w><t>di-ox-ane</t></w>
<w><t>di-ox-ide</t></w>
<w><t>Dip</t></w>
-<w><t>dip</t></w>
+<w><t>dip</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>dip cir-cle</t></phrase>
<w><t>di-pep-tide</t></w>
<w><t>di-pet-al-ous</t></w>
@@ -42300,7 +42300,7 @@
<w><t>di-po-lar</t></w>
<w><t>di-pole</t></w>
<w><t>Di-pol-i-a</t></w>
-<w><t>dip-per</t></w>
+<w><t>dip-per</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dip-pi-er</t></w>
<w><t>dip-pi-est</t></w>
<w><t>dip-ping</t></w>
@@ -42573,7 +42573,7 @@
<w><t>dis-cas-ing</t></w>
<w><t>dis-cept</t></w>
<w><t>dis-cep-ta-tion</t></w>
-<w><t>dis-cern</t></w>
+<w><t>dis-cern</t><verb><regular-root/></verb></w>
<w><t>dis-cern-a-ble</t></w>
<w><t>dis-cern-a-ble-ness</t></w>
<w><t>dis-cern-a-bly</t></w>
@@ -48361,7 +48361,7 @@
<w><t>em-bla-zon-er</t></w>
<w><t>em-bla-zon-ment</t></w>
<w><t>em-bla-zon-ry</t></w>
-<w><t>em-blem</t></w>
+<w><t>em-blem</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>em-blem-at-ic</t></w>
<w><t>em-blem-at-i-cal</t></w>
<w><t>em-blem-at-i-cal-ly</t></w>
@@ -48768,7 +48768,7 @@
<w><t>en-a-bling</t></w>
<phrase><t>en-a-bling act</t></phrase>
<w><t>e-na-ceous</t></w>
-<w><t>en-act</t></w>
+<w><t>en-act</t><verb><regular-root/></verb></w>
<w><t>en-act-a-ble</t></w>
<w><t>en-ac-tive</t></w>
<w><t>en-act-ment</t></w>
@@ -49542,7 +49542,7 @@
<w><t>en-root</t></w>
<w><t>Ens</t></w>
<w><t>ens</t></w>
-<w><t>en-sam-ple</t></w>
+<w><t>en-sam-ple</t><noun><pluralizable/></noun></w>
<w><t>en-san-guine</t></w>
<w><t>en-san-guined</t></w>
<w><t>en-san-guin-ing</t></w>
@@ -50268,8 +50268,7 @@
<w><t>E-pis-co-pal</t></w>
<w><t>e-pis-co-pal</t></w>
<phrase><t>E-pis-co-pal Church</t></phrase>
-<w><t>E-pis-co-pa-lian</t></w>
-<w><t>E-pis-co-pa-li-an</t></w>
+<w><t>E-pis-co-pa-lian</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>e-pis-co-pa-li-an</t></w>
<w><t>E-pis-co-pa-lian-ism</t></w>
<w><t>e-pis-co-pal-ism</t></w>
@@ -53236,7 +53235,7 @@
<w><t>ex-tra-ter-ri-to-ri-al-ly</t></w>
<phrase><t>ex-tra time</t></phrase>
<w><t>ex-tra-u-ter-ine</t></w>
-<w><t>ex-trav-a-gance</t></w>
+<w><t>ex-trav-a-gance</t><noun><pluralizable/></noun></w>
<w><t>ex-trav-a-gan-cy</t></w>
<w><t>ex-trav-a-gant</t></w>
<w><t>ex-trav-a-gant-ly</t></w>
@@ -56634,7 +56633,7 @@
<w><t>Flo-is</t></w>
<w><t>F-lon</t></w>
<w><t>flong</t></w>
-<w><t>flood</t></w>
+<w><t>flood</t><noun><pluralizable/></noun></w>
<w><t>Flood</t></w>
<w><t>flood-a-ble</t></w>
<phrase><t>flood con-trol</t></phrase>
@@ -58061,7 +58060,7 @@
<w><t>foun-drous</t></w>
<w><t>found-ry</t></w>
<phrase><t>found-ry proof</t></phrase>
-<w><t>fount</t></w>
+<w><t>fount</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Fount</t></w>
<w><t>foun-tain</t></w>
<w><t>foun-tained</t></w>
@@ -61687,7 +61686,7 @@
<w><t>Gia-co-mo</t></w>
<w><t>Gia-co-muz-zo</t></w>
<w><t>Giam-bat-tis-ta</t></w>
-<w><t>gi-ant</t></w>
+<w><t>gi-ant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>gi-ant-ess</t></w>
<w><t>gi-ant-ism</t></w>
<phrase><t>gi-ant kill-er</t></phrase>
@@ -71871,7 +71870,7 @@
<w><t>Hun-nish</t></w>
<w><t>Hun-nish-ness</t></w>
<w><t>hunt</t></w>
-<w><t>Hunt</t></w>
+<w><t>Hunt</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>hunt-a-ble</t></w>
<w><t>hunt-a-way</t></w>
<w><t>hunt-ed</t></w>
@@ -74328,8 +74327,8 @@
<w><t>im-mers-ing</t></w>
<w><t>im-mer-sion</t></w>
<phrase><t>im-mer-sion heat-er</t></phrase>
-<w><t>im-mer-sion-ism</t></w>
-<w><t>im-mer-sion-ist</t></w>
+<w><t>im-mer-sion-ism</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>im-mer-sion-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>im-mesh</t></w>
<w><t>im-me-thod-i-cal</t></w>
<w><t>im-me-thod-i-cal-ly</t></w>
@@ -76397,7 +76396,7 @@
<w><t>in-fa-mous-ness</t></w>
<w><t>in-fa-my</t></w>
<w><t>in-fan-cy</t></w>
-<w><t>in-fant</t></w>
+<w><t>in-fant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-fan-ta</t></w>
<w><t>in-fan-te</t></w>
<w><t>in-fant-hood</t></w>
@@ -76426,7 +76425,7 @@
<w><t>in-fea-si-bil-i-ty</t></w>
<w><t>in-fea-si-ble</t></w>
<w><t>in-fea-si-ble-ness</t></w>
-<w><t>in-fect</t></w>
+<w><t>in-fect</t><verb><regular-root/></verb></w>
<w><t>in-fect-ant</t></w>
<w><t>in-fect-ed-ness</t></w>
<w><t>in-fect-er</t></w>
@@ -77432,7 +77431,7 @@
<w><t>in-stil-ling</t></w>
<w><t>in-still-ment</t></w>
<w><t>in-stil-ment</t></w>
-<w><t>in-stinct</t></w>
+<w><t>in-stinct</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-stinc-tive</t></w>
<w><t>in-stinc-tive-ly</t></w>
<w><t>in-stinc-tu-al</t></w>
@@ -79135,10 +79134,10 @@
<w><t>in-tro-vert</t></w>
<w><t>in-trude</t></w>
<w><t>in-trud-ed</t></w>
-<w><t>in-trud-er</t></w>
+<w><t>in-trud-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-trud-ing</t></w>
<w><t>in-trud-ing-ly</t></w>
-<w><t>in-tru-sion</t></w>
+<w><t>in-tru-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-tru-sion-al</t></w>
<w><t>in-tru-sive</t></w>
<w><t>in-tru-sive-ly</t></w>
@@ -90180,7 +90179,7 @@
<w><t>lu-te-ti-um</t></w>
<w><t>Luth</t></w>
<w><t>Lu-ther</t></w>
-<w><t>Lu-ther-an</t></w>
+<w><t>Lu-ther-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lu-ther-an-ism</t></w>
<w><t>Lu-ther-ism</t></w>
<w><t>lu-thern</t></w>
@@ -94788,7 +94787,7 @@
<w><t>meth-od-ised</t></w>
<w><t>meth-od-is-ing</t></w>
<w><t>Meth-od-ism</t></w>
-<w><t>Meth-od-ist</t></w>
+<w><t>Meth-od-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Meth-od-is-tic</t></w>
<w><t>Meth-od-is-ti-cal</t></w>
<w><t>Meth-od-is-ti-cal-ly</t></w>
@@ -96722,7 +96721,7 @@
<w><t>mis-typed</t></w>
<w><t>mis-un-der-stand</t></w>
<w><t>mis-un-der-stand-er</t></w>
-<w><t>mis-un-der-stand-ing</t></w>
+<w><t>mis-un-der-stand-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mis-un-der-stand-ing-ly</t></w>
<w><t>mis-un-der-stood</t></w>
<w><t>mis-un-ion</t></w>
@@ -110036,7 +110035,7 @@
<w><t>or-di-nal</t></w>
<w><t>or-di-nal-ly</t></w>
<phrase><t>or-di-nal num-ber</t></phrase>
-<w><t>or-di-nance</t></w>
+<w><t>or-di-nance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>or-di-nand</t></w>
<w><t>or-di-nar-i-ate</t></w>
<w><t>or-di-nar-i-ly</t></w>
@@ -113430,7 +113429,7 @@
<w><t>o-ver-shade</t></w>
<w><t>o-ver-shad-ed</t></w>
<w><t>o-ver-shad-ing</t></w>
-<w><t>o-ver-shad-ow</t></w>
+<w><t>o-ver-shad-ow</t><verb><regular-root/></verb></w>
<w><t>o-ver-shine</t></w>
<w><t>o-ver-shined</t></w>
<w><t>o-ver-shin-ing</t></w>
@@ -114152,8 +114151,8 @@
<w><t>paed-er-as-ty</t></w>
<w><t>pae-di-a-tri-cian</t></w>
<w><t>pae-di-at-rics</t></w>
-<w><t>pae-do-bap-tism</t></w>
-<w><t>pae-do-bap-tist</t></w>
+<w><t>pae-do-bap-tism</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>pae-do-bap-tist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pae-do-gen-e-sis</t></w>
<w><t>pae-do-ge-net-ic</t></w>
<w><t>pae-dol-o-gy</t></w>
@@ -115730,7 +115729,7 @@
<w><t>par-tak-a-ble</t></w>
<w><t>par-take</t><verb><regular-root value="false"/></verb></w>
<w><t>par-tak-en</t></w>
-<w><t>par-tak-er</t></w>
+<w><t>par-tak-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>par-takes</t><verb><regular-root value="false"/></verb></w>
<w><t>par-tak-ing</t></w>
<w><t>par-tan</t></w>
@@ -115923,7 +115922,7 @@
<w><t>pas-sa-ca-glia</t></w>
<w><t>pas-sade</t></w>
<w><t>pas-sa-do</t></w>
-<w><t>pas-sage</t></w>
+<w><t>pas-sage</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pas-saged</t></w>
<phrase><t>pas-sage hawk</t></phrase>
<phrase><t>pas-sag-er hawk</t></phrase>
@@ -122174,7 +122173,7 @@
<w><t>pop-u-lar-ised</t></w>
<w><t>pop-u-lar-is-er</t></w>
<w><t>pop-u-lar-is-ing</t></w>
-<w><t>pop-u-lar-i-ty</t></w>
+<w><t>pop-u-lar-i-ty</t><noun><convertible-to-possessive/></noun></w>
<w><t>pop-u-lar-i-za-tion</t></w>
<w><t>pop-u-lar-ize</t></w>
<w><t>pop-u-lar-iz-er</t></w>
@@ -122293,7 +122292,7 @@
<phrase><t>Port E-liz-a-beth</t></phrase>
<w><t>porte=mon-mon-naies</t></w>
<w><t>porte=mon-naie</t></w>
-<w><t>por-tend</t></w>
+<w><t>por-tend</t><verb><regular-root/></verb></w>
<w><t>por-tent</t></w>
<w><t>por-ten-tous</t></w>
<w><t>por-ten-tous-ly</t></w>
@@ -125352,8 +125351,8 @@
<w><t>prel-ate-ship</t></w>
<w><t>pre-lat-ic</t></w>
<w><t>pre=Lat-in</t></w>
-<w><t>prel-a-tism</t></w>
-<w><t>prel-a-tist</t></w>
+<w><t>prel-a-tism</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>prel-a-tist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>prel-a-ture</t></w>
<w><t>pre-launch</t></w>
<w><t>pre-law-ful</t></w>
@@ -127496,6 +127495,7 @@
<w><t>Prof</t></w>
<w><t>prof</t></w>
<w><t>pro-fac-ul-ty</t></w>
+<w><t>prof-a-na-tion</t><noun><pluralizable/></noun></w>
<w><t>pro-fan-a-to-ry</t></w>
<w><t>pro-fane</t></w>
<w><t>pro-faned</t></w>
@@ -129592,7 +129592,7 @@
<w><t>pulp-board</t></w>
<w><t>pulp-i-er</t></w>
<w><t>pulp-i-est</t></w>
-<w><t>pul-pit</t></w>
+<w><t>pul-pit</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pul-pit-al</t></w>
<w><t>pul-pit-eer</t></w>
<w><t>pul-pit-er</t></w>
@@ -133123,7 +133123,7 @@
<w><t>rea-soned</t></w>
<w><t>rea-soned-ly</t></w>
<w><t>rea-son-er</t></w>
-<w><t>rea-son-ing</t></w>
+<w><t>rea-son-ing</t><noun><pluralizable/></noun></w>
<w><t>rea-son-ing-ly</t></w>
<w><t>rea-son-less</t></w>
<w><t>rea-son-sured</t></w>
@@ -133217,8 +133217,8 @@
<w><t>re-band-aged</t></w>
<w><t>re-band-ag-ing</t></w>
<w><t>re-bank</t></w>
-<w><t>re-bap-tism</t></w>
-<w><t>re-bap-tize</t></w>
+<w><t>re-bap-tism</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>re-bap-tize</t><verb><regular-root/></verb></w>
<w><t>re-bap-tized</t></w>
<w><t>re-bap-tiz-ing</t></w>
<w><t>re-bar-ba-tive</t></w>
@@ -135456,7 +135456,7 @@
<w><t>re-jeop-ard-iz-ing</t></w>
<w><t>re-jig</t></w>
<w><t>re-jig-ger</t></w>
-<w><t>re-joice</t></w>
+<w><t>re-joice</t><verb><regular-root/></verb></w>
<w><t>re-joiced</t></w>
<w><t>re-joice-ful</t></w>
<w><t>re-joic-ing</t></w>
@@ -136127,7 +136127,7 @@
<w><t>re-or-ches-trat-ed</t></w>
<w><t>re-or-ches-trat-ing</t></w>
<w><t>re-or-ches-tra-tion</t></w>
-<w><t>re-or-dain</t></w>
+<w><t>re-or-dain</t><verb><regular-root/></verb></w>
<w><t>re-or-der</t></w>
<w><t>re-or-gan-ise</t></w>
<w><t>re-or-gan-ised</t></w>
@@ -140328,7 +140328,7 @@
<w><t>sacque</t></w>
<w><t>sac-ra</t></w>
<w><t>sa-cral</t></w>
-<w><t>sac-ra-ment</t></w>
+<w><t>sac-ra-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sac-ra-men-tal</t></w>
<w><t>sac-ra-men-tal-ism</t></w>
<w><t>sac-ra-men-tal-ist</t></w>
@@ -143230,7 +143230,7 @@
<phrase><t>se-cret ser-vice</t></phrase>
<w><t>se-cret=serv-ice</t></w>
<phrase><t>se-cret so-ci-e-ty</t></phrase>
-<w><t>sect</t></w>
+<w><t>sect</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sec-tar-i-an</t></w>
<w><t>sec-tar-i-an-ise</t></w>
<w><t>sec-tar-i-an-ised</t></w>
@@ -148504,7 +148504,7 @@
<w><t>skunk-weed</t></w>
<w><t>Sku-ta-ri</t></w>
<w><t>skut-te-rud-ite</t></w>
-<w><t>sky</t></w>
+<w><t>sky</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sky-borne</t></w>
<w><t>sky-cap</t></w>
<w><t>sky-dive</t></w>
@@ -151984,7 +151984,7 @@
<w><t>spring-time</t></w>
<w><t>spring-wood</t></w>
<w><t>spring-y</t></w>
-<w><t>sprin-kle</t></w>
+<w><t>sprin-kle</t><verb><regular-root/></verb></w>
<w><t>sprink-ler</t></w>
<w><t>sprin-kler</t></w>
<phrase><t>sprin-kler sys-tem</t></phrase>
@@ -152213,7 +152213,7 @@
<w><t>squin-nied</t></w>
<w><t>squin-ny</t></w>
<w><t>squin-ny-ing</t></w>
-<w><t>squint</t></w>
+<w><t>squint</t><verb><regular-root/></verb></w>
<w><t>squint-er</t></w>
<w><t>squint-ing-ly</t></w>
<w><t>squint-ing-ness</t></w>
@@ -158749,7 +158749,7 @@
<w><t>sym-bol-is-ti-cal</t></w>
<w><t>sym-bol-is-ti-cal-ly</t></w>
<phrase><t>sym-bol-ist move-ment</t></phrase>
-<w><t>sym-bol-ize</t></w>
+<w><t>sym-bol-ize</t><verb><regular-root/></verb></w>
<w><t>sym-bol-ized</t></w>
<w><t>sym-bol-iz-ing</t></w>
<w><t>sym-bolled</t></w>
@@ -160218,7 +160218,7 @@
<w><t>tax-y-ing</t></w>
<w><t>tay</t></w>
<w><t>Tay</t></w>
-<w><t>Tay-lor</t></w>
+<w><t>Tay-lor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Tay-lor-ite</t></w>
<phrase><t>Tay-lor’s se-ries</t></phrase>
<w><t>Tay-lor-ville</t></w>
@@ -164211,6 +164211,7 @@
<w><t>tram-car</t></w>
<w><t>tram-less</t></w>
<w><t>tram-line</t></w>
+<w><t>tram-mel</t><verb><regular-root/></verb></w>
<w><t>tram-meled</t></w>
<w><t>tram-mel-er</t></w>
<w><t>tram-mel-ing</t></w>
@@ -164498,7 +164499,7 @@
<w><t>trans-la-tion-al</t></w>
<w><t>trans-la-tion-al-ly</t></w>
<w><t>trans-la-tive</t></w>
-<w><t>trans-la-tor</t></w>
+<w><t>trans-la-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>trans=Li-be-ri-an</t></w>
<w><t>trans=Lib-y-an</t></w>
<w><t>trans-light</t></w>
@@ -168602,6 +168603,7 @@
<w><t>un-chron-i-cled</t></w>
<w><t>un-chron-o-log-i-cal</t></w>
<w><t>un-church</t></w>
+<w><t>un-churched</t><adjective></adjective></w>
<w><t>un-church-ly</t></w>
<w><t>un-churl-ish</t></w>
<w><t>un-churn</t></w>
@@ -179444,7 +179446,7 @@
<w><t>USA</t></w>
<w><t>us-a-ble</t></w>
<w><t>USAF</t></w>
-<w><t>us-age</t></w>
+<w><t>us-age</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>us-ance</t></w>
<w><t>Us-beg</t></w>
<w><t>Us-bek</t></w>
@@ -186527,7 +186529,7 @@
<w><t>YHWH</t></w>
<w><t>yid</t></w>
<w><t>Yid-dish</t></w>
-<w><t>yield</t></w>
+<w><t>yield</t><verb><regular-root/></verb></w>
<w><t>yield-ing</t></w>
<w><t>yield-ing-ly</t></w>
<w><t>yield-ing-ness</t></w>
Added: trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml (rev 0)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml 2022-08-27 12:47:38 UTC (rev 12714)
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!DOCTYPE axsl-dictionary
+ PUBLIC "-//aXSL//DTD Dictionary V0.1//EN"
+ "http://www.axsl.org/dtds/0.1/en/axsl-dictionary.dtd">
+
+<axsl-dictionary language="grc" script="Latn" hard-hyphen-char="="
+ soft-hyphen-char="-">
+
+<!--
+Dictionary of Ancient Greek words that are transliterated into Latin script.
+-->
+
+<w><t>bap-ti-zo</t><noun></noun></w>
+</axsl-dictionary>
Property changes on: trunk/foray/foray-orthography/src/main/data/dictionaries/grc-Latn-ZZZ.dict.xml
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-08-27 02:27:45 UTC (rev 12713)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-08-27 12:47:38 UTC (rev 12714)
@@ -10,7 +10,9 @@
<!--
-->
+<w><t>ad</t></w>
<w><t>ann-um</t></w>
+<w><t>ar-gu-ment-um</t></w>
<w><t>bel-li</t></w>
<w><t>ca-sus</t></w>
<w><t>de</t></w>
@@ -17,6 +19,8 @@
<w><t>et</t></w>
<w><t>fide</t></w>
<w><t>fit</t></w>
+<w><t>hoc</t></w>
+<w><t>ho-mi-nem</t></w>
<w><t>i.e</t><abbrev referenced-word="id est"/><comment>Until "i.e." can be handled properly.</comment></w>
<w><t>in</t></w>
<w><t>in-fi-del-i-um</t></w>
@@ -27,8 +31,12 @@
<w><t>pa-rent-is</t></w>
<w><t>part-i-bus</t></w>
<w><t>per</t></w>
+<w><t>pe-ti-tio</t></w>
+<w><t>prin-ci-pii</t></w>
<w><t>prop-a-gan-da</t></w>
<w><t>qui</t></w><w><t>sem-per</t></w>
+<w><t>sig-no</t></w>
+<w><t>vin-ces</t></w>
<w><t>vive</t></w>
</axsl-dictionary>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-08-27 02:27:46
|
Revision: 12713
http://sourceforge.net/p/foray/code/12713
Author: victormote
Date: 2022-08-27 02:27:45 +0000 (Sat, 27 Aug 2022)
Log Message:
-----------
Abandon the "assign ID" class. This task is better suited to a stylesheet.
Removed Paths:
-------------
trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayAssignId.java
Deleted: trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayAssignId.java
===================================================================
--- trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayAssignId.java 2022-07-13 22:20:27 UTC (rev 12712)
+++ trunk/foray/foray-xml/src/main/java/org/foray/xml/ForayAssignId.java 2022-08-27 02:27:45 UTC (rev 12713)
@@ -1,408 +0,0 @@
-/*
- * Copyright 2020 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.xml;
-
-import org.foray.common.primitive.StringUtils;
-import org.foray.common.primitive.XmlUtils;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.DefaultParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Attr;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.traversal.DocumentTraversal;
-import org.w3c.dom.traversal.NodeFilter;
-import org.w3c.dom.traversal.NodeIterator;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.XMLReader;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-/**
- * Command-line application that reads an XML file, recomputes the Id attributes found in it, and writes the result.
- */
-public class ForayAssignId extends SaxParser<Object> {
-
- /** Command-line status constant indicating that the command line itself was not properly formed. */
- public static final byte STATUS_COMMAND_LINE_ERROR = 1;
-
- /** Command-line return status constant indicating that a file was not found. */
- public static final byte STATUS_FILE_NOT_FOUND = 2;
-
- /** Command-line return status constant indicating that there was an error parsing the input file. */
- public static final byte STATUS_PARSING_ERROR = 3;
-
- /** Command-line return status constant indicating that there was an error writing the output. */
- public static final byte STATUS_WRITING_ERROR = 4;
-
- /** The input source to be modified. */
- private InputSource input;
-
- /** The output stream to which the modified output should be sent. */
- private OutputStream output;
-
- /** The entity resolver to be used for resolving Dtd catalogs and other
- * entities. */
- private EntityResolver entityResolver;
-
- /** The string to use for formatting Ids for Graphic elements. */
- private String ancestorParaFormatString = "%03d";
-
- /** Map whose key is an Element with an Id, and whose value is the counter of child objects needing Ids encountered
- * so far. */
- private Map<Element, AncestorInfo> counterMap = new HashMap<Element, AncestorInfo>();
-
- /**
- * Stores the information for one ancestor node.
- */
- private class AncestorInfo {
-
- /** The number of Para-level children counted so far. */
- private int paraCount = 0;
- }
-
- /** List of elements whose IDs should not be changed, but for which we need to track a count of descendants whose
- * IDs have been assigned based on this element. */
- private List<String> ancestorLevelElements = Arrays.asList(
- "Book", "Part", "ToC", "Preface", "Chapter", "Float", "Graphic", "Appendices", "Appendix", "IndexFixed");
-
- /** List of elements whose IDs can be assigned based on an ancestor. */
- private List<String> paraLevelElements = Arrays.asList(
- "Para", "Head1", "Head2", "Signature", "Dateline", "PoetryVerse");
-
- /** The location of catalogs to use during parsing. */
- private String[] catalogs;
-
-
- /**
- * Constructor.
- * @param input The input source encapsulating the document to be modified.
- * @param output The output stream to which the modified document should be sent.
- * @param catalog The location of a catalog file that should be used to find the Dtd for this document.
- * This may be null.
- */
- public ForayAssignId(final InputSource input, final OutputStream output, final String catalog) {
- this.input = input;
- this.output = output;
- if (catalog != null) {
- final String[] catalogs = {catalog};
- this.entityResolver = XmlUtils.getEntityResolver(catalogs);
- }
- }
-
- @Override
- public Object parse(final InputSource inputSource) throws IOException, ParserConfigurationException, SAXException {
- final EntityResolver entityResolver = XmlUtils.getEntityResolver(this.catalogs);
- final XMLReader parser = createSax2Parser(true, true, true, entityResolver, true);
- parser.parse(inputSource);
- return null;
- }
-
- /**
- * Intantiates parser and starts parsing of input.
- * @throws IOException For I/O Errors.
- * @throws SAXException For parsing errors.
- * @throws ParserConfigurationException For errors configuring parser.
- * @throws TransformerException When it is impossible to create a {@link Transformer} instance.
- */
- public void start() throws IOException, SAXException, ParserConfigurationException, TransformerException {
- /* Read the input into a DOM. */
- final DocumentBuilder db = createDocumentBuilder();
- final Document doc = db.parse(input);
-
- /* Change the document. */
- changeDocument(doc);
-
- /* Write the modified document. */
- final TransformerFactory tFactory = TransformerFactory.newInstance();
- final Transformer transformer = tFactory.newTransformer();
- if (doc.getDoctype() != null) {
- transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, doc.getDoctype().getPublicId());
- transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, doc.getDoctype().getSystemId());
- }
-
- final DOMSource source = new DOMSource(doc);
- final StreamResult result = new StreamResult(this.output);
- transformer.transform(source, result);
- }
-
- /**
- * Sets up processing for a new {@link Document}.
- * @param doc The document to be processed.
- */
- private void changeDocument(final Document doc) {
- final DocumentTraversal dt = (DocumentTraversal) doc;
- final NodeIterator ni = dt.createNodeIterator(doc.getDocumentElement(), NodeFilter.SHOW_ELEMENT, null, false);
- Element element = (Element) ni.nextNode();
- while (element != null) {
- final Attr idAttribute = element.getAttributeNode("Id");
- if (idAttribute != null) {
- final String newValue = computeIdValue(element);
- idAttribute.setValue(newValue);
- }
- element = (Element) ni.nextNode();
- }
- }
-
-
- /**
- * Computes the "Id" for a given element.
- * @param element The element for which an "Id" should be computed.
- * @return The computed "Id" for {@code element}.
- */
- private String computeIdValue(final Element element) {
- AncestorInfo ancestorInfo = null;
- if (paraLevelElements.contains(element.getTagName())) {
- ancestorInfo = new AncestorInfo();
- this.counterMap.put(element, ancestorInfo);
- return computeParaIdValue(element);
- }
-
- if (ancestorLevelElements.contains(element.getTagName())) {
- ancestorInfo = new AncestorInfo();
- this.counterMap.put(element, ancestorInfo);
- return element.getAttribute("Id");
- }
-
- return "unknown";
- }
-
- /**
- * Computes the "Id" for a given paragraph-level element.
- * @param element The paragraph-level element for which an "Id" should be computed.
- * @return The computed "Id" for {@code element}.
- */
- private String computeParaIdValue(final Element element) {
- final Element ancestor = findAncestorWithId(element);
- final AncestorInfo info = counterMap.get(ancestor);
- if (info == null) {
- throw new IllegalStateException("Ancestor Info not found for element " + element.getTagName() +
- ", Id " + element.getAttribute("Id"));
- }
- String prefix = StringUtils.EMPTY_STRING;
-
- if (this.paraLevelElements.contains(ancestor.getTagName())) {
- /* If the ancestor is itself a Para, then this is probably the child of a footnote. Use the ancestor's
- * actual ID (instead of the descendant prefix) as the root of this element's ID. */
- prefix = ancestor.getAttribute("Id") + "-";
- } else {
- if (ancestor.hasAttribute("DescendantIdPrefix")) {
- prefix = ancestor.getAttribute("DescendantIdPrefix") + "-";
- }
- }
-
- return prefix + "p" + String.format(ancestorParaFormatString, ++info.paraCount);
- }
-
- /**
- * For a given {@link Element}, finds its nearest ancestor node that has an "Id" attribute.
- * @param element The element for which an "Id" ancestor is wanted.
- * @return The nearest ancestor element that has an "Id" attribute, or null if none is found.
- */
- private Element findAncestorWithId(final Element element) {
- Element ancestor = (Element) element.getParentNode();
- while (! ancestor.hasAttribute("Id")) {
- ancestor = (Element) ancestor.getParentNode();
- }
- return ancestor;
- }
-
- /**
- * Creates a standard {@link DocumentBuilder}.
- * @return A new document builder.
- * @throws ParserConfigurationException For errors creating or configuring the document builder.
- */
- private DocumentBuilder createDocumentBuilder() throws ParserConfigurationException {
- final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
-
- /* The following section sets the factory with parser features. Not sure whether this works. */
-
- /* Turn on namespace-prefixes so that we get the namespace declarations
- * returned with other attributes and can therefore write them out
- * along with them. */
- try {
- dbf.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
- } catch (final ParserConfigurationException e) {
- errorMessage("Parser does not recognize the \"namespace-prefixes\" feature.");
- }
-
- /* Turn on validation if it is available. */
- try {
- dbf.setFeature("http://xml.org/sax/features/validation", true);
- } catch (final ParserConfigurationException e1) {
- errorMessage("Parser does not recognize the \"validation\" feature.");
- }
-
- /* Turn on "notify-char-refs" feature.
- * Sadly, this only works with Xerces.
- * This feature, or something like it is very important.
- * Without it, character entities get transformed into characters
- * without notification.
- * When notified, we can (and do) ignore the transformed characters
- * and use the character entities instead.
- * We do NOT want to change the user's content. */
- try {
- dbf.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
- } catch (final ParserConfigurationException e) {
- /* Make this a fatal error. */
- errorMessage("Parser cannot report character entities. Aborting.");
- return null;
- }
-
- final DocumentBuilder db = dbf.newDocumentBuilder();
- db.setEntityResolver(this.entityResolver);
- return db;
- }
-
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Returns the command-line options for the {@link #main(String[])} method.
- * @return Command-line options.
- */
- private static Options getCommandLineOptions() {
- final Options clOptions = new Options();
- final Option input = new Option("i", "input", true, "path to the input file");
- input.setRequired(true);
- final Option output = new Option("o", "output", true, "path to the output file");
- output.setRequired(true);
- final Option catalog = new Option("c", "catalog", true, "path to the OASIS XML catalog");
- output.setRequired(true);
- clOptions.addOption(input);
- clOptions.addOption(output);
- clOptions.addOption(catalog);
- return clOptions;
- }
-
- /**
- * Command-line interface for modifying an XML document.
- *
- * <p>Design Note: We use files instead of URLs here to more easily integrate with desktop tools such as XML
- * editors, which typically know how to pass parameters for local files, but not how to convert them to URLs.
- * If URLs are needed, additional flags probably need to be added to the command-line interface to designate
- * that.</p>
- *
- * <p>Return status is one of:</p>
- * <ul>
- * <li>0 (success)</li>
- * <li>1 (wrong quantity of arguments)</li>
- * <li>2 (file not found)</li>
- * <li>3 (parsing error)</li>
- * </ul>
- *
- * @param args command-line arguments.
- * Argument 1 is the location of the input file.
- * Argument 2 is the location of the output file.
- * Argument 3 is an optional location of an OASIS-compliant catalog file that can be used to locate local DTDs.
- */
- public static void main(final String[] args) {
- final Logger logger = LoggerFactory.getLogger(ForayAssignId.class);
- final Options commandLineOptions = ForayAssignId.getCommandLineOptions();
- final CommandLineParser commandLineParser = new DefaultParser();
- CommandLine parsedCommandLine = null;
- try {
- parsedCommandLine = commandLineParser.parse(commandLineOptions, args);
- } catch (final ParseException e) {
- logger.error(e.getMessage(), e);
- final HelpFormatter helpFormatter = new HelpFormatter();
- helpFormatter.printHelp("java -cp $FORAY_CLASSPATH " + ForayAssignId.class.getName(), commandLineOptions,
- true);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ForayAssignId.STATUS_COMMAND_LINE_ERROR);
- }
-
- final String input = parsedCommandLine.getOptionValue("input");
- final String output = parsedCommandLine.getOptionValue("output");
- final String catalog = parsedCommandLine.getOptionValue("catalog");
-
- InputStream inputStream = null;
- try {
- FileInputStream fis = null;
- fis = new FileInputStream(input);
- inputStream = new BufferedInputStream(fis);
- } catch (final FileNotFoundException e) {
- logger.error("File cannot be opened for input: " + input, e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ForayAssignId.STATUS_FILE_NOT_FOUND);
- }
- final InputSource inputSource = new InputSource(inputStream);
- OutputStream outputStream = null;
- try {
- final FileOutputStream fos = new FileOutputStream(output);
- outputStream = new BufferedOutputStream(fos);
- } catch (final FileNotFoundException e) {
- logger.error("File cannot be opened for output: " + output, e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ForayAssignId.STATUS_FILE_NOT_FOUND);
- }
- final ForayAssignId processor = new ForayAssignId(inputSource, outputStream, catalog);
- try {
- processor.start();
- } catch (final IOException | ParserConfigurationException | SAXException | TransformerException e) {
- logger.error("Error parsing input.", e);
- /* CheckStyle: Allow System.exit() in main method. */
- System.exit(ForayAssignId.STATUS_PARSING_ERROR);
- }
- logger.info("Input: " + input + " successfully modified,\n output in: " + output);
- }
-
-}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-13 22:20:40
|
Revision: 12712
http://sourceforge.net/p/foray/code/12712
Author: victormote
Date: 2022-07-13 22:20:27 +0000 (Wed, 13 Jul 2022)
Log Message:
-----------
Use new aXSL elements to configure explicit tokens.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-07-10 12:47:53 UTC (rev 12711)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-07-13 22:20:27 UTC (rev 12712)
@@ -6,6 +6,10 @@
<axsl-orthography-config>
+ <explicit-token-list id="eng-Latn-explicit-tokens">
+ <explicit-token end-of-sentence="never">i.e.</explicit-token>
+ </explicit-token-list>
+
<match-rule-list id="eng-Latn-match-rules">
<match desc="Arabic digits">^[0-9]+$</match>
<match desc="Formatted Arabic digits">^[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?$</match>
@@ -252,13 +256,14 @@
</hyphenation-patterns-resource>
<configuration>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"
+ language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <explicit-tokens reference="eng-Latn-explicit-tokens"/>
<match-rules reference="eng-Latn-match-rules"/>
<derivative-rules reference="eng-Latn-derivative-patterns"/>
<dictionary reference="dictionary-eng-moby"/>
<hyphenation-patterns reference="hyph-patterns-eng"/>
<derivative-factories reference="eng-Latn-derivatives"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"
- language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
<orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
<orthography language-iso-3char="eng" script-iso-4char="Zyyy" country-iso-3char="USA"/>
<orthography language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
@@ -266,21 +271,21 @@
</configuration>
<configuration>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
<dictionary reference="dictionary-latin"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
<orthography language-iso-3char="lat" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
<configuration>
- <dictionary reference="dictionary-italian"/>
<lexer class="org.foray.orthography.LexerJavaBreakIterator"
language-iso-3char="eng" script-iso-4char="Latn" country-iso-3char="USA"/>
+ <dictionary reference="dictionary-italian"/>
<orthography language-iso-3char="ita" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
<configuration>
+ <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
<dictionary reference="dictionary-french"/>
- <lexer class="org.foray.orthography.LexerJavaBreakIterator"/>
<orthography language-iso-3char="fre" script-iso-4char="Latn" country-iso-3char="ZZZ"/>
</configuration>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-10 12:47:56
|
Revision: 12711
http://sourceforge.net/p/foray/code/12711
Author: victormote
Date: 2022-07-10 12:47:53 +0000 (Sun, 10 Jul 2022)
Log Message:
-----------
Document some remaining tasks related to spell-checking.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-09 18:24:39 UTC (rev 12710)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-10 12:47:53 UTC (rev 12711)
@@ -61,15 +61,29 @@
* <li>Tokenize the text based on the refined boundary types.</li>
* </ol>
*
+ * <p>TODO: We need to handle the abbreviation problem mentioned above. ICU4J handles the problem at the sentence level,
+ * but not at the word level.
+ * (See test org.foray.orthography.LexerEnglishIcu4jTests#testEmbeddedAbbreviation1() where this is demonstrated).
+ * One possible solution is to use ICU4J to first break text into sentences, then break each sentence into words.
+ * ICU4J appears to use the Unicode Common Locale Data Repository (CLDR) to manage the data related to this issue.
+ * CLDR maintains the abbreviation data in XML files found in common/segments in elements found at XPath
+ * ldml/segmentations/segmentation/suppressions/suppression.
+ * It is doubtful that relying on this data will be flexible enough to handle abbreviations that will be needed for the
+ * tasks this lexer needs to support, especially spell-checking.
+ * Modifying the segments data file would mean managing additions we make as diffs to that data, and would introduce
+ * the need to build ICU4J as part of our build process.
+ * It may make sense to modify our word-breaking algorithm instead, to identify the ambiguous characters, read backward
+ * to the beginning of the word, and look up a match in our own abbreviations database.
+ * Whatever solution is chosen, we need to remove the clunky workaround entry for "i.e" in
+ * src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml.</p>
+ *
* @see <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">Unicode Standard Annex #29, Unicode Text
* Segmentation</a>
+ * @see <a href="https://sujitpal.blogspot.com/2008/05/tokenizing-text-with-icu4js.html">Tokenizing Text with ICU4j's
+ * RuleBasedBreakIterator</a>
+ * @see <a href="https://cldr.unicode.org">Unicode CLDR Project</a>
*/
public abstract class Lexer4a implements Lexer {
- /*
- * TODO: After this class was written, I found the following interesting blog post, which should be considered
- * further:
- * https://sujitpal.blogspot.com/2008/05/tokenizing-text-with-icu4js.html
- */
/**
* Enumeration of possible character types, as they relate to word-breaking.
@@ -146,7 +160,7 @@
}
/**
- * Uses the Java BreakIterator to find the breaks that it detects.
+ * Uses a BreakIterator to find the breaks that it detects.
* @param sequence The sequence whose breaks are needed.
* @return The sequence of breaks, indexes into {@code sequence}.
*/
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-07-09 18:24:39 UTC (rev 12710)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-07-10 12:47:53 UTC (rev 12711)
@@ -76,11 +76,32 @@
import javax.xml.parsers.ParserConfigurationException;
/**
- * Parses a generic XML document, looking for spelling errors.
- * The specified natural language can change at any time, and must be tracked.
- * General plan is to capture all of the text data in one CharSequence, then parse and compare to dictionary entries.
- * TODO: The element-specific configuration items in this class (e.g. {@link #elementIgnoreList} and
+ * <p>Parses a generic XML document, looking for spelling errors.
+ * The specified natural language can change at any time, and must be tracked.</p>
+ *
+ * <p>TODO: The element-specific configuration items in this class (e.g. {@link #elementIgnoreList} and
+ * {@link #textElementMap}) should be externalized so that they are configurable.
+ * One possible method for handling this general problem is to create an XML schema/DTD that is specifically designed to
+ * handle spell-checking, and use XSLT stylesheets to transform to that schema.
+ * This class would parse only the new DTD.</p>
+ *
+ * <p>TODO: Leading-attached and trailing-attached punctuation can be separated from their words by being processed at
+ * different times.
+ * For example, assuming a base language of "eng", consider</p>
+ * <pre>(<ForeignPhrase xml:lang="fre">en route</ForeignPhrase> from ...</pre>.
+ * <p>In this case, the opening parenthesis will be treated as a word because it is in a different language than the
+ * French phrase following.
+ * One possible solution is to leave some tokens in the buffer so that they can be evaluated with subsequent tokens
+ * before assigning them word/non-word status.</p>
+ *
+ * <p>TODO: Handle multi-word dictionary items better.
+ * In the dictionary, we need to distinguish between phrases that are the concatenation of 1) two or more valid words
+ * in their own right, and 2) two or more words that are not valid words.
+ * Those in the first class don't really belong in a spell-check dictionary, although they may have other uses.
+ * Those in the second class need to be handled specially.
+ * For example, the location "Sao Paulo" contains two words, neither of which is a valid English word.
+ * However, an English dictionary might include the two together as a valid English phrase.
+ * We need to add support for this in the dictionary and in this class.</p>
*/
public class SpellChecker extends SaxParser<Object> {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-09 18:24:43
|
Revision: 12710
http://sourceforge.net/p/foray/code/12710
Author: victormote
Date: 2022-07-09 18:24:39 +0000 (Sat, 09 Jul 2022)
Log Message:
-----------
Upgrade ICU4J to 71.1. Add test-like demonstration of how it handles embedded abbreviations, and document conclusion that we cannot use it as-is.
Modified Paths:
--------------
trunk/foray/buildSrc/src/main/groovy/foray.common-conventions.gradle
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
Modified: trunk/foray/buildSrc/src/main/groovy/foray.common-conventions.gradle
===================================================================
--- trunk/foray/buildSrc/src/main/groovy/foray.common-conventions.gradle 2022-07-05 21:46:26 UTC (rev 12709)
+++ trunk/foray/buildSrc/src/main/groovy/foray.common-conventions.gradle 2022-07-09 18:24:39 UTC (rev 12710)
@@ -39,7 +39,7 @@
xmlgraphicsCommons: '2.6', // Latest is 2.6 as of 2022-01-17.
batik: '1.14', // Latest is 1.14 as of 2022-01-17.
jeuclid: '3.1.14', // Latest is 3.1.14 as of 2021-01-12. See Note 7.
- icu4j: '68.2', // Latest is 68.2 as of 2021-01-12.
+ icu4j: '71.1', // Latest is 71.1 as of 2022-07-09.
junit: '4.13.2', // Latest is 5.8.2 as of 2022-01-17. See Note 8.
mockito: '2.28.2', // Latest is 3.7.0 as of 2021-01-12. See Note 9.
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-05 21:46:26 UTC (rev 12709)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-09 18:24:39 UTC (rev 12710)
@@ -42,10 +42,14 @@
import java.util.List;
/**
- * <p>Implementations know how to break a character sequence into words and interword content.
- * Where possible, the process of "lexing" or "tokenizing" being done here should be done without reference to
- * word dictionaries.
- * We hope to split the content between words and non-words using information about the characters only.</p>
+ * <p>FOray implementation of {@link Lexer}.
+ * Although we would hope to split the content between words and non-words using information about the characters only,
+ * that appears to be impossible for some (perhaps all) languages.
+ * For example, for an abbreviation used in many Latin languages, "i.e.", there does not appear to be a way, other than
+ * by using a dictionary, to determine that it is all one word, instead of the word "i.e" followed by a
+ * sentence-terminating period.
+ * This arises from the ambiguity of the FULL_STOP or period "." character being used both as a signal for full stop and
+ * as a signal for an abbreviation.</p>
*
* <p>The general process used by implementations of this class is as follows:</p>
* <ol>
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2022-07-05 21:46:26 UTC (rev 12709)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishIcu4jTests.java 2022-07-09 18:24:39 UTC (rev 12710)
@@ -30,8 +30,14 @@
import org.foray.common.i18n.WritingSystem4a;
+import com.ibm.icu.text.BreakIterator;
+
+import org.junit.Assert;
import org.junit.Before;
+import org.junit.Test;
+import java.util.Locale;
+
/**
* Tests of {@link LexerLatin1}.
*/
@@ -53,4 +59,49 @@
return this.out;
}
+ /**
+ * Test of a sentence containing an abbreviation that might be interpreted as ending the sentence.
+ * This is more of a demo of ICU4J than a test.
+ */
+ @Test
+ public void testEmbeddedAbbreviation1() {
+ /* With apologies to William Shakespeare, The Tempest, Act I, Scene 2. */
+ // 00000000001111111111222222222233333333334444
+ // 01234567890123456789012345678901234567890123
+ final String testString = "Hell is empty, i.e. all the devils are here.";
+
+ final BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
+ sentenceIterator.setText(testString);
+ Assert.assertEquals(0, sentenceIterator.first());
+ Assert.assertEquals(44, sentenceIterator.next());
+ Assert.assertEquals(-1, sentenceIterator.next());
+
+ final BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.US);
+ wordIterator.setText(testString);
+ Assert.assertEquals(0, wordIterator.first());
+ Assert.assertEquals(4, wordIterator.next());
+ Assert.assertEquals(5, wordIterator.next());
+ Assert.assertEquals(7, wordIterator.next());
+ Assert.assertEquals(8, wordIterator.next());
+ Assert.assertEquals(13, wordIterator.next());
+ Assert.assertEquals(14, wordIterator.next());
+ Assert.assertEquals(15, wordIterator.next());
+ Assert.assertEquals(18, wordIterator.next()); // This one causes a problem.
+ Assert.assertEquals(19, wordIterator.next());
+ Assert.assertEquals(20, wordIterator.next());
+ Assert.assertEquals(23, wordIterator.next());
+ Assert.assertEquals(24, wordIterator.next());
+ Assert.assertEquals(27, wordIterator.next());
+ Assert.assertEquals(28, wordIterator.next());
+ Assert.assertEquals(34, wordIterator.next());
+ Assert.assertEquals(35, wordIterator.next());
+ Assert.assertEquals(38, wordIterator.next());
+ Assert.assertEquals(39, wordIterator.next());
+ Assert.assertEquals(43, wordIterator.next());
+ Assert.assertEquals(44, wordIterator.next());
+ Assert.assertEquals(-1, wordIterator.next());
+
+ /* Conclusion: Although the ICU4J sentence iterator correctly handles "i.e.", its word iterator does not. */
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-05 21:46:29
|
Revision: 12709
http://sourceforge.net/p/foray/code/12709
Author: victormote
Date: 2022-07-05 21:46:26 +0000 (Tue, 05 Jul 2022)
Log Message:
-----------
Improvements to dictionaries.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-07-05 17:31:27 UTC (rev 12708)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-07-05 21:46:26 UTC (rev 12709)
@@ -9,20 +9,25 @@
<w><t>be-hoof</t><noun/></w>
<w><t>Car-tha-gen-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun><comment>Carthaginian.</comment></w>
-<w><t>hum-bleth</t></w>
+<w><t>ceil</t><verb><regular-root/></verb></w>
+<w><t>ceil-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>ex-ult-eth</t><verb><regular-root value="false"/></verb></w>
+<w><t>hum-bleth</t><verb><regular-root value="false"/></verb></w>
<w><t>Jno</t><abbrev referenced-word="John"/></w>
<w><t>Kal-a-bar</t><comment>Calabar</comment></w>
<w><t>Kam-e-run</t><comment>Cameroon</comment></w>
<w><t>Kam-e-runs</t><comment>Related to Cameroon</comment></w>
+<w><t>liv-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>lo</t><interjection/><comment>Imperative of "look".</comment></w>
-<w><t>lov-eth</t></w>
+<w><t>lov-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>oth-er-ways</t><adjective/><adverb/></w>
-<w><t>per-suad-est</t><verb/></w>
+<w><t>per-suad-est</t><verb><regular-root value="false"/></verb></w>
<w><t>pre-ëm-i-nence</t></w>
<w><t>pre-ëm-i-nent-ly</t><adverb/></w>
-<w><t>seek-est</t></w>
-<w><t>speak-eth</t></w>
+<w><t>seek-est</t><verb><regular-root value="false"/></verb></w>
+<w><t>sleep-est</t><verb><regular-root value="false"/></verb></w>
+<w><t>speak-eth</t><verb><regular-root value="false"/></verb></w>
<w><t>un-lade</t><verb><regular-root/></verb></w>
-<w><t>walk-eth</t></w>
+<w><t>walk-eth</t><verb><regular-root value="false"/></verb></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-07-05 17:31:27 UTC (rev 12708)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-07-05 21:46:26 UTC (rev 12709)
@@ -856,7 +856,7 @@
<w><t>Ac-ci-us</t></w>
<w><t>ac-claim</t></w>
<w><t>ac-claim-er</t></w>
-<w><t>ac-cla-ma-tion</t></w>
+<w><t>ac-cla-ma-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-clam-a-to-ry</t></w>
<w><t>ac-cli-mat-a-ble</t></w>
<w><t>ac-cli-mate</t></w>
@@ -885,7 +885,7 @@
<w><t>ac-com-mo-dat-ed</t></w>
<w><t>ac-com-mo-dat-ing</t></w>
<w><t>ac-com-mo-dat-ing-ly</t></w>
-<w><t>ac-com-mo-da-tion</t></w>
+<w><t>ac-com-mo-da-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ac-com-mo-da-tion ad-dress</t></phrase>
<w><t>ac-com-mo-da-tion-al</t></w>
<phrase><t>ac-com-mo-da-tion bill</t></phrase>
@@ -1365,8 +1365,8 @@
<phrase><t>a-cous-tic pho-net-ics</t></phrase>
<w><t>a-cous-tics</t></w>
<w><t>acpt</t></w>
-<w><t>ac-quaint</t></w>
-<w><t>ac-quaint-ance</t></w>
+<w><t>ac-quaint</t><verb><regular-root/></verb></w>
+<w><t>ac-quaint-ance</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ac-quaint-ance-ship</t></w>
<w><t>ac-quaint-ed</t></w>
<w><t>ac-quaint-ed-ness</t></w>
@@ -1398,7 +1398,7 @@
<w><t>A-crae-a</t></w>
<w><t>a-cral-de-hyde</t></w>
<w><t>A-cre</t></w>
-<w><t>a-cre</t></w>
+<w><t>a-cre</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-cre-age</t></w>
<w><t>a-cred</t></w>
<w><t>a-cre=foot</t></w>
@@ -2076,7 +2076,7 @@
<w><t>a-dor-er</t></w>
<w><t>a-dor-ing</t></w>
<w><t>a-dor-ing-ly</t></w>
-<w><t>a-dorn</t></w>
+<w><t>a-dorn</t><verb><regular-root/></verb></w>
<w><t>a-dorn-er</t></w>
<w><t>a-dorn-ing-ly</t></w>
<w><t>a-dorn-ment</t></w>
@@ -4244,7 +4244,7 @@
<w><t>al-le-vi-a-to-ry</t></w>
<w><t>all=ex-pense</t></w>
<w><t>all=ex-pens-es=paid</t></w>
-<w><t>al-ley</t></w>
+<w><t>al-ley</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>al-ley cat</t></phrase>
<w><t>al-ley-fired-est</t></w>
<w><t>al-ley-way</t></w>
@@ -4386,7 +4386,7 @@
<w><t>all-seed</t></w>
<w><t>all-spice</t></w>
<w><t>All-ston</t></w>
-<w><t>al-lude</t></w>
+<w><t>al-lude</t><verb><regular-root/></verb></w>
<w><t>al-lud-ed</t></w>
<w><t>al-lud-ing</t></w>
<w><t>al-lure</t></w>
@@ -4717,7 +4717,8 @@
<w><t>a-lu-mi-nous</t></w>
<w><t>a-lu-mi-num</t></w>
<w><t>a-lum-na</t></w>
-<w><t>a-lum-nus</t></w>
+<w><t>a-lum-ni</t><noun><plural/></noun></w>
+<w><t>a-lum-nus</t><noun><singular/></noun></w>
<w><t>al-um-root</t></w>
<w><t>A-lun-dum</t></w>
<w><t>al-u-nite</t></w>
@@ -4788,7 +4789,8 @@
<w><t>a-man-dine</t></w>
<w><t>A-man-ist</t></w>
<w><t>am-a-ni-ta</t></w>
-<w><t>a-man-u-en-sis</t></w>
+<w><t>a-man-u-en-ses</t><noun><plural/></noun></w>
+<w><t>a-man-u-en-sis</t><noun><singular/></noun></w>
<w><t>A-ma-p</t></w>
<w><t>A-ma-pá</t></w>
<w><t>A-mar-a</t></w>
@@ -4804,7 +4806,7 @@
<w><t>A-mar-yn-ceus</t></w>
<w><t>A-ma-sa</t></w>
<w><t>Am-a-si-as</t></w>
-<w><t>a-mass</t></w>
+<w><t>a-mass</t><verb><regular-root/></verb></w>
<w><t>a-mass-a-ble</t></w>
<w><t>a-mass-er</t></w>
<w><t>a-mass-ment</t></w>
@@ -6621,7 +6623,7 @@
<w><t>an-oes-trus</t></w>
<w><t>an-o-et-ic</t></w>
<w><t>a-noi-a</t></w>
-<w><t>a-noint</t></w>
+<w><t>a-noint</t><verb><regular-root/></verb></w>
<w><t>a-noint-er</t></w>
<w><t>a-noint-ment</t></w>
<w><t>a-nole</t></w>
@@ -9459,8 +9461,8 @@
<w><t>Arm-co</t></w>
<w><t>armed</t></w>
<phrase><t>armed forc-es</t></phrase>
-<w><t>Ar-me-ni-a</t></w>
-<w><t>Ar-me-ni-an</t></w>
+<w><t>Ar-me-ni-a</t><noun><convertible-to-possessive/></noun></w>
+<w><t>Ar-me-ni-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Ar-me-ni-an Church</t></phrase>
<w><t>Ar-me-noid</t></w>
<w><t>Ar-men-ti</t></w>
@@ -11727,7 +11729,7 @@
<w><t>Av-en-tine</t></w>
<w><t>a-ven-tu-rin</t></w>
<w><t>a-ven-tu-rine</t></w>
-<w><t>av-e-nue</t></w>
+<w><t>av-e-nue</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>a-ver</t></w>
<w><t>av-er-age</t></w>
<w><t>av-er-aged</t></w>
@@ -12249,7 +12251,7 @@
<w><t>ba-cil-lus</t></w>
<w><t>Ba-cis</t></w>
<w><t>bac-i-tra-cin</t></w>
-<w><t>back</t></w>
+<w><t>back</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>back-ache</t></w>
<w><t>back=al-ley</t></w>
<w><t>back-band</t></w>
@@ -13958,7 +13960,7 @@
<w><t>bat-fowl-er</t></w>
<w><t>Bath</t></w>
<w><t>bath</t></w>
-<w><t>bathe</t></w>
+<w><t>bathe</t><verb><regular-root/></verb></w>
<w><t>bath-er</t></w>
<w><t>bath-ers</t></w>
<w><t>ba-thet-ic</t></w>
@@ -15417,7 +15419,7 @@
<w><t>Bert</t></w>
<w><t>Ber-ta</t></w>
<w><t>Ber-taud</t></w>
-<w><t>berth</t></w>
+<w><t>berth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ber-tha</t></w>
<w><t>ber-tha</t></w>
<w><t>berth-age</t></w>
@@ -15941,7 +15943,7 @@
<w><t>bi-cy-clic</t></w>
<w><t>bi-cy-cli-cal</t></w>
<w><t>bi-cy-clist</t></w>
-<w><t>bid</t></w>
+<w><t>bid</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Bi-da</t></w>
<w><t>bi-dar-ka</t></w>
<w><t>bi-dar-kee</t></w>
@@ -16183,7 +16185,7 @@
<phrase><t>bill of in-dict-ment</t></phrase>
<phrase><t>bill of lad-ing</t></phrase>
<w><t>bil-lon</t></w>
-<w><t>bil-low</t></w>
+<w><t>bil-low</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>bil-low-i-er</t></w>
<w><t>bil-low-i-est</t></w>
<w><t>bil-low-i-ness</t></w>
@@ -19333,7 +19335,7 @@
<w><t>breadth-wise</t></w>
<w><t>bread-win-ner</t></w>
<w><t>bread-win-ning</t></w>
-<w><t>break</t></w>
+<w><t>break</t><verb><regular-root value="false"/></verb></w>
<w><t>break-a-ble</t></w>
<w><t>break-a-ble-ness</t></w>
<w><t>break-a-bly</t></w>
@@ -19357,6 +19359,7 @@
<w><t>break-out</t></w>
<w><t>break-o-ver</t></w>
<w><t>break-point</t></w>
+<w><t>breaks</t><verb><regular-root value="false"/></verb></w>
<w><t>break-through</t></w>
<w><t>break-up</t></w>
<w><t>break-wa-ter</t></w>
@@ -19817,9 +19820,9 @@
<w><t>Br-l</t></w>
<w><t>Br-no</t></w>
<w><t>bro</t></w>
-<w><t>broach</t></w>
+<w><t>broach</t><verb><regular-root/></verb></w>
<w><t>broach-er</t></w>
-<w><t>broad</t></w>
+<w><t>broad</t><adjective><extensible/></adjective></w>
<w><t>Broad</t></w>
<phrase><t>broad ar-row</t></phrase>
<w><t>broad-ax</t></w>
@@ -20964,7 +20967,7 @@
<w><t>burn=up</t></w>
<w><t>bu-roo</t></w>
<w><t>burp</t></w>
-<w><t>burr</t></w>
+<w><t>burr</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Burr</t></w>
<w><t>bur-ra-wang</t></w>
<w><t>burred</t></w>
@@ -21433,7 +21436,7 @@
<w><t>cab-e-zo-nes</t></w>
<w><t>ca-bil-do</t></w>
<w><t>Ca-bi-mas</t></w>
-<w><t>cab-in</t></w>
+<w><t>cab-in</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>cab-in boy</t></phrase>
<phrase><t>cab-in class</t></phrase>
<w><t>cab-in=class</t></w>
@@ -21601,7 +21604,7 @@
<w><t>Ca-den-za</t></w>
<w><t>ca-den-za</t></w>
<w><t>Ca-det</t></w>
-<w><t>ca-det</t></w>
+<w><t>ca-det</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ca-det-cy</t></w>
<w><t>ca-det-ship</t></w>
<w><t>ca-dette</t></w>
@@ -22064,7 +22067,7 @@
<w><t>cal-lus-es</t></w>
<w><t>cal-lus-ing</t></w>
<w><t>Cal-ly</t></w>
-<w><t>calm</t></w>
+<w><t>calm</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>cal-ma-tive</t></w>
<w><t>calm-i-er</t></w>
<w><t>calm-i-est</t></w>
@@ -24712,8 +24715,6 @@
<w><t>CEGB</t></w>
<w><t>cei-ba</t></w>
<w><t>cei-bo</t></w>
-<w><t>ceil</t></w>
-<w><t>ceil-er</t></w>
<w><t>cei-lidh</t></w>
<w><t>ceil-ing</t></w>
<w><t>ceil-inged</t></w>
@@ -24899,7 +24900,7 @@
<w><t>cen-sur-ing</t></w>
<w><t>cen-sus</t></w>
<w><t>cen-sus-es</t></w>
-<w><t>cent</t></w>
+<w><t>cent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cen-tal</t></w>
<w><t>cen-tare</t></w>
<w><t>cen-tas</t></w>
@@ -26727,7 +26728,7 @@
<w><t>Chin</t></w>
<w><t>chi-n</t></w>
<w><t>chi-na</t></w>
-<w><t>Chi-na</t></w>
+<w><t>Chi-na</t><noun><convertible-to-possessive/></noun></w>
<phrase><t>Chi-na as-ter</t></phrase>
<phrase><t>chi-na bark</t></phrase>
<w><t>chi-na-ber-ry</t></w>
@@ -27050,7 +27051,7 @@
<w><t>Choc-taw=root</t></w>
<w><t>Cho-ëph-o-ri</t></w>
<w><t>Chog-yal</t></w>
-<w><t>choice</t></w>
+<w><t>choice</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible/></adjective></w>
<w><t>choice-less</t></w>
<w><t>choice-ly</t></w>
<w><t>choice-ness</t></w>
@@ -28913,7 +28914,7 @@
<w><t>Cllr</t></w>
<w><t>clo-a-ca</t></w>
<w><t>clo-a-cal</t></w>
-<w><t>cloak</t></w>
+<w><t>cloak</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cloak=and=dag-ger</t></w>
<w><t>cloak=and=suit-er</t></w>
<w><t>cloak-ed-ly</t></w>
@@ -30325,7 +30326,7 @@
<w><t>col-u-mel-lar</t></w>
<w><t>col-u-mel-late</t></w>
<w><t>col-u-mel-li-form</t></w>
-<w><t>col-umn</t></w>
+<w><t>col-umn</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>co-lum-nal</t></w>
<w><t>co-lum-nar</t></w>
<w><t>col-um-nar-i-ty</t></w>
@@ -30637,7 +30638,7 @@
<w><t>com-mis-sar-i-at</t></w>
<w><t>com-mis-sar-ies</t></w>
<w><t>com-mis-sar-y</t></w>
-<w><t>com-mis-sion</t></w>
+<w><t>com-mis-sion</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>com-mis-sion-aire</t></w>
<w><t>com-mis-sion-al</t></w>
<phrase><t>com-mis-sioned of-fic-er</t></phrase>
@@ -30757,7 +30758,7 @@
<w><t>com-mu-ni-ca-ble</t></w>
<w><t>com-mu-ni-ca-ble-ness</t></w>
<w><t>com-mu-ni-ca-bly</t></w>
-<w><t>com-mu-ni-cant</t></w>
+<w><t>com-mu-ni-cant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>com-mu-ni-cate</t></w>
<w><t>com-mu-ni-cat-ed</t></w>
<w><t>com-mu-ni-cat-ing</t></w>
@@ -31301,7 +31302,7 @@
<w><t>con-cept</t></w>
<w><t>con-cep-ta-cle</t></w>
<w><t>con-cep-tac-u-lar</t></w>
-<w><t>con-cep-tion</t></w>
+<w><t>con-cep-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-cep-tion-al</t></w>
<w><t>con-cep-tive</t></w>
<w><t>con-cep-tu-al</t></w>
@@ -31599,7 +31600,7 @@
<w><t>con-dy-lo-mas</t></w>
<w><t>con-dy-lo-ma-ta</t></w>
<w><t>con-dy-lom-a-tous</t></w>
-<w><t>cone</t></w>
+<w><t>cone</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cone-flow-er</t></w>
<w><t>Con-el-rad</t></w>
<w><t>cone-nose</t></w>
@@ -31878,7 +31879,8 @@
<w><t>con-gre-ga-tion-al</t></w>
<phrase><t>Con-gre-ga-tion-al Church</t></phrase>
<w><t>Con-gre-ga-tion-al-ism</t></w>
-<w><t>con-gre-ga-tion-al-ism</t></w>
+<w><t>con-gre-ga-tion-al-ism</t><noun><convertible-to-possessive/></noun></w>
+<w><t>con-gre-ga-tion-al-ist</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-gre-ga-tion-al-ly</t></w>
<w><t>con-gre-ga-tive</t></w>
<w><t>con-gre-ga-tive-ness</t></w>
@@ -32304,7 +32306,7 @@
<w><t>con-sti-pat-ing</t></w>
<w><t>con-sti-pa-tion</t></w>
<w><t>con-stit-u-en-cy</t></w>
-<w><t>con-stit-u-ent</t></w>
+<w><t>con-stit-u-ent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>con-stit-u-ent-ly</t></w>
<w><t>con-sti-tute</t></w>
<w><t>con-sti-tut-ed</t></w>
@@ -33688,7 +33690,7 @@
<w><t>cor-ral-ling</t></w>
<w><t>cor-ra-sion</t></w>
<w><t>cor-ra-sive</t></w>
-<w><t>cor-rect</t></w>
+<w><t>cor-rect</t><verb><regular-root/></verb><adjective/></w>
<w><t>cor-rect-a-ble</t></w>
<w><t>cor-rect-ed-ness</t></w>
<w><t>cor-rect-i-ble</t></w>
@@ -34797,7 +34799,7 @@
<w><t>cram-ming-ly</t></w>
<w><t>cram-oi-sie</t></w>
<w><t>cram-oi-sy</t></w>
-<w><t>cramp</t></w>
+<w><t>cramp</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>cramped</t></w>
<w><t>cramped-ness</t></w>
<w><t>cramp-er</t></w>
@@ -38410,7 +38412,7 @@
<w><t>dec-i-bar</t></w>
<w><t>dec-i-bel</t></w>
<w><t>de-cid-a-ble</t></w>
-<w><t>de-cide</t></w>
+<w><t>de-cide</t><verb><regular-root/></verb></w>
<w><t>de-cid-ed</t></w>
<w><t>de-cid-ed-ly</t></w>
<w><t>de-cid-ed-ness</t></w>
@@ -39231,7 +39233,7 @@
<w><t>De-la-vigne</t></w>
<w><t>Del-a-ware</t></w>
<w><t>Del-a-war-e-an</t></w>
-<w><t>de-lay</t></w>
+<w><t>de-lay</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>de-lay-a-ble</t></w>
<phrase><t>de-lay ac-tion</t></phrase>
<w><t>de-lay=ac-tion</t></w>
@@ -40027,7 +40029,7 @@
<w><t>de-ox-y-ri-bose</t></w>
<w><t>dep</t></w>
<w><t>de-paint</t></w>
-<w><t>de-part</t></w>
+<w><t>de-part</t><verb><regular-root/></verb></w>
<w><t>de-part-ed</t></w>
<w><t>dé-par-te-ment</t></w>
<w><t>de-part-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -40220,7 +40222,7 @@
<w><t>dep-re-ter</t></w>
<w><t>de-priv-a-ble</t></w>
<w><t>de-priv-al</t></w>
-<w><t>dep-ri-va-tion</t></w>
+<w><t>dep-ri-va-tion</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>de-priv-a-tive</t></w>
<w><t>de-prive</t></w>
<w><t>de-prived</t></w>
@@ -40598,7 +40600,7 @@
<w><t>des-ox-y-ri-bo-nu-cle-o-pro-tein</t></w>
<w><t>des-ox-y-ri-bose</t></w>
<w><t>des-pair</t></w>
-<w><t>de-spair</t></w>
+<w><t>de-spair</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>de-spair-er</t></w>
<w><t>de-spair-ful</t></w>
<w><t>de-spair-ful-ly</t></w>
@@ -40623,7 +40625,7 @@
<w><t>des-pi-ca-bly</t></w>
<w><t>de-spis-a-ble</t></w>
<w><t>de-spis-a-ble-ness</t></w>
-<w><t>des-pise</t></w>
+<w><t>des-pise</t><verb><regular-root/></verb></w>
<w><t>de-spise</t></w>
<w><t>de-spised</t></w>
<w><t>de-spis-er</t></w>
@@ -43628,7 +43630,7 @@
<w><t>dis-trait</t></w>
<w><t>dis-traite</t></w>
<w><t>dis-traught</t></w>
-<w><t>dis-tress</t></w>
+<w><t>dis-tress</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>dis-tressed</t></w>
<w><t>dis-tress-ed-ly</t></w>
<w><t>dis-tress-ed-ness</t></w>
@@ -44613,7 +44615,7 @@
<w><t>dor-meuse</t></w>
<w><t>dor-mie</t></w>
<w><t>dor-mi-ent</t></w>
-<w><t>dor-mi-to-ry</t></w>
+<w><t>dor-mi-to-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Dor-mo-bile</t></w>
<w><t>dor-mouse</t></w>
<w><t>dor-my</t></w>
@@ -45374,7 +45376,7 @@
<w><t>D-rer</t></w>
<w><t>Dres-den</t></w>
<phrase><t>Dres-den chi-na</t></phrase>
-<w><t>dress</t></w>
+<w><t>dress</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>dres-sage</t></w>
<phrase><t>dress cir-cle</t></phrase>
<w><t>dress=coat-ed</t></w>
@@ -46517,7 +46519,7 @@
<w><t>ear-shot</t></w>
<w><t>ear-split-ting</t></w>
<w><t>ear=split-ting</t></w>
-<w><t>earth</t></w>
+<w><t>earth</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Ear-tha</t></w>
<w><t>earth-born</t></w>
<w><t>earth-bound</t></w>
@@ -47495,7 +47497,7 @@
<w><t>e-jac-u-la-tive</t></w>
<w><t>e-jac-u-la-tor</t></w>
<w><t>e-jac-u-la-to-ry</t></w>
-<w><t>e-ject</t></w>
+<w><t>e-ject</t><verb><regular-root/></verb></w>
<w><t>e-jec-ta</t></w>
<w><t>e-jec-tion</t></w>
<phrase><t>e-jec-tion seat</t></phrase>
@@ -47600,7 +47602,7 @@
<w><t>eld</t></w>
<w><t>El-da</t></w>
<w><t>El-den</t></w>
-<w><t>eld-er</t></w>
+<w><t>eld-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>El-der</t></w>
<w><t>el-der</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>eld-er-ber-ry</t></w>
@@ -47608,6 +47610,7 @@
<w><t>eld-er-li-ness</t></w>
<w><t>eld-er-ly</t></w>
<w><t>el-der-ly</t></w>
+<w><t>el-der-ship</t><noun><convertible-to-possessive/></noun></w>
<phrase><t>el-der states-man</t></phrase>
<w><t>eld-est</t></w>
<w><t>el-ding</t></w>
@@ -48246,7 +48249,7 @@
<w><t>El-win</t></w>
<w><t>El-wood</t></w>
<w><t>El-wyn</t></w>
-<w><t>E-ly</t></w>
+<w><t>E-ly</t><noun><convertible-to-possessive/></noun></w>
<w><t>El-y-ot</t></w>
<w><t>E-lyr-i-a</t></w>
<w><t>E-ly-s</t></w>
@@ -48517,7 +48520,7 @@
<w><t>emf</t></w>
<w><t>e-mic-tion</t></w>
<w><t>e-mi-gr</t></w>
-<w><t>em-i-grant</t></w>
+<w><t>em-i-grant</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>em-i-grate</t></w>
<w><t>em-i-grat-ed</t></w>
<w><t>em-i-grat-ing</t></w>
@@ -48809,7 +48812,7 @@
<w><t>en-cage</t></w>
<w><t>en-caged</t></w>
<w><t>en-cag-ing</t></w>
-<w><t>en-camp</t></w>
+<w><t>en-camp</t><verb><regular-root/></verb></w>
<w><t>en-camp-ment</t></w>
<w><t>en-cap-su-late</t></w>
<w><t>en-cap-su-lat-ed</t></w>
@@ -49003,7 +49006,7 @@
<w><t>end-dam-ag-ing</t></w>
<w><t>en-dear</t><verb><regular-root/></verb></w>
<w><t>en-dear-ing-ly</t></w>
-<w><t>en-dear-ment</t></w>
+<w><t>en-dear-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>en-deav-or</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>en-deav-or-er</t></w>
<w><t>En-de-cott</t></w>
@@ -49192,7 +49195,7 @@
<w><t>En-dym-i-on</t></w>
<w><t>ENE</t></w>
<w><t>en-e-ma</t></w>
-<w><t>en-e-my</t></w>
+<w><t>en-e-my</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>E-ne-o-lith-ic</t></w>
<w><t>en-er-get-ic</t></w>
<w><t>en-er-get-i-cal</t></w>
@@ -49317,13 +49320,14 @@
<phrase><t>Eng-lish horn</t></phrase>
<w><t>Eng-lish-ism</t></w>
<w><t>Eng-lish-ly</t></w>
-<w><t>Eng-lish-man</t></w>
+<w><t>Eng-lish-man</t><noun><singular/></noun></w>
<phrase><t>Eng-lish-man’s tie</t></phrase>
+<w><t>Eng-lish-men</t><noun><plural/></noun></w>
<w><t>Eng-lish-ness</t></w>
<w><t>Eng-lish-ry</t></w>
<phrase><t>Eng-lish set-ter</t></phrase>
-<w><t>Eng-lish-wom-an</t></w>
-<w><t>Eng-lish-wom-en</t></w>
+<w><t>Eng-lish-wom-an</t><noun><singular/></noun></w>
+<w><t>Eng-lish-wom-en</t><noun><plural/></noun></w>
<w><t>en-glut</t></w>
<w><t>en-glut-ted</t></w>
<w><t>en-glut-ting</t></w>
@@ -49384,7 +49388,7 @@
<w><t>en-jambed</t></w>
<w><t>en-jambe-ment</t></w>
<w><t>en-jamb-ment</t></w>
-<w><t>en-join</t></w>
+<w><t>en-join</t><verb><regular-root/></verb></w>
<w><t>en-join-er</t></w>
<w><t>en-join-ment</t></w>
<w><t>en-joy</t><verb><regular-root/></verb></w>
@@ -50847,7 +50851,7 @@
<w><t>e-rup-tive-ness</t></w>
<w><t>e-rup-tiv-i-ty</t></w>
<w><t>er-vil</t></w>
-<w><t>Er-vin</t></w>
+<w><t>Er-vin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Er-vine</t></w>
<w><t>Er-win</t></w>
<w><t>Er-win-i-a</t></w>
@@ -52006,7 +52010,7 @@
<w><t>e-vict-ee</t></w>
<w><t>e-vic-tion</t></w>
<w><t>e-vic-tor</t></w>
-<w><t>ev-i-dence</t></w>
+<w><t>ev-i-dence</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ev-i-denced</t></w>
<w><t>ev-i-denc-ing</t></w>
<w><t>ev-i-dent</t></w>
@@ -52497,11 +52501,11 @@
<w><t>ex-haust-less-ly</t></w>
<w><t>ex-haust-less-ness</t></w>
<w><t>ex-he-dra</t></w>
-<w><t>ex-hib-it</t></w>
+<w><t>ex-hib-it</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ex-hib-it-a-ble</t></w>
<w><t>ex-hib-it-ant</t></w>
<w><t>ex-hib-it-er</t></w>
-<w><t>ex-hi-bi-tion</t></w>
+<w><t>ex-hi-bi-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-hi-bi-tion-er</t></w>
<w><t>ex-hi-bi-tion-ism</t></w>
<w><t>ex-hi-bi-tion-ist</t></w>
@@ -52762,7 +52766,7 @@
<w><t>ex-pend-a-ble</t></w>
<w><t>ex-pend-er</t></w>
<w><t>ex-pend-i-ture</t></w>
-<w><t>ex-pense</t></w>
+<w><t>ex-pense</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>ex-pense ac-count</t></phrase>
<w><t>ex-pense-less</t></w>
<w><t>ex-pen-sive</t></w>
@@ -52831,7 +52835,7 @@
<phrase><t>ex-plain a-way</t></phrase>
<w><t>ex-plain-er</t></w>
<w><t>ex-pla-nate</t></w>
-<w><t>ex-pla-na-tion</t></w>
+<w><t>ex-pla-na-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-plan-a-tive</t></w>
<w><t>ex-plan-a-tive-ly</t></w>
<w><t>ex-pla-na-tor</t></w>
@@ -52892,7 +52896,7 @@
<w><t>ex-plo-sive-ly</t></w>
<w><t>ex-plo-sive-ness</t></w>
<w><t>ex-po</t></w>
-<w><t>ex-po-nent</t></w>
+<w><t>ex-po-nent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ex-po-nen-tial</t></w>
<phrase><t>ex-po-nen-tial dis-tri-bu-tion</t></phrase>
<w><t>ex-po-nen-tial-ly</t></w>
@@ -53452,7 +53456,7 @@
<w><t>fac</t></w>
<w><t>fa-cade</t></w>
<w><t>fa-çade</t></w>
-<w><t>face</t></w>
+<w><t>face</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>face-a-ble</t></w>
<w><t>face-bar</t></w>
<w><t>face=cen-tered</t></w>
@@ -55081,7 +55085,7 @@
<w><t>feuil-le-ton-ism</t></w>
<w><t>feuil-le-ton-ist</t></w>
<w><t>feuil-le-ton-is-tic</t></w>
-<w><t>fe-ver</t></w>
+<w><t>fe-ver</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>fe-ver blis-ter</t></phrase>
<w><t>fe-ver-few</t></w>
<w><t>fe-ver-ish</t></w>
@@ -56613,7 +56617,7 @@
<w><t>floc-cu-lent-ly</t></w>
<w><t>floc-cu-lus</t></w>
<w><t>floc-cus</t></w>
-<w><t>flock</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>flock</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>flock-bed</t></w>
<w><t>flock-i-er</t></w>
<w><t>flock-i-est</t></w>
@@ -58962,7 +58966,7 @@
<w><t>frond-less</t></w>
<w><t>Fron-ia</t></w>
<w><t>frons</t></w>
-<w><t>front</t></w>
+<w><t>front</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>front-ad</t></w>
<w><t>front-age</t></w>
<w><t>front-al</t></w>
@@ -62347,7 +62351,7 @@
<w><t>glis-ten-ing-ly</t></w>
<w><t>glis-ter</t></w>
<w><t>glis-ter-ing-ly</t></w>
-<w><t>glit-ter</t></w>
+<w><t>glit-ter</t><noun/><verb><regular-root/></verb></w>
<phrase><t>glit-ter ice</t></phrase>
<w><t>glit-ter-ing-ly</t></w>
<w><t>glit-ter-y</t></w>
@@ -63315,7 +63319,7 @@
<w><t>gor-cock</t></w>
<w><t>Gor-di-an</t></w>
<phrase><t>Gor-di-an knot</t></phrase>
-<w><t>Gor-don</t></w>
+<w><t>Gor-don</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Gor-don set-ter</t></phrase>
<w><t>gore</t></w>
<w><t>Go-re</t></w>
@@ -64713,7 +64717,7 @@
<w><t>grouch-i-ness</t></w>
<w><t>grouch-y</t></w>
<w><t>Grou-chy</t></w>
-<w><t>ground</t></w>
+<w><t>ground</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ground-a-ble</t></w>
<w><t>ground-a-bly</t></w>
<w><t>ground-age</t></w>
@@ -66469,7 +66473,7 @@
<w><t>hal-i-tus</t></w>
<w><t>hal-i-tus-es</t></w>
<w><t>Hal-iv-er</t></w>
-<w><t>hall</t></w>
+<w><t>hall</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Hall</t></w>
<w><t>hal-lah</t></w>
<w><t>hal-lahs</t></w>
@@ -67613,7 +67617,7 @@
<w><t>ha-za-nim</t></w>
<w><t>ha-zans</t></w>
<w><t>Haz-ard</t></w>
-<w><t>haz-ard</t></w>
+<w><t>haz-ard</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>haz-ard-a-ble</t></w>
<w><t>haz-ard-er</t></w>
<w><t>haz-ard-less</t></w>
@@ -68702,7 +68706,7 @@
<phrase><t>Hen-ri-et-ta Ma-ri-a</t></phrase>
<w><t>Hen-rik</t></w>
<w><t>Hen-ri-ka</t></w>
-<w><t>Hen-ry</t></w>
+<w><t>Hen-ry</t><noun><convertible-to-possessive/></noun></w>
<w><t>hen-ry</t></w>
<w><t>Hen-ry-et-ta</t></w>
<phrase><t>Hen-ry I</t></phrase>
@@ -69831,7 +69835,7 @@
<w><t>hin-ny</t></w>
<w><t>hin-ny-ing</t></w>
<w><t>Hin-shel-wood</t></w>
-<w><t>hint</t></w>
+<w><t>hint</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>hint-er</t></w>
<w><t>hin-ter-land</t></w>
<w><t>Hin-ton</t></w>
@@ -70025,7 +70029,7 @@
<w><t>his-to-ri-o-graph-ic</t></w>
<w><t>his-to-ri-o-graph-i-cal-ly</t></w>
<w><t>his-to-ri-og-ra-phy</t></w>
-<w><t>his-to-ry</t></w>
+<w><t>his-to-ry</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>his-to-throm-bin</t></w>
<w><t>his-to-tome</t></w>
<w><t>his-tot-o-my</t></w>
@@ -70199,7 +70203,7 @@
<w><t>Ho-fei</t></w>
<w><t>Ho-fer</t></w>
<w><t>Hof-fa</t></w>
-<w><t>Hoff-man</t></w>
+<w><t>Hoff-man</t><noun><convertible-to-possessive/></noun></w>
<w><t>Hof-mann</t></w>
<w><t>Hof-manns-thal</t></w>
<w><t>Hof-stadt-er</t></w>
@@ -70315,7 +70319,7 @@
<w><t>Ho-li</t></w>
<w><t>hol-i-but</t></w>
<w><t>Hol-i-day</t></w>
-<w><t>hol-i-day</t></w>
+<w><t>hol-i-day</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>hol-i-day camp</t></phrase>
<w><t>hol-i-day-er</t></w>
<w><t>hol-i-day=mak-er</t></w>
@@ -73445,7 +73449,7 @@
<w><t>ICBM</t></w>
<w><t>ice</t></w>
<w><t>Ice</t></w>
-<w><t>ice-berg</t></w>
+<w><t>ice-berg</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ice-blink</t></w>
<w><t>ice-boat</t></w>
<w><t>ice-boat-ing</t></w>
@@ -75136,6 +75140,7 @@
<w><t>in-ar-tis-tic</t></w>
<w><t>in-ar-tis-ti-cal</t></w>
<w><t>in-ar-tis-ti-cal-ly</t></w>
+<w><t>in-as-much</t></w>
<phrase><t>in-as-much as</t></phrase>
<w><t>in-at-ten-tion</t></w>
<w><t>in-at-ten-tive</t></w>
@@ -75880,7 +75885,7 @@
<w><t>in-di-cate</t></w>
<w><t>in-di-cat-ed</t></w>
<w><t>in-di-cat-ing</t></w>
-<w><t>in-di-ca-tion</t></w>
+<w><t>in-di-ca-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-dic-a-tive</t></w>
<w><t>in-dic-a-tive-ly</t></w>
<w><t>in-di-ca-tor</t></w>
@@ -76798,7 +76803,7 @@
<w><t>in-her-ent</t></w>
<w><t>in-her-ent-ly</t></w>
<w><t>in-her-ing</t></w>
-<w><t>in-her-it</t></w>
+<w><t>in-her-it</t><verb><regular-root/></verb></w>
<w><t>in-her-it-a-bil-i-ty</t></w>
<w><t>in-her-it-a-ble</t></w>
<w><t>in-her-it-a-ble-ness</t></w>
@@ -77456,7 +77461,7 @@
<w><t>in-sti-tu-tor</t></w>
<w><t>instr</t></w>
<w><t>in-stroke</t></w>
-<w><t>in-struct</t></w>
+<w><t>in-struct</t><verb><regular-root/></verb></w>
<w><t>in-struct-ed-ly</t></w>
<w><t>in-struct-ed-ness</t></w>
<w><t>in-struct-i-ble</t></w>
@@ -78670,7 +78675,7 @@
<phrase><t>in-ter-rupt-ed screw</t></phrase>
<w><t>in-ter-rupt-er</t></w>
<w><t>in-ter-rupt-i-ble</t></w>
-<w><t>in-ter-rup-tion</t></w>
+<w><t>in-ter-rup-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-ter-rup-tive</t></w>
<w><t>in-ter-rup-tor</t></w>
<w><t>in-ter-sale</t></w>
@@ -79091,7 +79096,7 @@
<w><t>in-tro-duc-er</t></w>
<w><t>in-tro-duc-i-ble</t></w>
<w><t>in-tro-duc-ing</t></w>
-<w><t>in-tro-duc-tion</t></w>
+<w><t>in-tro-duc-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-tro-duc-tive</t></w>
<w><t>in-tro-duc-to-ri-ly</t></w>
<w><t>in-tro-duc-to-ri-ness</t></w>
@@ -79246,7 +79251,7 @@
<w><t>in-vei-gle-ment</t></w>
<w><t>in-vei-gler</t></w>
<w><t>in-ve-nit</t></w>
-<w><t>in-vent</t></w>
+<w><t>in-vent</t><verb><regular-root/></verb></w>
<w><t>in-vent-a-ble</t></w>
<w><t>in-vent-er</t></w>
<w><t>in-vent-i-ble</t></w>
@@ -79347,7 +79352,7 @@
<phrase><t>in-vis-i-ble ink</t></phrase>
<w><t>in-vis-i-ble-ness</t></w>
<w><t>in-vis-i-bly</t></w>
-<w><t>in-vi-ta-tion</t></w>
+<w><t>in-vi-ta-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>in-vi-ta-tion-al</t></w>
<w><t>in-vi-ta-tor-y</t></w>
<w><t>in-vi-ta-to-ry</t></w>
@@ -80532,7 +80537,7 @@
<w><t>jab</t></w>
<w><t>Ja-bal</t></w>
<w><t>Jab-al-pur</t></w>
-<w><t>jab-ber</t></w>
+<w><t>jab-ber</t><noun><singular/></noun><verb><regular-root/></verb></w>
<w><t>jab-ber-er</t></w>
<w><t>jab-ber-ing-ly</t></w>
<w><t>Jab-ber-wock</t></w>
@@ -80810,7 +80815,7 @@
<w><t>jam-bos</t></w>
<w><t>jam-boy</t></w>
<w><t>jamb-stone</t></w>
-<w><t>James</t></w>
+<w><t>James</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>James-burg</t></w>
<w><t>James-e-an</t></w>
<w><t>James-i-an</t></w>
@@ -80914,7 +80919,7 @@
<w><t>Jaque-lee</t></w>
<w><t>Ja-ques</t></w>
<w><t>Jaques=Dal-croze</t></w>
-<w><t>jar</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>jar</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Ja-rash</t></w>
<w><t>jar-di-ni</t></w>
<w><t>jar-di-niere</t></w>
@@ -83885,7 +83890,7 @@
<w><t>knap</t></w>
<w><t>knap-per</t></w>
<w><t>knap-ping</t></w>
-<w><t>knap-sack</t></w>
+<w><t>knap-sack</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>knap-sacked</t></w>
<w><t>knap-weed</t></w>
<w><t>knar</t></w>
@@ -83902,7 +83907,7 @@
<w><t>knead-a-ble</t></w>
<w><t>knead-er</t></w>
<w><t>knead-ing-ly</t></w>
-<w><t>knee</t></w>
+<w><t>knee</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>knee-cap</t></w>
<w><t>knee-hole</t></w>
<w><t>knee-ing</t></w>
@@ -86336,7 +86341,7 @@
<w><t>Lear</t></w>
<w><t>lear</t></w>
<w><t>Le-ar-chus</t></w>
-<w><t>learn</t></w>
+<w><t>learn</t><verb><regular-root/></verb></w>
<w><t>learn-a-ble</t></w>
<w><t>learn-ed</t></w>
<w><t>Lear-ned</t></w>
@@ -86685,7 +86690,7 @@
<phrase><t>Leif Er-ic-son</t></phrase>
<w><t>Leigh</t></w>
<w><t>Leigh=Mal-lo-ry</t></w>
-<w><t>Leigh-ton</t></w>
+<w><t>Leigh-ton</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lei-la</t></w>
<w><t>Leins-dorf</t></w>
<w><t>Lein-ster</t></w>
@@ -86790,7 +86795,7 @@
<w><t>L’En-fant</t></w>
<w><t>Leng-len</t></w>
<w><t>length</t></w>
-<w><t>length-en</t></w>
+<w><t>length-en</t><verb><regular-root/></verb></w>
<w><t>length-en-er</t></w>
<w><t>length-i-er</t></w>
<w><t>length-i-est</t></w>
@@ -87006,7 +87011,7 @@
<phrase><t>less-er pan-da</t></phrase>
<phrase><t>Less-er Sun-da Is-lands</t></phrase>
<w><t>Les-sing</t></w>
-<w><t>les-son</t></w>
+<w><t>les-son</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>les-sor</t></w>
<w><t>lest</t></w>
<w><t>les-te</t></w>
@@ -87479,7 +87484,7 @@
<w><t>li-cens-er</t></w>
<w><t>li-cens-ing</t></w>
<w><t>li-cen-sor</t></w>
-<w><t>li-cen-ti-ate</t></w>
+<w><t>li-cen-ti-ate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>li-cen-ti-ate-ship</t></w>
<w><t>li-cen-ti-a-tion</t></w>
<w><t>li-cen-tious</t></w>
@@ -87991,7 +87996,7 @@
<w><t>lin-e-age</t></w>
<w><t>lin-e-al</t></w>
<w><t>lin-e-al-ly</t></w>
-<w><t>lin-e-a-ment</t></w>
+<w><t>lin-e-a-ment</t><noun><pluralizable/></noun></w>
<w><t>lin-e-a-men-tal</t></w>
<w><t>lin-e-a-men-ta-tion</t></w>
<w><t>lin-e-ar</t></w>
@@ -89564,7 +89569,7 @@
<w><t>love-mak-ing</t></w>
<w><t>love=mak-ing</t></w>
<phrase><t>love po-tion</t></phrase>
-<w><t>lov-er</t></w>
+<w><t>lov-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Lov-er</t></w>
<w><t>lov-er-less</t></w>
<w><t>lov-er-like</t></w>
@@ -91039,7 +91044,7 @@
<w><t>mah-zors</t></w>
<w><t>Mai</t></w>
<w><t>Ma-ia</t></w>
-<w><t>maid</t></w>
+<w><t>maid</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mai-dan</t></w>
<w><t>Mai-da-nek</t></w>
<w><t>maid-en</t></w>
@@ -91251,7 +91256,7 @@
<w><t>mal-a-droit</t></w>
<w><t>mal-a-droit-ly</t></w>
<w><t>mal-a-droit-ness</t></w>
-<w><t>mal-a-dy</t></w>
+<w><t>mal-a-dy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>mal-a fi-de</t></phrase>
<w><t>Mal-a-ga</t></w>
<w><t>Mal-a-gas-y</t></w>
@@ -91726,6 +91731,7 @@
<w><t>man-i-fes-ta-tive</t></w>
<w><t>man-i-fes-ta-tive-ly</t></w>
<phrase><t>Man-i-fest Des-ti-ny</t></phrase>
+<w><t>man-i-fest-ly</t><adverb/></w>
<w><t>man-i-fes-to</t></w>
<w><t>man-i-fold</t></w>
<w><t>man-i-fold-er</t></w>
@@ -92114,7 +92120,7 @@
<w><t>Mar-fa</t></w>
<w><t>marg</t></w>
<w><t>mar-ga</t></w>
-<w><t>Mar-ga-ret</t></w>
+<w><t>Mar-ga-ret</t><noun><convertible-to-possessive/></noun></w>
<w><t>Mar-ga-re-ta</t></w>
<phrase><t>Mar-ga-ret of An-jou</t></phrase>
<phrase><t>Mar-ga-ret of Na-varre</t></phrase>
@@ -94203,7 +94209,8 @@
<w><t>mer-chant-a-ble-ness</t></w>
<phrase><t>mer-chant bank</t></phrase>
<w><t>mer-chant-like</t></w>
-<w><t>mer-chant-man</t></w>
+<w><t>mer-chant-man</t><noun><singular/><convertible-to-possessive/></noun></w>
+<w><t>mer-chant-men</t><noun><plural/><convertible-to-possessive/></noun></w>
<phrase><t>mer-chant na-vy</t></phrase>
<phrase><t>mer-chant prince</t></phrase>
<w><t>mer-chet</t></w>
@@ -96188,7 +96195,7 @@
<w><t>mis-con-ceived</t></w>
<w><t>mis-con-ceiv-er</t></w>
<w><t>mis-con-ceiv-ing</t></w>
-<w><t>mis-con-cep-tion</t></w>
+<w><t>mis-con-cep-tion</t><noun><plural/><convertible-to-possessive/></noun></w>
<w><t>mis-con-duct</t></w>
<w><t>mis-con-jec-ture</t></w>
<w><t>mis-con-jec-tured</t></w>
@@ -98384,7 +98391,7 @@
<w><t>mound=build-er</t></w>
<w><t>mound-ing</t></w>
<w><t>mounds-man</t></w>
-<w><t>mount</t></w>
+<w><t>mount</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Moun-tain</t></w>
<w><t>moun-tain</t><noun><pluralizable/></noun></w>
<phrase><t>moun-tain ash</t></phrase>
@@ -98996,7 +99003,7 @@
<w><t>mul-ti-tube</t></w>
<w><t>mul-ti-tu-ber-cu-late</t></w>
<w><t>mul-ti-tu-bu-lar</t></w>
-<w><t>mul-ti-tude</t></w>
+<w><t>mul-ti-tude</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mul-ti-tu-di-nous</t></w>
<w><t>mul-ti-tu-di-nous-ly</t></w>
<w><t>mul-ti-tu-di-nous-ness</t></w>
@@ -101933,7 +101940,7 @@
<w><t>noil</t></w>
<w><t>noil-y</t></w>
<w><t>noir</t></w>
-<w><t>noise</t></w>
+<w><t>noise</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>noise-less</t></w>
<w><t>noise-less-ly</t></w>
<w><t>noise-less-ness</t></w>
@@ -109782,7 +109789,7 @@
<w><t>op-por-tun-ist</t></w>
<w><t>op-por-tun-is-tic</t></w>
<w><t>op-por-tun-is-ti-cal-ly</t></w>
-<w><t>op-por-tu-ni-ty</t></w>
+<w><t>op-por-tu-ni-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>op-por-tu-ni-ty cost</t></phrase>
<w><t>op-pos-a-bil-i-ty</t></w>
<w><t>op-pos-a-ble</t></w>
@@ -109911,7 +109918,7 @@
<w><t>O-ran</t></w>
<w><t>o-rang</t></w>
<w><t>Or-ange</t></w>
-<w><t>or-ange</t></w>
+<w><t>or-ange</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>or-ange-ade</t></w>
<phrase><t>or-ange blos-som</t></phrase>
<w><t>Or-ange-burg</t></w>
@@ -110011,7 +110018,7 @@
<w><t>Or-cus</t></w>
<w><t>Or-czy</t></w>
<w><t>ord</t></w>
-<w><t>or-dain</t></w>
+<w><t>or-dain</t><verb><regular-root/></verb></w>
<w><t>or-dain-a-ble</t></w>
<w><t>or-dain-er</t></w>
<w><t>or-dain-ment</t></w>
@@ -115523,7 +115530,7 @@
<w><t>Pa-ri-shad</t></w>
<phrase><t>par-ish clerk</t></phrase>
<phrase><t>par-ish coun-cil</t></phrase>
-<w><t>pa-rish-ion-er</t></w>
+<w><t>pa-rish-ion-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pa-rish-ion-er-ship</t></w>
<phrase><t>par-ish reg-is-ter</t></phrase>
<w><t>par-ish=rigged</t></w>
@@ -115805,7 +115812,7 @@
<phrase><t>part-ing strip</t></phrase>
<phrase><t>par-ti pris</t></phrase>
<w><t>par-tis</t></w>
-<w><t>par-ti-san</t></w>
+<w><t>par-ti-san</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>par-ti-san-ry</t></w>
<w><t>par-ti-san-ship</t></w>
<w><t>par-ti-ta</t></w>
@@ -116057,7 +116064,7 @@
<w><t>pas-tur-a-ble</t></w>
<w><t>pas-tur-age</t></w>
<w><t>pas-tur-al</t></w>
-<w><t>pas-ture</t></w>
+<w><t>pas-ture</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Pas-ture</t></w>
<w><t>pas-tured</t></w>
<w><t>pas-ture-less</t></w>
@@ -116740,7 +116747,7 @@
<w><t>ped-i-palp</t></w>
<w><t>ped-i-pal-pal</t></w>
<w><t>ped-i-pal-pate</t></w>
-<w><t>ped-lar</t></w>
+<w><t>ped-lar</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ped-lar-ies</t></w>
<w><t>ped-lar-y</t></w>
<w><t>ped-ler</t></w>
@@ -116788,7 +116795,7 @@
<phrase><t>Peep-ing Tom</t></phrase>
<w><t>peep-show</t></w>
<w><t>pee-pul</t></w>
-<w><t>peer</t></w>
+<w><t>peer</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>peer-age</t></w>
<w><t>Peerce</t></w>
<w><t>peer-ess</t></w>
@@ -118303,7 +118310,7 @@
<w><t>pe-tite</t></w>
<w><t>pe-tite-ness</t></w>
<phrase><t>pet-it four</t></phrase>
-<w><t>pe-ti-tion</t></w>
+<w><t>pe-ti-tion</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>pe-ti-tion-a-ble</t></w>
<w><t>pe-ti-tion-ar-y</t></w>
<w><t>pe-ti-tion-er</t></w>
@@ -120075,7 +120082,7 @@
<w><t>Pí-o</t></w>
<w><t>pio-let</t></w>
<w><t>pi-on</t></w>
-<w><t>pi-o-neer</t></w>
+<w><t>pi-o-neer</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pi-os-i-ty</t></w>
<w><t>pi-ous</t></w>
<w><t>pi-ous-ly</t></w>
@@ -120779,7 +120786,7 @@
<phrase><t>play-ing field</t></phrase>
<w><t>play-let</t></w>
<w><t>play-mak-er</t></w>
-<w><t>play-mate</t></w>
+<w><t>play-mate</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>play-pen</t></w>
<w><t>play-read-er</t></w>
<w><t>play-room</t></w>
@@ -121991,6 +121998,7 @@
<w><t>pom-pos-i-ty</t></w>
<w><t>pomp-ous</t></w>
<w><t>pomp-ous-ly</t></w>
+<w><t>’pon</t><contraction referenced-word="upon"/></w>
<w><t>Pon-ca</t></w>
<w><t>ponce</t></w>
<w><t>Pon-ce</t></w>
@@ -122337,7 +122345,7 @@
<phrase><t>Por-to Ri-co</t></phrase>
<phrase><t>Port Phil-lip</t></phrase>
<phrase><t>Port Phil-lip Bay</t></phrase>
-<w><t>por-trait</t></w>
+<w><t>por-trait</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>por-trait-ist</t></w>
<w><t>por-trait-like</t></w>
<w><t>por-trai-ture</t></w>
@@ -126127,7 +126135,8 @@
<w><t>pres-by-ter-i-al</t></w>
<w><t>pres-by-te-ri-al</t></w>
<w><t>Pres-by-te-ri-an</t></w>
-<w><t>pres-by-te-ri-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
+<w><t>pres-by-te-ri-an</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective/></w>
+<w><t>pres-by-te-ri-an-ism</t><noun><convertible-to-possessive/></noun></w>
<w><t>pres-by-ter-ies</t></w>
<w><t>pres-by-ter-y</t></w>
<w><t>pre-scho-las-tic</t></w>
@@ -126984,7 +126993,7 @@
<phrase><t>Prince Ru-pert</t></phrase>
<w><t>prince’s=feath-er</t></w>
<w><t>prince-ship</t></w>
-<w><t>prin-cess</t></w>
+<w><t>prin-cess</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>prin-cesse</t></w>
<w><t>prin-cess-like</t></w>
<phrase><t>prin-cess roy-al</t></phrase>
@@ -127779,7 +127788,7 @@
<w><t>pro-logu-iz-er</t></w>
<w><t>pro-logu-iz-ing</t></w>
<w><t>pro-lo-gus</t></w>
-<w><t>pro-long</t></w>
+<w><t>pro-long</t><verb><regular-root/></verb></w>
<w><t>pro-long-a-bly</t></w>
<w><t>pro-lon-gate</t></w>
<w><t>pro-lon-gat-ed</t></w>
@@ -132735,7 +132744,7 @@
<w><t>raz-er</t></w>
<w><t>raz-ing</t></w>
<w><t>ra-zoo</t></w>
-<w><t>ra-zor</t></w>
+<w><t>ra-zor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ra-zor-back</t></w>
<w><t>ra-zor-bill</t></w>
<phrase><t>ra-zor=billed auk</t></phrase>
@@ -133830,7 +133839,7 @@
<w><t>re-crit-i-cize</t></w>
<w><t>re-crit-i-cized</t></w>
<w><t>re-crit-i-ciz-ing</t></w>
-<w><t>re-cross</t></w>
+<w><t>re-cross</t><verb><regular-root/></verb></w>
<w><t>re-crown</t></w>
<w><t>re-cru-desce</t></w>
<w><t>re-cru-desced</t></w>
@@ -134001,7 +134010,7 @@
<w><t>re-ded-i-cat-ing</t></w>
<w><t>re-ded-i-ca-tion</t></w>
<w><t>re-deed</t></w>
-<w><t>re-deem</t></w>
+<w><t>re-deem</t><verb><regular-root/></verb></w>
<w><t>re-deem-a-bil-i-ty</t></w>
<w><t>re-deem-a-ble</t></w>
<w><t>re-deem-a-ble-ness</t></w>
@@ -134592,7 +134601,7 @@
<w><t>re-feel-ing</t></w>
<w><t>re-felt</t></w>
<w><t>refer</t></w>
-<w><t>re-fer</t></w>
+<w><t>re-fer</t><verb><regular-root/></verb></w>
<w><t>ref-er-ee</t></w>
<w><t>ref-er-ence</t></w>
<phrase><t>ref-er-ence book</t></phrase>
@@ -134645,7 +134654,7 @@
<w><t>re-flat-ed</t></w>
<w><t>re-flat-ing</t></w>
<w><t>re-fla-tion</t></w>
-<w><t>re-flect</t></w>
+<w><t>re-flect</t><verb><regular-root/></verb></w>
<w><t>re-flect-ance</t></w>
<w><t>re-flec-tance</t></w>
<w><t>re-flect-i-ble</t></w>
@@ -137769,7 +137778,7 @@
<w><t>re-vict-ual-ing</t></w>
<w><t>re-vict-ualled</t></w>
<w><t>re-vict-ual-ling</t></w>
-<w><t>re-view</t></w>
+<w><t>re-view</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>re-view-a-bil-i-ty</t></w>
<w><t>re-view-a-ble</t></w>
<phrase><t>re-view cop-y</t></phrase>
@@ -138277,7 +138286,7 @@
<w><t>Ric-ci-o-li</t></w>
<w><t>Ric-ci-us</t></w>
<w><t>rice</t></w>
-<w><t>Rice</t></w>
+<w><t>Rice</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>rice-bird</t></w>
<phrase><t>rice pa-per</t></phrase>
<w><t>ric-er</t></w>
@@ -138555,7 +138564,7 @@
<w><t>rind-less</t></w>
<w><t>rind-y</t></w>
<w><t>rin-for-zan-do</t></w>
-<w><t>ring</t></w>
+<w><t>ring</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root value="false"/></verb></w>
<w><t>Ring</t></w>
<w><t>ring=a=lie-vi-o</t></w>
<w><t>ring-bolt</t></w>
@@ -139032,7 +139041,7 @@
<w><t>role=play-ing</t></w>
<w><t>Rolf</t></w>
<w><t>Rolfe</t></w>
-<w><t>roll</t></w>
+<w><t>roll</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>roll-a-ble</t></w>
<w><t>Rol-land</t></w>
<w><t>roll-a-way</t></w>
@@ -139576,7 +139585,7 @@
<w><t>roup-i-er</t></w>
<w><t>roup-i-ly</t></w>
<w><t>roup-y</t></w>
-<w><t>rouse</t></w>
+<w><t>rouse</t><verb><regular-root/></verb></w>
<w><t>rouse-a-bout</t></w>
<w><t>rous-ed-ness</t></w>
<w><t>rous-er</t></w>
@@ -140117,7 +140126,7 @@
<w><t>rust-i-ly</t></w>
<w><t>Rus-tin</t></w>
<w><t>rust-i-ness</t></w>
-<w><t>rus-tle</t></w>
+<w><t>rus-tle</t><verb><regular-root/></verb></w>
<w><t>rus-tler</t></w>
<phrase><t>rus-tle up</t></phrase>
<w><t>rus-tling-ly</t></w>
@@ -140676,7 +140685,7 @@
<w><t>sale-a-bly</t></w>
<phrase><t>sale and re-turn</t></phrase>
<w><t>sal-e-brous</t></w>
-<w><t>Sa-lem</t></w>
+<w><t>Sa-lem</t><noun><convertible-to-possessive/></noun></w>
<w><t>sal-ep</t></w>
<w><t>sal-e-ra-tus</t></w>
<w><t>Sa-ler-no</t></w>
@@ -142265,6 +142274,7 @@
<w><t>schol-ar</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>schol-arch</t></w>
<w><t>schol-ar-less</t></w>
+<w><t>schol-ar-ly</t><adjective><extensible value="false"/></adjective></w>
<w><t>schol-ar-ship</t></w>
<w><t>scho-las-tholiast</t></w>
<w><t>scho-las-tic</t></w>
@@ -142599,7 +142609,7 @@
<w><t>sco-top-ic</t></w>
<w><t>Scots</t></w>
<w><t>Scots-man</t></w>
-<w><t>Scott</t></w>
+<w><t>Scott</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Scott-dale</t></w>
<w><t>Scot-ti</t></w>
<w><t>Scot-ti-cism</t></w>
@@ -143042,7 +143052,7 @@
<w><t>sear</t></w>
<phrase><t>sea rang-er</t></phrase>
<phrase><t>sea ra-ven</t></phrase>
-<w><t>search</t></w>
+<w><t>search</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>search-a-ble</t></w>
<w><t>search-a-ble-ness</t></w>
<w><t>search-er</t></w>
@@ -145570,7 +145580,7 @@
<w><t>Ser-a-pe-a</t></w>
<w><t>Ser-a-pe-um</t></w>
<w><t>Ser-a-pe-ums</t></w>
-<w><t>ser-aph</t></w>
+<w><t>ser-aph</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>se-raph-ic</t></w>
<w><t>se-raph-i-cal</t></w>
<w><t>se-raph-i-cal-ly</t></w>
@@ -145875,7 +145885,7 @@
<phrase><t>set-tle for</t></phrase>
<phrase><t>set-tle in</t></phrase>
<w><t>set-tle-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>set-tler</t></w>
+<w><t>set-tler</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>set-tle with</t></phrase>
<w><t>set-tling</t></w>
<w><t>set-tlings</t></w>
@@ -146482,7 +146492,7 @@
<w><t>sheer-legs</t></w>
<w><t>Sheer-ness</t></w>
<w><t>sheers</t></w>
-<w><t>sheet</t></w>
+<w><t>sheet</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>sheet an-chor</t></phrase>
<w><t>sheet-ing</t></w>
<w><t>sheet-less</t></w>
@@ -146823,7 +146833,7 @@
<w><t>shnook</t></w>
<w><t>Sho-a</t></w>
<w><t>shoad</t></w>
-<w><t>shoal</t></w>
+<w><t>shoal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>shoat</t></w>
<w><t>sho-chet</t></w>
<w><t>sho-che-tim</t></w>
@@ -146926,7 +146936,7 @@
<w><t>shor-ing</t></w>
<w><t>shorn</t></w>
<w><t>Short</t></w>
-<w><t>short</t></w>
+<w><t>short</t><adjective><extensible/></adjective></w>
<phrase><t>short ac-count</t></phrase>
<w><t>short-age</t></w>
<w><t>short-bread</t></w>
@@ -146942,7 +146952,7 @@
<w><t>short-cut</t></w>
<w><t>short=dat-ed</t></w>
<phrase><t>short di-vi-sion</t></phrase>
-<w><t>short-en</t></w>
+<w><t>short-en</t><verb><regular-root/></verb></w>
<w><t>short-en-er</t></w>
<w><t>short-en-ing</t></w>
<phrase><t>Short-er Cat-e-chism</t></phrase>
@@ -148401,7 +148411,7 @@
<w><t>skin-like</t></w>
<w><t>skinned</t></w>
<w><t>Skin-ner</t></w>
-<w><t>skin-ner</t></w>
+<w><t>skin-ner</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Skin-ner box</t></phrase>
<w><t>skin-ner-ies</t></w>
<w><t>skin-ner-y</t></w>
@@ -149551,7 +149561,7 @@
<w><t>snye</t></w>
<w><t>sny-ing</t></w>
<w><t>so</t></w>
-<w><t>soak</t></w>
+<w><t>soak</t><verb><regular-root/></verb></w>
<w><t>soak-age</t></w>
<w><t>soak-a-way</t></w>
<w><t>soak-er</t></w>
@@ -149788,7 +149798,7 @@
<w><t>sof-frit-to</t></w>
<w><t>So-fi-a</t></w>
<w><t>So-fi-ya</t></w>
-<w><t>soft</t></w>
+<w><t>soft</t><adjective><extensible/></adjective></w>
<w><t>sof-ta</t></w>
<w><t>soft-ball</t></w>
<w><t>soft-board</t></w>
@@ -150560,7 +150570,7 @@
<phrase><t>South-ern Brit-ish Eng-lish</t></phrase>
<phrase><t>South-ern Cross</t></phrase>
<w><t>South-ern-er</t></w>
-<w><t>south-ern-er</t></w>
+<w><t>south-ern-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>south-ern hem-i-sphere</t></phrase>
<phrase><t>South-ern Lights</t></phrase>
<w><t>south-ern-li-ness</t></w>
@@ -152811,7 +152821,7 @@
<w><t>Stau-ding-er</t></w>
<w><t>staume-ral</t></w>
<w><t>staum-rel</t></w>
-<w><t>staunch</t></w>
+<w><t>staunch</t><adjective><extensible/></adjective></w>
<w><t>Staun-ton</t></w>
<w><t>stau-ro-lite</t></w>
<w><t>stau-ro-lit-ic</t></w>
@@ -152829,7 +152839,7 @@
<w><t>stav-ing</t></w>
<w><t>Stav-ro-pol</t></w>
<w><t>staw</t></w>
-<w><t>stay</t></w>
+<w><t>stay</t><verb><regular-root/></verb></w>
<w><t>stay-a-ble</t></w>
<w><t>stay-bolt</t></w>
<w><t>stay-er</t></w>
@@ -153958,7 +153968,7 @@
<w><t>stra-min-e-ous-ly</t></w>
<w><t>stra-mo-ni-um</t></w>
<w><t>Strand</t></w>
-<w><t>strand</t></w>
+<w><t>strand</t><verb><regular-root/></verb></w>
<w><t>Strand-lop-er</t></w>
<w><t>strange</t><adjective><extensible/></adjective></w>
<w><t>strange-ly</t></w>
@@ -154368,7 +154378,7 @@
<w><t>strych-nin-ism</t></w>
<w><t>Stry-mon</t></w>
<w><t>Stu</t></w>
-<w><t>Stu-art</t></w>
+<w><t>Stu-art</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>stub</t></w>
<phrase><t>stub ax-le</t></phrase>
<w><t>stub-bed</t></w>
@@ -156245,7 +156255,7 @@
<w><t>su-dor-ip-a-rous</t></w>
<w><t>Su-dra</t></w>
<w><t>suds</t></w>
-<w><t>sue</t></w>
+<w><t>sue</t><verb><regular-root/></verb></w>
<w><t>Sue</t></w>
<w><t>suede</t></w>
<w><t>sued-ed</t></w>
@@ -156370,7 +156380,7 @@
<w><t>su-ing</t></w>
<w><t>su-int</t></w>
<w><t>Suisse</t></w>
-<w><t>suit</t></w>
+<w><t>suit</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>suit-a-ble</t></w>
<w><t>suit-a-bly</t></w>
<w><t>suit-case</t></w>
@@ -156377,7 +156387,7 @@
<w><t>suite</t></w>
<w><t>suit-ed</t></w>
<w><t>suit-ing</t></w>
-<w><t>suit-or</t></w>
+<w><t>suit-or</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Sui-y</t></w>
<w><t>su-ji=mu-ji</t></w>
<w><t>Su-kar-na-pu-ra</t></w>
@@ -156581,7 +156591,7 @@
<phrase><t>sum-mar-y of-fence</t></phrase>
<w><t>sum-ma-tion</t></w>
<w><t>sum-ma-tion-al</t></w>
-<w><t>sum-mer</t></w>
+<w><t>sum-mer</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>sum-mer cy-press</t></phrase>
<w><t>sum-mer-house</t></w>
<w><t>sum-mer-hous-es</t></w>
@@ -157835,7 +157845,7 @@
<w><t>sup-port-a-ble</t></w>
<w><t>sup-port-a-ble-ness</t></w>
<w><t>sup-port-a-bly</t></w>
-<w><t>sup-port-er</t></w>
+<w><t>sup-port-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sup-port-ing</t></w>
<w><t>sup-port-ing-ly</t></w>
<w><t>sup-port-ive</t></w>
@@ -159164,7 +159174,7 @@
<w><t>Tab-by</t></w>
<w><t>tab-by-ing</t></w>
<w><t>ta-ber</t></w>
-<w><t>tab-er-nac-le</t></w>
+<w><t>tab-er-nac-le</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tab-er-nac-u-lar</t></w>
<w><t>ta-bes</t></w>
<w><t>ta-bes-cence</t></w>
@@ -159176,7 +159186,7 @@
<w><t>Tab-i-tha</t></w>
<w><t>tab-la</t></w>
<w><t>tab-la-ture</t></w>
-<w><t>ta-ble</t></w>
+<w><t>ta-ble</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tab-leau</t></w>
<phrase><t>ta-bleau vi-vant</t></phrase>
<w><t>tab-leaux</t></w>
@@ -159447,7 +159457,7 @@
<w><t>Tai-nan</t></w>
<w><t>Taine</t></w>
<w><t>Tai-no</t></w>
-<w><t>taint</t></w>
+<w><t>taint</t><verb><regular-root/></verb></w>
<w><t>taint-less</t></w>
<w><t>taint-less-ly</t></w>
<w><t>taint-less-ness</t></w>
@@ -160894,7 +160904,7 @@
<w><t>ten-son</t></w>
<w><t>ten-sor</t></w>
<w><t>ten-so-ri-al</t></w>
-<w><t>tent</t></w>
+<w><t>tent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ten-ta-cle</t></w>
<w><t>ten-ta-cled</t></w>
<w><t>ten-ta-cle-like</t></w>
@@ -161220,7 +161230,7 @@
<w><t>tes-ti-fi-er</t></w>
<w><t>tes-ti-fy</t></w>
<w><t>tes-ti-fy-ing</t></w>
-<w><t>tes-ti-mo-ni-al</t></w>
+<w><t>tes-ti-mo-ni-al</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tes-ti-mo-ni-al-ised</t></w>
<w><t>tes-ti-mo-ni-al-is-ing</t></w>
<w><t>tes-ti-mo-ni-al-ize</t></w>
@@ -161985,7 +161995,7 @@
<w><t>thing-u-ma-bob</t></w>
<w><t>thing-u-ma-jig</t></w>
<w><t>thing-um-bob</t></w>
-<w><t>think</t></w>
+<w><t>think</t><verb><regular-root value="false"/></verb></w>
<w><t>think-a-ble</t></w>
<w><t>think-a-ble-ness</t></w>
<w><t>think-a-bly</t></w>
@@ -161993,6 +162003,7 @@
<w><t>think-ing</t></w>
<w><t>think-ing-ly</t></w>
<phrase><t>think o-ver</t></phrase>
+<w><t>thinks</t><verb><regular-root value="false"/></verb></w>
<w><t>thin-ly</t></w>
<w><t>thin-ner</t></w>
<w><t>thin-ness</t></w>
@@ -163649,7 +163660,7 @@
<phrase><t>tor-ic lens</t></phrase>
<w><t>to-ri-i</t></w>
<w><t>To-ri-no</t></w>
-<w><t>tor-ment</t></w>
+<w><t>tor-ment</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>tor-ment-er</t></w>
<w><t>tor-men-til</t></w>
<w><t>tor-men-tor</t></w>
@@ -165387,7 +165398,7 @@
<w><t>tri-ose</t></w>
<phrase><t>tri-o so-na-ta</t></phrase>
<w><t>tri-ox-ide</t></w>
-<w><t>trip</t></w>
+<w><t>trip</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>tri-pal-mi-tin</t></w>
<w><t>tri-part</t></w>
<w><t>tri-part-ed</t></w>
@@ -167102,7 +167113,7 @@
<w><t>um-bra-geous-ly</t></w>
<w><t>um-bra-geous-ness</t></w>
<w><t>um-bral</t></w>
-<w><t>um-brel-la</t></w>
+<w><t>um-brel-la</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>um-brel-la bird</t></phrase>
<w><t>um-brel-la-less</t></w>
<w><t>um-brel-la-like</t></w>
@@ -170242,7 +170253,7 @@
<w><t>un-der-sexed</t></w>
<w><t>un-der-sex-ton</t></w>
<w><t>un-der-sheath-ing</t></w>
-<w><t>un-der-shep-herd</t></w>
+<w><t>un-der-shep-herd</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>un-der-sher-iff</t></w>
<w><t>un-der-shield</t></w>
<w><t>un-der-shine</t></w>
@@ -179106,7 +179117,7 @@
<w><t>Up-ing-ton</t></w>
<w><t>U-pis</t></w>
<w><t>up-keep</t></w>
-<w><t>up-land</t></w>
+<w><t>up-land</t><noun><pluralizable/></noun></w>
<w><t>Up-land</t></w>
<phrase><t>up-land cot-ton</t></phrase>
<w><t>up-land-er</t></w>
@@ -180257,7 +180268,7 @@
<w><t>ve-he-men-cy</t></w>
<w><t>ve-he-ment</t></w>
<w><t>ve-he-ment-ly</t></w>
-<w><t>ve-hi-cle</t></w>
+<w><t>ve-hi-cle</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ve-hic-u-lar</t></w>
<w><t>ve-hic-u-lum</t></w>
<w><t>Vehm-ge-richt</t></w>
@@ -182018,7 +182029,7 @@
<w><t>vo-tive</t></w>
<w><t>vo-tress</t></w>
<w><t>Vo-ty-ak</t></w>
-<w><t>vouch</t></w>
+<w><t>vouch</t><verb><regular-root/></verb></w>
<w><t>vouch-er</t></w>
<w><t>vouch-safe</t></w>
<w><t>vouch-safed</t></w>
@@ -182027,7 +182038,7 @@
<w><t>vouge</t></w>
<w><t>vous-soir</t></w>
<w><t>Vou-vray</t></w>
-<w><t>vow</t></w>
+<w><t>vow</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>vow-el</t></w>
<phrase><t>vow-el gra-da-tion</t></phrase>
<w><t>vow-el-ise</t></w>
@@ -182785,7 +182796,7 @@
<w><t>watch-case</t></w>
<phrase><t>Watch Com-mit-tee</t></phrase>
<w><t>watch-dog</t></w>
-<w><t>watch-er</t></w>
+<w><t>watch-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>watch-ful</t></w>
<w><t>watch-ful-ly</t></w>
<w><t>watch-ful-ness</t></w>
@@ -185344,7 +185355,7 @@
<w><t>With-ers</t></w>
<w><t>with-ers</t><noun><plural/></noun></w>
<w><t>with-er-shins</t></w>
-<w><t>With-er-spoon</t></w>
+<w><t>With-er-spoon</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>with-held</t></w>
<w><t>with-hold</t></w>
<w><t>with-hold-er</t></w>
@@ -185533,6 +185544,7 @@
<w><t>won-der=strick-en</t></w>
<w><t>won-der-struck</t></w>
<w><t>won-der-work</t></w>
+<w><t>won-drous</t><adjective/></w>
<w><t>won-drous-ly</t></w>
<w><t>won-drous-ness</t></w>
<w><t>wong-a=wong-a</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2022-07-05 17:31:27 UTC (rev 12708)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2022-07-05 21:46:26 UTC (rev 12709)
@@ -10,6 +10,7 @@
<!--
-->
+<w><t>com-pa-gnon</t></w>
<w><t>de</t></w>
<w><t>en</t></w>
<w><t>France</t></w>
@@ -17,6 +18,7 @@
<w><t>la</t></w>
<w><t>masse</t></w>
<w><t>route</t></w>
+<w><t>voy-age</t></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-07-05 17:31:27 UTC (rev 12708)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-07-05 21:46:26 UTC (rev 12709)
@@ -20,9 +20,11 @@
<w><t>i.e</t><abbrev referenced-word="id est"/><comment>Until "i.e." can be handled properly.</comment></w>
<w><t>in</t></w>
<w><t>in-fi-del-i-um</t></w>
+<w><t>lo-co</t></w>
<w><t>nas-ci-tur</t></w>
<w><t>non</t></w>
<w><t>pa-ra-tus</t></w>
+<w><t>pa-rent-is</t></w>
<w><t>part-i-bus</t></w>
<w><t>per</t></w>
<w><t>prop-a-gan-da</t></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-05 17:31:29
|
Revision: 12708
http://sourceforge.net/p/foray/code/12708
Author: victormote
Date: 2022-07-05 17:31:27 +0000 (Tue, 05 Jul 2022)
Log Message:
-----------
Handle capitalization for words that don't start with a letter, example: 'Twas.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-07-05 17:29:12 UTC (rev 12707)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharacterUtils.java 2022-07-05 17:31:27 UTC (rev 12708)
@@ -359,4 +359,40 @@
|| ARABIC_NUMERAL_ADDITIONAL.indexOf(c) > -1;
}
+ /**
+ * Indicates whether a given character is a letter.
+ * @param c The codepoint to be tested.
+ * @return True if and only if {@code c} is a letter.
+ */
+ public static boolean isLetter(final int c) {
+ final int type = Character.getType(c);
+
+ switch (type) {
+ /* Ordered by expected frequency of use, for performance. */
+ case Character.LOWERCASE_LETTER:
+ case Character.UPPERCASE_LETTER:
+ case Character.TITLECASE_LETTER:
+ case Character.MODIFIER_LETTER:
+ case Character.OTHER_LETTER: {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Finds the index of the first letter in a character sequence.
+ * @param chars The character sequence to be tested.
+ * @return The index to the first letter in {@code chars}, or -1 if no letter is found.
+ */
+ public static int firstLetter(final CharSequence chars) {
+ for (int index = 0; index < chars.length(); index ++) {
+ final char c = chars.charAt(index);
+ if (isLetter(c)) {
+ return index;
+ }
+ }
+ return -1;
+ }
+
}
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2022-07-05 17:29:12 UTC (rev 12707)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Orthography4a.java 2022-07-05 17:31:27 UTC (rev 12708)
@@ -30,6 +30,7 @@
import org.foray.common.primitive.BooleanUtils;
import org.foray.common.primitive.CharSequenceUtils;
+import org.foray.common.primitive.CharacterUtils;
import org.foray.common.primitive.NumberUtils;
import org.foray.orthography.wrapper.CapitalizedWord;
import org.foray.orthography.wrapper.ExactWord;
@@ -335,10 +336,13 @@
* last rule, users should enter the oddly-capitalized word into a dictionary in that form.
* TODO: This capability should be included in the orthography configuration instead of being hard-coded
* here. */
- if (Character.isUpperCase(wordChars.charAt(0))) {
- final StringBuilder builder = new StringBuilder(wordChars);
- builder.setCharAt(0, Character.toLowerCase(wordChars.charAt(0)));
- return isRecognizedWord(builder, offset, length, pos, adhocDictionaries);
+ final int indexFirstLetter = CharacterUtils.firstLetter(wordChars);
+ if (indexFirstLetter > -1) {
+ if (Character.isUpperCase(wordChars.charAt(indexFirstLetter))) {
+ final StringBuilder builder = new StringBuilder(wordChars);
+ builder.setCharAt(indexFirstLetter, Character.toLowerCase(wordChars.charAt(indexFirstLetter)));
+ return isRecognizedWord(builder, offset, length, pos, adhocDictionaries);
+ }
}
return false;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-05 17:29:14
|
Revision: 12707
http://sourceforge.net/p/foray/code/12707
Author: victormote
Date: 2022-07-05 17:29:12 +0000 (Tue, 05 Jul 2022)
Log Message:
-----------
Parse abbreviations and contractions the same way as normal words.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2022-07-05 16:22:20 UTC (rev 12706)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/DictionaryParser.java 2022-07-05 17:29:12 UTC (rev 12707)
@@ -352,6 +352,7 @@
case "possessive": break;
case "extensible": break;
case "abbrev": break;
+ case "contraction": break;
case "comment": {
setTextParsingActive(false);
break;
@@ -367,7 +368,9 @@
@Override
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
switch(localName) {
- case "w": {
+ case "w":
+ case "abbrev":
+ case "contraction": {
final StringWord word = new StringWord(this.currentPartsOfSpeech, this.currentSegments);
final String actualContent = word.getActualContent().toString();
checkCollation(actualContent, word.getCollatingContent().toString());
@@ -486,7 +489,6 @@
case "convertible-to-possessive": break;
case "possessive": break;
case "extensible": break;
- case "abbrev": break;
case "comment": {
setTextParsingActive(true);
break;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-05 16:22:22
|
Revision: 12706
http://sourceforge.net/p/foray/code/12706
Author: victormote
Date: 2022-07-05 16:22:20 +0000 (Tue, 05 Jul 2022)
Log Message:
-----------
Add back test of initial contraction.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2022-07-05 16:15:13 UTC (rev 12705)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2022-07-05 16:22:20 UTC (rev 12706)
@@ -294,26 +294,26 @@
Assert.assertEquals("Return", actual.get(6));
}
-// /**
-// * Test of a string starting with an initial contraction.
-// */
-// @Test
-// public void testInitialContraction() {
-// final String testString = "’Tis the season to be jolly.";
-// final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
-// Assert.assertEquals(12, actual.size());
-// Assert.assertEquals("’Tis", actual.get(0));
-// Assert.assertEquals(" ", actual.get(1));
-// Assert.assertEquals("the", actual.get(2));
-// Assert.assertEquals(" ", actual.get(3));
-// Assert.assertEquals("season", actual.get(4));
-// Assert.assertEquals(" ", actual.get(5));
-// Assert.assertEquals("to", actual.get(6));
-// Assert.assertEquals(" ", actual.get(7));
-// Assert.assertEquals("be", actual.get(8));
-// Assert.assertEquals(" ", actual.get(9));
-// Assert.assertEquals("jolly", actual.get(10));
-// Assert.assertEquals(".", actual.get(11));
-// }
+ /**
+ * Test of a string starting with an initial contraction.
+ */
+ @Test
+ public void testInitialContraction() {
+ final String testString = "’Tis the season to be jolly.";
+ final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+ Assert.assertEquals(12, actual.size());
+ Assert.assertEquals("’Tis", actual.get(0));
+ Assert.assertEquals(" ", actual.get(1));
+ Assert.assertEquals("the", actual.get(2));
+ Assert.assertEquals(" ", actual.get(3));
+ Assert.assertEquals("season", actual.get(4));
+ Assert.assertEquals(" ", actual.get(5));
+ Assert.assertEquals("to", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("be", actual.get(8));
+ Assert.assertEquals(" ", actual.get(9));
+ Assert.assertEquals("jolly", actual.get(10));
+ Assert.assertEquals(".", actual.get(11));
+ }
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-05 16:15:16
|
Revision: 12705
http://sourceforge.net/p/foray/code/12705
Author: victormote
Date: 2022-07-05 16:15:13 +0000 (Tue, 05 Jul 2022)
Log Message:
-----------
Fix for "words" that contain only punctuation.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-04 22:54:10 UTC (rev 12704)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-05 16:15:13 UTC (rev 12705)
@@ -194,6 +194,15 @@
final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
: breakTypes[breakIndex + 1];
+
+ /* If the current type is not a break char, but it is surrounded by break chars, this marks a word. */
+ if (currentBreakType != CharType.BREAK_CHAR
+ && previousBreakType == CharType.BREAK_CHAR
+ && (nextBreakType == CharType.BREAK_CHAR
+ || nextBreakType == CharType.END)) {
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ }
+
switch (currentBreakType) {
case ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION: {
switch (previousBreakType) {
@@ -236,8 +245,7 @@
/* Resolve attached leading punctuation. */
for (int breakIndex = 0; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
- final CharType previousBreakType = breakIndex == 0 ?
- CharType.ATTACHED_LEADING_PUNCTUATION : breakTypes[breakIndex - 1];
+ final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
: breakTypes[breakIndex + 1];
switch (currentBreakType) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-04 22:54:13
|
Revision: 12704
http://sourceforge.net/p/foray/code/12704
Author: victormote
Date: 2022-07-04 22:54:10 +0000 (Mon, 04 Jul 2022)
Log Message:
-----------
Simplify some conditional logic by using switch statements.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-04 22:09:39 UTC (rev 12703)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/Lexer4a.java 2022-07-04 22:54:10 UTC (rev 12704)
@@ -189,11 +189,13 @@
*/
protected void filterBreakTypes(final CharType[] breakTypes) {
/* Resolve possible intraword punctuation. */
- for (int breakIndex = 1; breakIndex < breakTypes.length; breakIndex ++) {
+ for (int breakIndex = 0; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION) {
- final CharType previousBreakType = breakTypes[breakIndex - 1];
- final CharType nextBreakType = breakTypes[breakIndex + 1];
+ final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
+ final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
+ : breakTypes[breakIndex + 1];
+ switch (currentBreakType) {
+ case ATTACHED_TRAILING_OR_INTRAWORD_PUNCTUATION: {
switch (previousBreakType) {
case WORD_CHAR: {
switch (nextBreakType) {
@@ -214,54 +216,97 @@
breakTypes[breakIndex] = CharType.ATTACHED_TRAILING_PUNCTUATION;
break;
}
+ case BREAK_CHAR: {
+ /* This cannot be trailing punctuation, so must be the first character in a new word, probably a
+ * contraction like "'tis" for example. */
+ breakTypes[breakIndex] = CharType.WORD_CHAR;
+ break;
+ }
default:
break;
}
+ break;
}
+ default: {
+ break;
+ }
+ }
}
/* Resolve attached leading punctuation. */
for (int breakIndex = 0; breakIndex < breakTypes.length; breakIndex ++) {
final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_LEADING_PUNCTUATION) {
- final CharType previousBreakType = breakIndex < 1 ?
- CharType.ATTACHED_LEADING_PUNCTUATION : breakTypes[breakIndex - 1];
- final CharType nextBreakType = breakTypes[breakIndex + 1];
- if (previousBreakType == CharType.BREAK_CHAR) {
- if (nextBreakType == CharType.BREAK_CHAR) {
+ final CharType previousBreakType = breakIndex == 0 ?
+ CharType.ATTACHED_LEADING_PUNCTUATION : breakTypes[breakIndex - 1];
+ final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
+ : breakTypes[breakIndex + 1];
+ switch (currentBreakType) {
+ case ATTACHED_LEADING_PUNCTUATION: {
+ switch (previousBreakType) {
+ case BREAK_CHAR: {
+ switch (nextBreakType) {
+ case BREAK_CHAR: {
/* Surrounded by breaks. Treat this as a word. */
breakTypes[breakIndex] = CharType.WORD_CHAR;
- } else {
+ break;
+ }
+ default: {
/* Combine it with the previous whitespace. */
breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ break;
}
- } else {
+ }
+ break;
+ }
+ default: {
breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ break;
}
+ }
+ break;
}
+ default: {
+ break;
+ }
+ }
}
-
/* Resolve attached trailing punctuation. Iterate these in reverse order. */
for (int breakIndex = breakTypes.length - 1; breakIndex > 0; breakIndex --) {
final CharType currentBreakType = breakTypes[breakIndex];
- if (currentBreakType == CharType.ATTACHED_TRAILING_PUNCTUATION) {
- final CharType previousBreakType = breakTypes[breakIndex - 1];
- final CharType nextBreakType = breakTypes[breakIndex + 1];
-
- if (nextBreakType == CharType.BREAK_CHAR
- || nextBreakType == CharType.END) {
- if (previousBreakType == CharType.BREAK_CHAR) {
+ final CharType previousBreakType = breakIndex == 0 ? CharType.BREAK_CHAR : breakTypes[breakIndex - 1];
+ final CharType nextBreakType = breakIndex == breakTypes.length - 1 ? CharType.END
+ : breakTypes[breakIndex + 1];
+ switch (currentBreakType) {
+ case ATTACHED_TRAILING_PUNCTUATION: {
+ switch (nextBreakType) {
+ case BREAK_CHAR:
+ case END: {
+ switch(previousBreakType) {
+ case BREAK_CHAR: {
/* Surrounded by breaks. Treat this as a word. */
breakTypes[breakIndex] = CharType.WORD_CHAR;
- } else {
+ break;
+ }
+ default: {
/* Combine it with the previous whitespace. */
breakTypes[breakIndex] = CharType.BREAK_CHAR;
}
- } else {
+ }
+ break;
+ }
+ default: {
breakTypes[breakIndex] = CharType.BREAK_CHAR;
+ break;
}
+ }
+ break;
}
+ default: {
+ break;
+ }
+ }
+
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-04 22:09:42
|
Revision: 12703
http://sourceforge.net/p/foray/code/12703
Author: victormote
Date: 2022-07-04 22:09:39 +0000 (Mon, 04 Jul 2022)
Log Message:
-----------
Fix some tests broken by recent changes.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/WordWrapperTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordFactoryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordFactoryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordFactoryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordFactoryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordFactoryTests.java
trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordTests.java
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/LexerEnglishTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -294,4 +294,26 @@
Assert.assertEquals("Return", actual.get(6));
}
+// /**
+// * Test of a string starting with an initial contraction.
+// */
+// @Test
+// public void testInitialContraction() {
+// final String testString = "’Tis the season to be jolly.";
+// final List<CharSequence> actual = getObjectUnderTest().tokenize(testString);
+// Assert.assertEquals(12, actual.size());
+// Assert.assertEquals("’Tis", actual.get(0));
+// Assert.assertEquals(" ", actual.get(1));
+// Assert.assertEquals("the", actual.get(2));
+// Assert.assertEquals(" ", actual.get(3));
+// Assert.assertEquals("season", actual.get(4));
+// Assert.assertEquals(" ", actual.get(5));
+// Assert.assertEquals("to", actual.get(6));
+// Assert.assertEquals(" ", actual.get(7));
+// Assert.assertEquals("be", actual.get(8));
+// Assert.assertEquals(" ", actual.get(9));
+// Assert.assertEquals("jolly", actual.get(10));
+// Assert.assertEquals(".", actual.get(11));
+// }
+
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/Orthography4aTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -29,6 +29,7 @@
package org.foray.orthography;
import org.foray.common.i18n.WritingSystem4a;
+import org.foray.common.primitive.CharSequenceUtils;
import org.axsl.orthography.OrthographyException;
import org.axsl.orthography.Word;
@@ -217,7 +218,7 @@
Assert.assertNotNull(hyphenation);
Assert.assertEquals("times", hyphenation.toString());
Assert.assertEquals("time", hyphenation.getNormalizedContent());
- Assert.assertEquals("times", hyphenation.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("times", hyphenation.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,8 @@
package org.foray.orthography;
+import org.foray.common.primitive.CharSequenceUtils;
+
import org.junit.Assert;
import org.junit.Test;
@@ -108,7 +110,7 @@
}
final SegmentDictionaryWord dictWord = out.getWord("attention", 0);
- Assert.assertEquals("attention", dictWord.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("attention", dictWord.getActualContent()));
Assert.assertEquals("at-ten-tion", dictWord.toString());
/* Make sure passing a bogus key returns null. */
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/SegmentDictionaryWordTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,8 @@
package org.foray.orthography;
+import org.foray.common.primitive.CharSequenceUtils;
+
import org.axsl.kp.KpNode;
import org.axsl.orthography.DiscretionaryBreak.Quality;
@@ -74,7 +76,8 @@
*/
@Test
public void getActualContentTests() {
- Assert.assertEquals("ambition", dictionary.getWord("ambition", 0).getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("ambition",
+ dictionary.getWord("ambition", 0).getActualContent()));
}
/**
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/WordWrapperTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/WordWrapperTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/WordWrapperTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,8 @@
package org.foray.orthography;
+import org.foray.common.primitive.CharSequenceUtils;
+
import org.axsl.kp.KpNode;
import org.axsl.orthography.DiscretionaryBreak.Quality;
import org.axsl.orthography.Word;
@@ -73,7 +75,7 @@
*/
@Test
public void testGetActualContent() {
- Assert.assertEquals("harmonious", out.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("harmonious", out.getActualContent()));
}
/**
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordFactoryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordFactoryTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordFactoryTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWordTests;
import org.axsl.orthography.optional.Dictionary;
@@ -74,7 +75,7 @@
final LatinPast1Word word = out.makeInstance("astonished", this.dictionary);
/* Test for instance equality. */
Assert.assertTrue(word.getWrappedWord() == StringWordTests.WORD_ASTONISH);
- Assert.assertEquals("astonished", word.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("astonished", word.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPast1WordTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWord;
import org.foray.orthography.StringWordTests;
@@ -49,7 +50,7 @@
Assert.assertEquals("as-ton-ish-ed", wrapper.toString());
Assert.assertEquals("astonish", wrapper.getNormalizedContent());
- Assert.assertEquals("astonished", wrapper.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("astonished", wrapper.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordFactoryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordFactoryTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordFactoryTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWordTests;
import org.axsl.orthography.optional.Dictionary;
@@ -74,7 +75,7 @@
final LatinPlural1Word word = out.makeInstance("daughters", this.dictionary);
/* Test for instance equality. */
Assert.assertTrue(word.getWrappedWord() == StringWordTests.WORD_DAUGHTER);
- Assert.assertEquals("daughters", word.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("daughters", word.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural1WordTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWord;
import org.foray.orthography.StringWordTests;
@@ -49,7 +50,7 @@
Assert.assertEquals("daugh-ters", wrapper.toString());
Assert.assertEquals("daughter", wrapper.getNormalizedContent());
- Assert.assertEquals("daughters", wrapper.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("daughters", wrapper.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordFactoryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordFactoryTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordFactoryTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWordTests;
import org.axsl.orthography.optional.Dictionary;
@@ -74,7 +75,7 @@
final LatinPlural2Word word = out.makeInstance("companies", this.dictionary);
/* Test for instance equality. */
Assert.assertTrue(word.getWrappedWord() == StringWordTests.WORD_COMPANY);
- Assert.assertEquals("companies", word.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("companies", word.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPlural2WordTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWord;
import org.foray.orthography.StringWordTests;
@@ -49,7 +50,7 @@
Assert.assertEquals("com-pa-nies", wrapper.toString());
Assert.assertEquals("company", wrapper.getNormalizedContent());
- Assert.assertEquals("companies", wrapper.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("companies", wrapper.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordFactoryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordFactoryTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordFactoryTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWordTests;
import org.axsl.orthography.optional.Dictionary;
@@ -74,7 +75,7 @@
final LatinPossessive1Word word = out.makeInstance("daughter\u2019s", this.dictionary);
/* Test for instance equality. */
Assert.assertTrue(word.getWrappedWord() == StringWordTests.WORD_DAUGHTER);
- Assert.assertEquals("daughter\u2019s", word.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("daughter\u2019s", word.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive1WordTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWord;
import org.foray.orthography.StringWordTests;
@@ -49,7 +50,7 @@
Assert.assertEquals("daugh-ter\u2019s", wrapper.toString());
Assert.assertEquals("daughter", wrapper.getNormalizedContent());
- Assert.assertEquals("daughter\u2019s", wrapper.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("daughter\u2019s", wrapper.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordFactoryTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordFactoryTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordFactoryTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWordTests;
import org.axsl.orthography.optional.Dictionary;
@@ -74,7 +75,7 @@
final LatinPossessive2Word word = out.makeInstance("daughter\'s", this.dictionary);
/* Test for instance equality. */
Assert.assertTrue(word.getWrappedWord() == StringWordTests.WORD_DAUGHTER);
- Assert.assertEquals("daughter\'s", word.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("daughter\'s", word.getActualContent()));
}
}
Modified: trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordTests.java
===================================================================
--- trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordTests.java 2022-07-04 18:17:54 UTC (rev 12702)
+++ trunk/foray/foray-orthography/src/test/java/org/foray/orthography/wrapper/LatinPossessive2WordTests.java 2022-07-04 22:09:39 UTC (rev 12703)
@@ -28,6 +28,7 @@
package org.foray.orthography.wrapper;
+import org.foray.common.primitive.CharSequenceUtils;
import org.foray.orthography.StringWord;
import org.foray.orthography.StringWordTests;
@@ -49,7 +50,7 @@
Assert.assertEquals("daugh-ter's", wrapper.toString());
Assert.assertEquals("daughter", wrapper.getNormalizedContent());
- Assert.assertEquals("daughter's", wrapper.getActualContent());
+ Assert.assertTrue(CharSequenceUtils.areEquivalent("daughter's", wrapper.getActualContent()));
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2022-07-04 18:17:57
|
Revision: 12702
http://sourceforge.net/p/foray/code/12702
Author: victormote
Date: 2022-07-04 18:17:54 +0000 (Mon, 04 Jul 2022)
Log Message:
-----------
Improvements to dictionaries and spell-checking.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-GBR.dict.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -25,4 +25,12 @@
<w><t>la-bour-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mod-elled</t><verb/></w>
<w><t>mod-ell-ing</t><noun><singular/><convertible-to-possessive/></noun><verb/></w>
+<w><t>mould</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<w><t>mould-board</t></w>
+<w><t>mould-er</t><verb><regular-root/></verb></w>
+<w><t>mould-i-er</t></w>
+<w><t>mould-ies</t></w>
+<w><t>mould-i-est</t></w>
+<w><t>mould-ing</t></w>
+<w><t>mould-y</t><adjective><extensible/></adjective></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-USA.dict.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -24,4 +24,12 @@
<w><t>la-bor-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mod-eled</t><verb/></w>
<w><t>mod-el-ing</t><noun><singular/><convertible-to-possessive/></noun><verb/></w>
+<w><t>mold</t><noun><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
+<w><t>mold-a-ble</t></w>
+<w><t>mold-board</t></w>
+<w><t>mold-er</t></w>
+<w><t>mold-i-ness</t></w>
+<w><t>mold-ing</t></w>
+<w><t>mold-warp</t></w>
+<w><t>mold-y</t><adjective><extensible/></adjective></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ-archaic.dict.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -8,11 +8,13 @@
soft-hyphen-char="-">
<w><t>be-hoof</t><noun/></w>
-<w><t>Car-tha-gen-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun></w><!-- Carthaginian. -->
+<w><t>Car-tha-gen-i-an</t><noun><pluralizable/><convertible-to-possessive/></noun><comment>Carthaginian.</comment></w>
<w><t>hum-bleth</t></w>
-<w><t>Kal-a-bar</t></w><!-- Calabar -->
-<w><t>Kam-e-run</t></w><!-- Cameroon -->
-<w><t>Kam-e-runs</t></w><!-- Related to Cameroon -->
+<w><t>Jno</t><abbrev referenced-word="John"/></w>
+<w><t>Kal-a-bar</t><comment>Calabar</comment></w>
+<w><t>Kam-e-run</t><comment>Cameroon</comment></w>
+<w><t>Kam-e-runs</t><comment>Related to Cameroon</comment></w>
+<w><t>lo</t><interjection/><comment>Imperative of "look".</comment></w>
<w><t>lov-eth</t></w>
<w><t>oth-er-ways</t><adjective/><adverb/></w>
<w><t>per-suad-est</t><verb/></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/eng-Latn-ZZZ.dict.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -101,7 +101,7 @@
<w><t>ab-a-lo-ne</t></w>
<w><t>ab-amp</t></w>
<w><t>ab-am-pere</t></w>
-<w><t>a-ban-don</t></w>
+<w><t>a-ban-don</t><noun/><verb><regular-root/></verb></w>
<w><t>a-ban-don-a-ble</t></w>
<w><t>a-ban-doned</t></w>
<w><t>a-ban-doned-ly</t></w>
@@ -901,7 +901,7 @@
<w><t>ac-com-pa-ny-ing</t></w>
<w><t>ac-com-pa-ny-ist</t></w>
<w><t>ac-com-plice</t></w>
-<w><t>ac-com-plish</t></w>
+<w><t>ac-com-plish</t><verb><regular-root/></verb></w>
<w><t>ac-com-plish-a-ble</t></w>
<w><t>ac-com-plished</t></w>
<w><t>ac-com-plish-er</t></w>
@@ -1934,7 +1934,7 @@
<w><t>ad-jur-er</t></w>
<w><t>ad-jur-ing</t></w>
<w><t>ad-ju-ror</t></w>
-<w><t>ad-just</t></w>
+<w><t>ad-just</t><verb><regular-root/></verb></w>
<w><t>ad-just-a-ble</t></w>
<w><t>ad-just-a-ble=pitch</t></w>
<w><t>ad-just-a-bly</t></w>
@@ -2664,7 +2664,7 @@
<w><t>af-flict</t><verb><regular-root/></verb></w>
<w><t>af-flict-ed-ness</t></w>
<w><t>af-flict-er</t></w>
-<w><t>af-flic-tion</t></w>
+<w><t>af-flic-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>af-flic-tion-less</t></w>
<w><t>af-flic-tive</t></w>
<w><t>af-flic-tive-ly</t></w>
@@ -2730,8 +2730,8 @@
<w><t>Af-ra-sian</t></w>
<w><t>af-reet</t></w>
<w><t>a-fresh</t></w>
-<w><t>Af-ric</t></w>
-<w><t>Af-ri-ca</t></w>
+<w><t>Af-ric</t><noun><convertible-to-possessive/></noun><adjective/></w>
+<w><t>Af-ri-ca</t><noun><convertible-to-possessive/></noun></w>
<w><t>Af-ri-can</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible value="false"/></adjective></w>
<w><t>Af-ri-can-der</t></w>
<w><t>Af-ri-can-der-ism</t></w>
@@ -5501,7 +5501,7 @@
<w><t>a-muse</t></w>
<w><t>a-mused</t></w>
<w><t>a-mus-ed-ly</t></w>
-<w><t>a-muse-ment</t></w>
+<w><t>a-muse-ment</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>a-muse-ment ar-cade</t></phrase>
<phrase><t>a-muse-ment park</t></phrase>
<w><t>a-mus-er</t></w>
@@ -5955,7 +5955,7 @@
<w><t>An-der-lecht</t></w>
<w><t>An-der-sen</t></w>
<phrase><t>An-der-sen Nex-o</t></phrase>
-<w><t>An-der-son</t></w>
+<w><t>An-der-son</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>An-der-son-ville</t></w>
<w><t>An-ders-sen</t></w>
<w><t>An-des</t></w>
@@ -13804,7 +13804,7 @@
<w><t>bas-i-lis-cine</t></w>
<w><t>bas-i-lisk</t></w>
<w><t>Ba-sil-i-us</t></w>
-<w><t>ba-sin</t></w>
+<w><t>ba-sin</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ba-sined</t></w>
<w><t>bas-i-net</t></w>
<w><t>bas-ing</t></w>
@@ -14270,7 +14270,7 @@
<w><t>beam-like</t></w>
<phrase><t>beam rid-ing</t></phrase>
<w><t>beam-y</t></w>
-<w><t>bean</t></w>
+<w><t>bean</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>bean-bag</t></w>
<phrase><t>bean ca-per</t></phrase>
<w><t>bean-er-ies</t></w>
@@ -14630,7 +14630,7 @@
<w><t>bee-stride</t></w>
<w><t>bees-wax</t></w>
<w><t>bees-wing</t></w>
-<w><t>beet</t></w>
+<w><t>beet</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>beet-fly</t></w>
<w><t>Bee-tho-ven</t></w>
<w><t>bee-tle</t></w>
@@ -15280,7 +15280,7 @@
<w><t>Ber-dya-yev</t></w>
<w><t>Be-re-a</t></w>
<w><t>Be-re-an</t></w>
-<w><t>be-reave</t></w>
+<w><t>be-reave</t><verb><regular-root/></verb></w>
<w><t>be-reave-ment</t></w>
<w><t>be-reav-er</t></w>
<w><t>be-reav-ing</t></w>
@@ -17399,7 +17399,7 @@
<w><t>blous-on</t></w>
<w><t>blou-son</t></w>
<w><t>blous-y</t></w>
-<w><t>blow</t></w>
+<w><t>blow</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root value="false"/></verb></w>
<w><t>B-low</t></w>
<w><t>blow-ball</t></w>
<w><t>blow-er</t></w>
@@ -17426,6 +17426,7 @@
<w><t>blow-out</t></w>
<phrase><t>blow o-ver</t></phrase>
<w><t>blow-pipe</t></w>
+<w><t>blows</t><verb><regular-root value="false"/></verb></w>
<w><t>blows-i-er</t></w>
<w><t>blows-i-est</t></w>
<w><t>blows-i-ly</t></w>
@@ -17613,7 +17614,7 @@
<w><t>boart</t></w>
<w><t>Bo-as</t></w>
<w><t>bo-as</t></w>
-<w><t>boast</t></w>
+<w><t>boast</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>boast-er</t></w>
<w><t>boast-ful</t></w>
<w><t>boast-ful-ly</t></w>
@@ -23019,7 +23020,7 @@
<w><t>cap-ti-va-tion</t></w>
<w><t>cap-ti-va-tive</t></w>
<w><t>cap-ti-va-tor</t></w>
-<w><t>cap-tive</t></w>
+<w><t>cap-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>cap-tiv-i-ty</t></w>
<w><t>cap-tor</t></w>
<w><t>cap-tur-a-ble</t></w>
@@ -23596,7 +23597,7 @@
<w><t>car-pel-late</t></w>
<w><t>Car-pen-tar-i-a</t></w>
<w><t>Car-pen-ta-ri-a</t></w>
-<w><t>car-pen-ter</t></w>
+<w><t>car-pen-ter</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Car-pen-ter</t></w>
<phrase><t>car-pen-ter bee</t></phrase>
<w><t>car-pen-ter-ing</t></w>
@@ -26476,7 +26477,7 @@
<w><t>chev-ro-tain</t></w>
<w><t>chev-y</t></w>
<w><t>chev-y-ing</t></w>
-<w><t>chew</t></w>
+<w><t>chew</t><verb><regular-root/></verb></w>
<w><t>Che-wa</t></w>
<w><t>chew-a-ble</t></w>
<w><t>chew-er</t></w>
@@ -26662,7 +26663,7 @@
<w><t>chil-i-pep-per</t></w>
<w><t>Chil-kat</t></w>
<phrase><t>Chil-koot Pass</t></phrase>
-<w><t>chill</t></w>
+<w><t>chill</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Chi-ll</t></w>
<w><t>Chi-llán</t></w>
<w><t>chil-ler</t></w>
@@ -27150,11 +27151,13 @@
<w><t>choom</t></w>
<w><t>choo-ra</t></w>
<w><t>choos-a-ble</t></w>
-<w><t>choose</t></w>
+<w><t>choose</t><verb><regular-root value="false"/></verb></w>
<w><t>choos-er</t></w>
+<w><t>chooses</t><verb><regular-root value="false"/></verb></w>
<w><t>choos-ey</t></w>
<w><t>choos-i-er</t></w>
<w><t>choos-i-est</t></w>
+<w><t>choos-ing</t><verb><regular-root value="false"/></verb></w>
<w><t>choos-ing-ly</t></w>
<w><t>choos-y</t></w>
<w><t>chop</t></w>
@@ -27268,7 +27271,7 @@
<w><t>cho-rus-mas-ter</t></w>
<w><t>Chor-z</t></w>
<w><t>Cho-rzów</t></w>
-<w><t>chose</t></w>
+<w><t>chose</t><verb><regular-root value="false"/></verb></w>
<w><t>cho-sen</t></w>
<w><t>Cho-sen</t></w>
<phrase><t>cho-sen peo-ple</t></phrase>
@@ -28334,7 +28337,7 @@
<w><t>clam-mi-ness</t></w>
<w><t>clam-ming</t></w>
<w><t>clam-my</t></w>
-<w><t>clam-or</t></w>
+<w><t>clam-or</t><noun><pluralizable/></noun><verb><regular-root/></verb></w>
<w><t>clam-or-er</t></w>
<w><t>clam-or-ist</t></w>
<w><t>clam-or-ous</t></w>
@@ -29981,7 +29984,7 @@
<w><t>col-la-tion</t></w>
<w><t>col-la-tive</t></w>
<w><t>col-la-tor</t></w>
-<w><t>col-league</t></w>
+<w><t>col-league</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>col-league-ship</t></w>
<w><t>col-lect</t></w>
<w><t>col-lect-a-bil-i-ty</t></w>
@@ -31251,7 +31254,7 @@
<w><t>con-ca-vo=con-cave</t></w>
<w><t>con-cav-o=con-vex</t></w>
<w><t>con-ca-vo=con-vex</t></w>
-<w><t>con-ceal</t></w>
+<w><t>con-ceal</t><verb><regular-root/></verb></w>
<w><t>con-ceal-a-ble</t></w>
<w><t>con-ceal-ed-ly</t></w>
<w><t>con-ceal-ed-ness</t></w>
@@ -31704,7 +31707,7 @@
<w><t>con-fine-ment</t></w>
<w><t>con-fin-er</t></w>
<w><t>con-fin-ing</t></w>
-<w><t>con-firm</t></w>
+<w><t>con-firm</t><verb><regular-root/></verb></w>
<w><t>con-firm-a-ble</t></w>
<w><t>con-firm-and</t></w>
<w><t>con-fir-mand</t></w>
@@ -34139,7 +34142,7 @@
<w><t>co-type</t></w>
<w><t>Cot-ys</t></w>
<w><t>cou-cal</t></w>
-<w><t>couch</t></w>
+<w><t>couch</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>couch-ant</t></w>
<w><t>cou-chant</t></w>
<w><t>cou-ché</t></w>
@@ -34378,7 +34381,8 @@
<w><t>coun-try-fied-ness</t></w>
<w><t>coun-try-folk</t></w>
<phrase><t>coun-try gen-tle-man</t></phrase>
-<w><t>coun-try-man</t></w>
+<w><t>coun-try-man</t><noun><singular/></noun></w>
+<w><t>coun-try-men</t><noun><plural/></noun></w>
<w><t>coun-try-peo-ple</t></w>
<phrase><t>coun-try rock</t></phrase>
<w><t>coun-try-seat</t></w>
@@ -34386,8 +34390,8 @@
<w><t>coun-try-side</t></w>
<w><t>coun-try-wide</t></w>
<w><t>coun-try=wide</t></w>
-<w><t>coun-try-wom-an</t></w>
-<w><t>coun-try-wom-en</t></w>
+<w><t>coun-try-wom-an</t><noun><singular/></noun></w>
+<w><t>coun-try-wom-en</t><noun><plural/></noun></w>
<w><t>count-ship</t></w>
<w><t>coun-ty</t></w>
<phrase><t>coun-ty bor-ough</t></phrase>
@@ -34448,7 +34452,7 @@
<w><t>cour-te-ous-ly</t></w>
<w><t>cour-te-ous-ness</t></w>
<w><t>cour-te-san</t></w>
-<w><t>cour-te-sy</t></w>
+<w><t>cour-te-sy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>cour-te-sy light</t></phrase>
<phrase><t>cour-te-sy ti-tle</t></phrase>
<w><t>cour-te-zan</t></w>
@@ -34483,7 +34487,7 @@
<phrase><t>court ten-nis</t></phrase>
<w><t>court-yard</t></w>
<w><t>cous-cous</t></w>
-<w><t>cous-in</t></w>
+<w><t>cous-in</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Cou-sin</t></w>
<w><t>cous-in-age</t></w>
<w><t>cous-in=ger-man</t></w>
@@ -35295,7 +35299,7 @@
<w><t>crime-less</t></w>
<w><t>crime-less-ness</t></w>
<phrase><t>crime pas-sio-nel</t></phrase>
-<w><t>crim-i-nal</t></w>
+<w><t>crim-i-nal</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>crim-i-nal con-ver-sa-tion</t></phrase>
<w><t>crim-i-nal-ist</t></w>
<w><t>crim-i-nal-is-tics</t></w>
@@ -36136,7 +36140,7 @@
<w><t>cu-cul-late</t></w>
<w><t>cu-cul-lat-ed</t></w>
<w><t>cu-cul-late-ly</t></w>
-<w><t>cu-cum-ber</t></w>
+<w><t>cu-cum-ber</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>cu-cum-ber tree</t></phrase>
<w><t>cu-cu-mi-form</t></w>
<w><t>cu-cur-bit</t></w>
@@ -37857,7 +37861,7 @@
<w><t>Dav-en-port</t></w>
<w><t>dav-en-port</t></w>
<w><t>Dav-ey</t></w>
-<w><t>Da-vid</t></w>
+<w><t>Da-vid</t><noun><convertible-to-possessive/></noun></w>
<w><t>Da-vi-da</t></w>
<phrase><t>Da-vid I</t></phrase>
<w><t>Da-vid-ic</t></w>
@@ -37885,7 +37889,7 @@
<w><t>Dawes</t></w>
<w><t>dawk</t></w>
<w><t>Dawn</t></w>
-<w><t>dawn</t></w>
+<w><t>dawn</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>dawn cho-rus</t></phrase>
<w><t>dawn-like</t></w>
<phrase><t>dawn red-wood</t></phrase>
@@ -38022,8 +38026,8 @@
<w><t>de-aer-at-ing</t></w>
<w><t>de-aer-a-tion</t></w>
<w><t>de-aer-a-tor</t></w>
-<w><t>deaf</t></w>
-<w><t>deaf-en</t></w>
+<w><t>deaf</t><adjective><extensible/></adjective></w>
+<w><t>deaf-en</t><verb><regular-root/></verb></w>
<w><t>deaf-en-ing</t></w>
<w><t>deaf-en-ing-ly</t></w>
<w><t>deaf-ly</t></w>
@@ -38035,7 +38039,7 @@
<w><t>de-a-late</t></w>
<w><t>de-a-lat-ed</t></w>
<w><t>de-a-la-tion</t></w>
-<w><t>deal-er</t></w>
+<w><t>deal-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>deal-er-ship</t></w>
<w><t>deal-fish</t></w>
<w><t>deal-fish-es</t></w>
@@ -38208,7 +38212,7 @@
<w><t>dec</t></w>
<w><t>dec-a-dal</t></w>
<w><t>dec-a-dal-ly</t></w>
-<w><t>dec-ade</t></w>
+<w><t>dec-ade</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dec-a-dence</t></w>
<w><t>dec-a-den-cy</t></w>
<w><t>dec-a-dent</t></w>
@@ -38834,7 +38838,7 @@
<w><t>de-fence-less-ly</t></w>
<w><t>de-fence-less-ness</t></w>
<phrase><t>de-fence mech-an-ism</t></phrase>
-<w><t>de-fend</t></w>
+<w><t>de-fend</t><verb><regular-root/></verb></w>
<w><t>de-fend-a-ble</t></w>
<w><t>de-fend-ant</t></w>
<w><t>de-fend-er</t></w>
@@ -39290,7 +39294,7 @@
<w><t>de-lib-er-a-tive-ness</t></w>
<w><t>de-lib-er-a-tor</t></w>
<w><t>De-libes</t></w>
-<w><t>del-i-ca-cy</t></w>
+<w><t>del-i-ca-cy</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>del-i-cate</t></w>
<w><t>del-i-cate-ly</t></w>
<w><t>del-i-cate-ness</t></w>
@@ -40150,7 +40154,7 @@
<w><t>de-posed</t></w>
<w><t>de-pos-er</t></w>
<w><t>de-pos-ing</t></w>
-<w><t>de-pos-it</t></w>
+<w><t>de-pos-it</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>de-pos-it ac-count</t></phrase>
<w><t>de-pos-i-tar-ies</t></w>
<w><t>de-pos-i-tar-y</t></w>
@@ -40516,7 +40520,7 @@
<w><t>de-sid-er-a-tum</t></w>
<w><t>Des-i-der-i-i</t></w>
<w><t>Des-i-de-ri-us</t></w>
-<w><t>de-sign</t></w>
+<w><t>de-sign</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>des-ig-nate</t></w>
<w><t>des-ig-nat-ed</t></w>
<w><t>des-ig-nat-ing</t></w>
@@ -43169,7 +43173,7 @@
<w><t>dis-pen-sa-ble</t></w>
<w><t>dis-pen-sa-ble-ness</t></w>
<w><t>dis-pen-sa-ry</t></w>
-<w><t>dis-pen-sa-tion</t></w>
+<w><t>dis-pen-sa-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>dis-pen-sa-tion-al</t></w>
<w><t>dis-pen-sa-tion-al-ism</t></w>
<w><t>dis-pen-sa-tor</t></w>
@@ -44035,7 +44039,7 @@
<w><t>dock=wal-lop-ing</t></w>
<w><t>dock-yard</t></w>
<w><t>doc-o-sa-no-ic</t></w>
-<w><t>doc-tor</t></w>
+<w><t>doc-tor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>doc-tor-al</t></w>
<w><t>doc-tor-al-ly</t></w>
<w><t>doc-tor-ate</t></w>
@@ -47300,7 +47304,7 @@
<w><t>ef-fused</t></w>
<w><t>ef-fus-ing</t></w>
<w><t>ef-fu-si-om-e-ter</t></w>
-<w><t>ef-fu-sion</t></w>
+<w><t>ef-fu-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ef-fu-sive</t></w>
<w><t>ef-fu-sive-ly</t></w>
<w><t>ef-fu-sive-ness</t></w>
@@ -48373,7 +48377,7 @@
<w><t>em-bod-y</t></w>
<w><t>em-bod-y-ing</t></w>
<w><t>em-boî-té</t></w>
-<w><t>em-bold-en</t></w>
+<w><t>em-bold-en</t><verb><regular-root/></verb></w>
<w><t>em-bo-lec-to-my</t></w>
<w><t>em-bol-ic</t></w>
<w><t>em-bo-lism</t></w>
@@ -49607,7 +49611,7 @@
<w><t>ENT</t></w>
<w><t>en-tab-la-ture</t></w>
<w><t>en-ta-ble-ment</t></w>
-<w><t>en-tail</t></w>
+<w><t>en-tail</t><verb><regular-root/></verb></w>
<w><t>en-tail-er</t></w>
<w><t>en-tail-ment</t></w>
<w><t>en-ta-moe-ba</t></w>
@@ -52170,8 +52174,8 @@
<w><t>ex-ceed-er</t></w>
<w><t>ex-ceed-ing</t></w>
<w><t>ex-ceed-ing-ly</t></w>
-<w><t>ex-cel</t></w>
-<w><t>ex-celled</t></w>
+<w><t>ex-cel</t><verb><regular-root value="false"/></verb></w>
+<w><t>ex-celled</t><verb><regular-root value="false"/></verb></w>
<w><t>Ex-cel-lence</t></w>
<w><t>ex-cel-lence</t></w>
<w><t>ex-cel-len-cy</t></w>
@@ -52178,7 +52182,8 @@
<w><t>Ex-cel-len-cy</t></w>
<w><t>ex-cel-lent</t></w>
<w><t>ex-cel-lent-ly</t></w>
-<w><t>ex-cel-ling</t></w>
+<w><t>ex-cel-ling</t><verb><regular-root value="false"/></verb></w>
+<w><t>ex-cels</t><verb><regular-root value="false"/></verb></w>
<w><t>ex-cel-si-or</t></w>
<w><t>ex-cen-tric</t></w>
<w><t>ex-cep-a-ble</t></w>
@@ -52243,7 +52248,7 @@
<w><t>ex-ci-ta-tion</t></w>
<w><t>ex-cit-a-tive</t></w>
<w><t>ex-cit-a-to-ry</t></w>
-<w><t>ex-cite</t></w>
+<w><t>ex-cite</t><verb><regular-root/></verb></w>
<w><t>ex-cit-ed</t></w>
<w><t>ex-cit-ed-ly</t></w>
<w><t>ex-cit-ed-ness</t></w>
@@ -52453,7 +52458,7 @@
<w><t>ex-e-quies</t></w>
<w><t>ex-e-quy</t></w>
<w><t>ex-er-cis-a-ble</t></w>
-<w><t>ex-er-cise</t></w>
+<w><t>ex-er-cise</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>ex-er-cised</t></w>
<w><t>ex-er-cis-er</t></w>
<w><t>ex-er-cis-ing</t></w>
@@ -55428,7 +55433,7 @@
<w><t>filch</t></w>
<w><t>filch-er</t></w>
<w><t>filch-ing-ly</t></w>
-<w><t>file</t></w>
+<w><t>file</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>fi-lé</t></w>
<w><t>file-card</t></w>
<w><t>file-fish</t></w>
@@ -57869,7 +57874,7 @@
<w><t>for-sworn-ness</t></w>
<w><t>For-syth</t></w>
<w><t>for-syth-i-a</t></w>
-<w><t>fort</t></w>
+<w><t>fort</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>For-ta-le-za</t></w>
<w><t>for-ta-lice</t></w>
<w><t>For-tas</t></w>
@@ -58253,7 +58258,7 @@
<w><t>fram-boe-sia</t></w>
<w><t>fram-boe-si-a</t></w>
<w><t>fram-boise</t></w>
-<w><t>frame</t></w>
+<w><t>frame</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>frame-a-ble</t></w>
<w><t>frame-a-ble-ness</t></w>
<phrase><t>frame aer-i-al</t></phrase>
@@ -59214,7 +59219,7 @@
<w><t>Fug-ger</t></w>
<w><t>fu-ghet-ta</t></w>
<w><t>fu-gi-o</t></w>
-<w><t>fu-gi-tive</t></w>
+<w><t>fu-gi-tive</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>fu-gi-tive-ly</t></w>
<w><t>fu-gi-tive-ness</t></w>
<w><t>fu-gi-tiv-i-ty</t></w>
@@ -63390,7 +63395,7 @@
<w><t>go-shen-ite</t></w>
<w><t>gos-ling</t></w>
<w><t>gos-more</t></w>
-<w><t>gos-pel</t></w>
+<w><t>gos-pel</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Gos-pel</t></w>
<w><t>gos-pel-er</t></w>
<w><t>gos-pel-ler</t></w>
@@ -67914,7 +67919,7 @@
<w><t>heaume</t></w>
<w><t>heave</t><verb><regular-root/></verb></w>
<w><t>heave-less</t></w>
-<w><t>heav-en</t></w>
+<w><t>heav-en</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>heav-en=born</t></w>
<w><t>heav-en-less</t></w>
<w><t>heav-en-li-ness</t></w>
@@ -71362,7 +71367,7 @@
<w><t>Hou-phouet=Boi-gny</t></w>
<w><t>Hou-phou-et=Boi-gny</t></w>
<w><t>houppe-lande</t></w>
-<w><t>hour</t></w>
+<w><t>hour</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>hour an-gle</t></phrase>
<phrase><t>hour cir-cle</t></phrase>
<w><t>hour-glass</t></w>
@@ -71374,7 +71379,7 @@
<w><t>hours</t></w>
<w><t>Hou-sa-ton-ic</t></w>
<w><t>House</t></w>
-<w><t>house</t></w>
+<w><t>house</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<phrase><t>house a-gent</t></phrase>
<phrase><t>house ar-rest</t></phrase>
<w><t>house-boat</t></w>
@@ -71717,7 +71722,7 @@
<w><t>Hum-ber</t></w>
<w><t>Hum-ber-side</t></w>
<w><t>Hum-bert</t></w>
-<w><t>hum-ble</t></w>
+<w><t>hum-ble</t><verb><regular-root/></verb><adjective><extensible/></adjective></w>
<w><t>hum-ble-bee</t></w>
<w><t>hum-ble-ness</t></w>
<phrase><t>hum-ble pie</t></phrase>
@@ -80830,7 +80835,7 @@
<phrase><t>Ja-na Sangh</t></phrase>
<w><t>Ja-na-ta</t></w>
<w><t>jane</t></w>
-<w><t>Jane</t></w>
+<w><t>Jane</t><noun><convertible-to-possessive/></noun></w>
<w><t>Janes-ville</t></w>
<w><t>Ja-net</t></w>
<w><t>Ja-net-ta</t></w>
@@ -81192,7 +81197,7 @@
<w><t>jer-build-ing</t></w>
<w><t>jer-built</t></w>
<w><t>je-reed</t></w>
-<w><t>jer-e-mi-ad</t></w>
+<w><t>jer-e-mi-ad</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Jer-e-mi-ah</t></w>
<w><t>Jer-e-mi-as</t></w>
<w><t>Jé-ré-mie</t></w>
@@ -81575,7 +81580,7 @@
<w><t>Jo-han-nes-burg</t></w>
<w><t>Jo-han-nine</t></w>
<w><t>Jo-han-nis-berg-er</t></w>
-<w><t>John</t></w>
+<w><t>John</t><noun><convertible-to-possessive/></noun></w>
<w><t>john</t></w>
<phrase><t>John Bar-ley-corn</t></phrase>
<phrase><t>John Birch So-ci-e-ty</t></phrase>
@@ -83494,7 +83499,7 @@
<w><t>kind-li-er</t></w>
<w><t>kind-li-est</t></w>
<w><t>kind-li-ness</t></w>
-<w><t>kin-dling</t></w>
+<w><t>kin-dling</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>kind-ly</t></w>
<w><t>kind-ness</t></w>
<w><t>kin-dred</t></w>
@@ -86227,7 +86232,7 @@
<w><t>leach-i-est</t></w>
<w><t>leach-y</t></w>
<w><t>Lea-cock</t></w>
-<w><t>lead</t></w>
+<w><t>lead</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root value="false"/></verb></w>
<w><t>Lead</t></w>
<phrase><t>lead ac-e-tate</t></phrase>
<phrase><t>lead ar-se-nate</t></phrase>
@@ -86259,6 +86264,7 @@
<phrase><t>lead pen-cil</t></phrase>
<w><t>lead-plant</t></w>
<phrase><t>lead poi-son-ing</t></phrase>
+<w><t>leads</t><verb><regular-root value="false"/></verb></w>
<w><t>leads-man</t></w>
<phrase><t>lead tet-ra-e-thyl</t></phrase>
<w><t>Lead-ville</t></w>
@@ -87749,7 +87755,7 @@
<w><t>lik-a-ble</t></w>
<w><t>lik-a-ble-ness</t></w>
<w><t>Li-ka-si</t></w>
-<w><t>like</t></w>
+<w><t>like</t><verb><regular-root/></verb></w>
<w><t>like-a-bil-i-ty</t></w>
<w><t>like-a-ble</t></w>
<w><t>like-a-ble-ness</t></w>
@@ -89224,7 +89230,7 @@
<w><t>loop-i-est</t></w>
<w><t>loop-y</t></w>
<w><t>Loos</t></w>
-<w><t>loose</t></w>
+<w><t>loose</t><verb><regular-root/></verb><adjective><extensible/></adjective></w>
<w><t>loose-box</t></w>
<phrase><t>loose cov-er</t></phrase>
<w><t>loose=fit-ting</t></w>
@@ -91071,7 +91077,7 @@
<w><t>mai-gre</t></w>
<w><t>mai-hem</t></w>
<w><t>Mai-kop</t></w>
-<w><t>mail</t></w>
+<w><t>mail</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mail-a-bil-i-ty</t></w>
<w><t>mail-a-ble</t></w>
<w><t>mail-bag</t></w>
@@ -91148,7 +91154,7 @@
<w><t>ma-jes-tic</t></w>
<w><t>ma-jes-ti-cal</t></w>
<w><t>ma-jes-ti-cal-ly</t></w>
-<w><t>maj-es-ty</t></w>
+<w><t>maj-es-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Maj-es-ty</t></w>
<w><t>Maj-lis</t></w>
<w><t>ma-jol-i-ca</t></w>
@@ -91632,7 +91638,7 @@
<w><t>man-dy-as</t></w>
<w><t>man-dy-as-es</t></w>
<w><t>mane</t></w>
-<w><t>man=eat-er</t></w>
+<w><t>man=eat-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>man=eat-ing</t></w>
<w><t>maned</t></w>
<w><t>mane-less</t></w>
@@ -91846,7 +91852,7 @@
<w><t>man-ser-vant</t></w>
<w><t>Mans-field</t></w>
<w><t>Mans-holt</t></w>
-<w><t>man-sion</t></w>
+<w><t>man-sion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Man-sion House</t></phrase>
<w><t>man-slaugh-ter</t></w>
<w><t>man-slay-er</t></w>
@@ -92492,7 +92498,7 @@
<w><t>Mar-tin-ville</t></w>
<w><t>mart-let</t></w>
<w><t>mart-net</t></w>
-<w><t>mar-tyr</t></w>
+<w><t>mar-tyr</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mar-tyr-dom</t></w>
<w><t>mar-tyr-i-a</t></w>
<w><t>mar-tyr-ies</t></w>
@@ -93530,7 +93536,7 @@
<w><t>me-dic-i-na-ble</t></w>
<w><t>me-dic-i-nal</t></w>
<phrase><t>me-dic-i-nal leech</t></phrase>
-<w><t>med-i-cine</t></w>
+<w><t>med-i-cine</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>med-i-cine ball</t></phrase>
<phrase><t>med-i-cine chest</t></phrase>
<phrase><t>med-i-cine lodge</t></phrase>
@@ -93878,7 +93884,7 @@
<w><t>mel-o-dy-less</t></w>
<w><t>mel-oid</t></w>
<w><t>mel-o-lon-thine</t></w>
-<w><t>mel-on</t></w>
+<w><t>mel-on</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>mel-on=bulb</t></w>
<w><t>mel-os</t></w>
<w><t>Me-los</t></w>
@@ -93936,7 +93942,7 @@
<w><t>mem-o-ra-ble-ness</t></w>
<w><t>mem-o-ra-bly</t></w>
<w><t>mem-o-ran-dum</t></w>
-<w><t>me-mo-ri-al</t></w>
+<w><t>me-mo-ri-al</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>Me-mo-ri-al Day</t></phrase>
<w><t>me-mo-ri-al-ise</t></w>
<w><t>me-mo-ri-al-ised</t></w>
@@ -95522,7 +95528,7 @@
<w><t>mi-li-tia</t></w>
<w><t>mi-li-tia-man</t></w>
<w><t>mil-i-um</t></w>
-<w><t>milk</t></w>
+<w><t>milk</t><noun/><verb><regular-root/></verb></w>
<w><t>milk=and=wa-ter</t></w>
<phrase><t>milk choc-o-late</t></phrase>
<w><t>milk-er</t></w>
@@ -97148,19 +97154,10 @@
<w><t>mo-lar</t></w>
<w><t>mo-lar-i-ty</t></w>
<w><t>mo-las-ses</t></w>
-<w><t>mold</t></w>
-<w><t>mold-a-ble</t></w>
<w><t>Mol-dau</t></w>
<w><t>Mol-da-vi-a</t></w>
<w><t>Mol-da-vi-an</t></w>
<w><t>mol-da-vite</t></w>
-<w><t>mold-board</t></w>
-<w><t>mold-er</t></w>
-<w><t>mold-i-ness</t></w>
-<w><t>mold-ing</t></w>
-<w><t>mold-warp</t></w>
-<w><t>moldy</t></w>
-<w><t>mold-y</t></w>
<w><t>mole</t></w>
<w><t>Mo-lech</t></w>
<phrase><t>mole crick-et</t></phrase>
@@ -98371,13 +98368,6 @@
<w><t>mou-jik</t></w>
<w><t>Mouk-den</t></w>
<w><t>mou-lage</t></w>
-<w><t>mould</t></w>
-<w><t>mould-board</t></w>
-<w><t>mould-er</t><verb><regular-root/></verb></w>
-<w><t>mould-i-er</t></w>
-<w><t>mould-ies</t></w>
-<w><t>mould-i-est</t></w>
-<w><t>mould-ing</t></w>
<phrase><t>mould-ing board</t></phrase>
<w><t>mould-warp</t></w>
<w><t>mould-y</t></w>
@@ -98431,7 +98421,7 @@
<w><t>mount-ing=block</t></w>
<phrase><t>Mount I-sa</t></phrase>
<w><t>Mount-y</t></w>
-<w><t>mourn</t></w>
+<w><t>mourn</t><verb><regular-root/></verb></w>
<w><t>mourn-er</t></w>
<w><t>mourn-ful</t></w>
<w><t>mourn-ing</t></w>
@@ -99818,7 +99808,7 @@
<w><t>nam-by=pam-by</t></w>
<w><t>nam-by=pam-by-ish</t></w>
<w><t>nam-by=pam-by-ism</t></w>
-<w><t>name</t><noun><pluralizable/></noun></w>
+<w><t>name</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>name-a-bil-i-ty</t></w>
<w><t>name=call-er</t></w>
<w><t>name=call-ing</t></w>
@@ -108213,7 +108203,7 @@
<phrase><t>ob-ject ball</t></phrase>
<phrase><t>ob-ject glass</t></phrase>
<w><t>ob-jec-ti-fy</t></w>
-<w><t>ob-jec-tion</t></w>
+<w><t>ob-jec-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>ob-jec-tion-a-bil-i-ty</t></w>
<w><t>ob-jec-tion-a-ble</t></w>
<w><t>ob-jec-tion-a-ble-ness</t></w>
@@ -109419,7 +109409,7 @@
<w><t>on-i-cism</t></w>
<w><t>o-ni-o-ma-ni-a</t></w>
<w><t>o-ni-o-ma-ni-ac</t></w>
-<w><t>on-ion</t></w>
+<w><t>on-ion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>on-ion dome</t></phrase>
<w><t>on-ion-like</t></w>
<w><t>On-ions</t></w>
@@ -109633,7 +109623,7 @@
<phrase><t>o-pen=heart sur-ger-y</t></phrase>
<phrase><t>o-pen house</t></phrase>
<w><t>o-pen=hous-ing</t></w>
-<w><t>o-pen-ing</t></w>
+<w><t>o-pen-ing</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>o-pen-ing time</t></phrase>
<phrase><t>o-pen let-ter</t></phrase>
<w><t>o-pen=let-ter</t></w>
@@ -109646,6 +109636,7 @@
<w><t>o-pen=mouthed</t></w>
<w><t>o-pen=mouth-ed-ly</t></w>
<w><t>o-pen=mouth-ed-ness</t></w>
+<w><t>o-pen-ness</t><noun/></w>
<phrase><t>o-pen or-der</t></phrase>
<w><t>o-pen=plan</t></w>
<phrase><t>o-pen pol-i-cy</t></phrase>
@@ -113646,12 +113637,15 @@
<w><t>o-ver-thick</t></w>
<w><t>o-ver-thin</t></w>
<w><t>o-ver-thought-ful</t></w>
+<w><t>o-ver-threw</t><verb><regular-root value="false"/></verb></w>
<w><t>o-ver-thrift-i-ly</t></w>
<w><t>o-ver-thrift-i-ness</t></w>
<w><t>o-ver-thrift-y</t></w>
<w><t>o-ver-throng</t></w>
-<w><t>o-ver-throw</t></w>
+<w><t>o-ver-throw</t><verb><regular-root value="false"/></verb></w>
<w><t>o-ver-throw-er</t></w>
+<w><t>o-ver-thrown</t><verb><regular-root value="false"/></verb></w>
+<w><t>o-ver-throws</t><verb><regular-root value="false"/></verb></w>
<w><t>o-ver-thrust</t></w>
<w><t>o-ver-tight</t></w>
<w><t>o-ver-tim-bered</t></w>
@@ -113843,7 +113837,7 @@
<w><t>owl-ish-ly</t></w>
<w><t>owl-ish-ness</t></w>
<w><t>owl-like</t></w>
-<w><t>own</t></w>
+<w><t>own</t><verb><regular-root/></verb></w>
<w><t>own-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>own-er-ship</t></w>
<w><t>O-wos-so</t></w>
@@ -115540,7 +115534,7 @@
<w><t>par-i-syl-lab-ic</t></w>
<w><t>par-i-ty</t></w>
<phrase><t>par-i-ty check</t></phrase>
-<w><t>park</t></w>
+<w><t>park</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Park</t></w>
<w><t>par-ka</t></w>
<w><t>Par-ker</t></w>
@@ -116458,7 +116452,7 @@
<w><t>PBX</t></w>
<w><t>P=Celt-ic</t></w>
<w><t>pct</t></w>
-<w><t>pea</t></w>
+<w><t>pea</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Pea-bod-y</t></w>
<w><t>peace</t></w>
<w><t>peace-a-ble</t></w>
@@ -118000,7 +117994,7 @@
<w><t>per-se-cut-ed</t></w>
<w><t>per-se-cut-ing</t></w>
<w><t>per-se-cut-ing-ly</t></w>
-<w><t>per-se-cu-tion</t></w>
+<w><t>per-se-cu-tion</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>per-se-cu-tion-al</t></w>
<phrase><t>per-se-cu-tion com-plex</t></phrase>
<w><t>per-se-cu-tive</t></w>
@@ -118589,7 +118583,7 @@
<w><t>phar-yn-gos-co-py</t></w>
<w><t>phar-yn-got-o-my</t></w>
<w><t>phar-ynx</t></w>
-<w><t>phase</t></w>
+<w><t>phase</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>pha-se-al</t></w>
<w><t>phase-less</t></w>
<phrase><t>phase mod-u-la-tion</t></phrase>
@@ -122265,7 +122259,7 @@
<w><t>Por-se-na</t></w>
<w><t>Por-sen-na</t></w>
<w><t>Por-son</t></w>
-<w><t>port</t></w>
+<w><t>port</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Port</t></w>
<w><t>port-a-ble</t></w>
<phrase><t>Port Ad-e-laide</t></phrase>
@@ -126221,7 +126215,7 @@
<w><t>pre-serv-a-ble</t></w>
<w><t>pres-er-va-tion</t></w>
<w><t>pre-serv-a-tive</t></w>
-<w><t>pre-serve</t></w>
+<w><t>pre-serve</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>pre-served</t></w>
<w><t>pre-serv-er</t></w>
<w><t>pre-serv-ing</t></w>
@@ -131373,7 +131367,7 @@
<w><t>queer</t></w>
<w><t>queer=bash-ing</t></w>
<w><t>Quel-i-ma-ne</t></w>
-<w><t>quell</t></w>
+<w><t>quell</t><verb><regular-root/></verb></w>
<w><t>quell-a-ble</t></w>
<w><t>quell-er</t></w>
<w><t>Quel-part</t></w>
@@ -131566,7 +131560,7 @@
<w><t>quin-que-va-lence</t></w>
<w><t>quin-que-va-lent</t></w>
<w><t>quin-sied</t></w>
-<w><t>quin-sy</t></w>
+<w><t>quin-sy</t><noun/></w>
<w><t>quint</t></w>
<w><t>quin-tain</t></w>
<w><t>quin-tal</t></w>
@@ -131597,6 +131591,7 @@
<w><t>Quin-tus</t></w>
<w><t>qui-nua</t></w>
<w><t>quinze</t></w>
+<w><t>quin-zy</t><noun/></w>
<w><t>quip</t></w>
<w><t>quip-ping</t></w>
<w><t>quip-pish</t></w>
@@ -133334,7 +133329,7 @@
<w><t>re-ca-les-cence</t></w>
<w><t>re-ca-lesc-ing</t></w>
<w><t>re-calk</t></w>
-<w><t>re-call</t></w>
+<w><t>re-call</t><verb><regular-root/></verb></w>
<w><t>re-call-a-ble</t></w>
<w><t>Re-ca-mier</t></w>
<w><t>Ré-ca-mier</t></w>
@@ -133798,7 +133793,7 @@
<w><t>re-coup-a-ble</t></w>
<w><t>re-coup-ment</t></w>
<w><t>re-course</t></w>
-<w><t>re-cov-er</t></w>
+<w><t>re-cov-er</t><verb><regular-root/></verb></w>
<w><t>re=cov-er</t></w>
<w><t>re-cov-er-a-ble</t></w>
<w><t>re-cov-er-a-ble-ness</t></w>
@@ -135505,7 +135500,7 @@
<w><t>re-lanc-ing</t></w>
<w><t>re-land</t></w>
<w><t>re-laps-a-ble</t></w>
-<w><t>re-lapse</t></w>
+<w><t>re-lapse</t><verb><regular-root/></verb></w>
<w><t>re-laps-er</t></w>
<phrase><t>re-laps-ing fe-ver</t></phrase>
<w><t>re-lat-a-bil-i-ty</t></w>
@@ -137273,7 +137268,7 @@
<w><t>re-tack</t></w>
<w><t>re-tail</t></w>
<w><t>re-tail-er</t></w>
-<w><t>re-tain</t></w>
+<w><t>re-tain</t><verb><regular-root/></verb></w>
<phrase><t>re-tained ob-ject</t></phrase>
<w><t>re-tain-er</t></w>
<phrase><t>re-tain-ing wall</t></phrase>
@@ -137853,7 +137848,7 @@
<w><t>re-voke</t></w>
<w><t>re-voked</t></w>
<w><t>re-vok-ing</t></w>
-<w><t>re-volt</t></w>
+<w><t>re-volt</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>re-volt-er</t></w>
<w><t>re-volt-ing</t></w>
<w><t>re-volt-ing-ly</t></w>
@@ -140015,6 +140010,7 @@
<w><t>run-out</t></w>
<phrase><t>run o-ver</t></phrase>
<w><t>run-round</t></w>
+<w><t>runs</t><verb><regular-root value="false"/></verb></w>
<w><t>runt</t></w>
<w><t>runt-i-er</t></w>
<w><t>runt-i-est</t></w>
@@ -141042,7 +141038,7 @@
<w><t>Sanc-tus</t></w>
<phrase><t>Sanc-tus bell</t></phrase>
<w><t>San-cus</t></w>
-<w><t>sand</t></w>
+<w><t>sand</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Sand</t></w>
<w><t>San-da-kan</t></w>
<w><t>san-dal</t></w>
@@ -142836,7 +142832,7 @@
<w><t>scrump-tious-ness</t></w>
<w><t>scrump-y</t></w>
<w><t>scrunch</t></w>
-<w><t>scru-ple</t></w>
+<w><t>scru-ple</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>scru-ple-less</t></w>
<w><t>scru-pu-los-i-ty</t></w>
<w><t>scru-pu-lous</t></w>
@@ -142849,7 +142845,7 @@
<w><t>scru-ti-nised</t></w>
<w><t>scru-ti-nis-ing</t></w>
<w><t>scru-ti-ni-za-tion</t></w>
-<w><t>scru-ti-nize</t></w>
+<w><t>scru-ti-nize</t><verb><regular-root/></verb></w>
<w><t>scru-ti-niz-er</t></w>
<w><t>scru-ti-niz-ing-ly</t></w>
<w><t>scru-ti-ny</t></w>
@@ -146538,7 +146534,7 @@
<w><t>shell-proof</t></w>
<w><t>shell-y</t></w>
<w><t>Shel-ta</t></w>
-<w><t>shel-ter</t></w>
+<w><t>shel-ter</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>shel-ter-er</t></w>
<w><t>shel-ter-ing-ly</t></w>
<w><t>shel-ter-less</t></w>
@@ -146594,7 +146590,7 @@
<w><t>Sher-ley</t></w>
<w><t>sher-lock</t></w>
<w><t>Sher-lock</t></w>
-<w><t>Sher-man</t></w>
+<w><t>Sher-man</t><noun><convertible-to-possessive/></noun></w>
<w><t>Sher-od</t></w>
<w><t>Sher-pa</t></w>
<w><t>Sher-rie</t></w>
@@ -146810,7 +146806,7 @@
<w><t>shiv-a-ree</t></w>
<w><t>shive</t></w>
<w><t>Shive-ly</t></w>
-<w><t>shiv-er</t></w>
+<w><t>shiv-er</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>shiv-er-er</t></w>
<w><t>shiv-er-y</t></w>
<w><t>shi-voo</t></w>
@@ -147134,7 +147130,7 @@
<w><t>shrove</t></w>
<w><t>Shrove-tide</t></w>
<phrase><t>Shrove Tues-day</t></phrase>
-<w><t>shrub</t></w>
+<w><t>shrub</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>shrub-ber-ies</t></w>
<w><t>shrub-ber-y</t></w>
<w><t>shrub-bi-er</t></w>
@@ -147829,7 +147825,7 @@
<phrase><t>sim-ul-ta-ne-ous e-qua-tions</t></phrase>
<w><t>si-mul-ta-ne-ous-ly</t></w>
<w><t>si-mul-ta-ne-ous-ness</t></w>
-<w><t>sin</t></w>
+<w><t>sin</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>Sin</t></w>
<w><t>Si-na</t></w>
<w><t>Si-nai</t></w>
@@ -149666,7 +149662,7 @@
<phrase><t>so-cial ser-vic-es</t></phrase>
<phrase><t>so-cial work</t></phrase>
<w><t>so-ci-e-tal</t></w>
-<w><t>so-ci-e-ty</t></w>
+<w><t>so-ci-e-ty</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<phrase><t>So-ci-e-ty Is-lands</t></phrase>
<phrase><t>So-ci-e-ty of Friends</t></phrase>
<phrase><t>So-ci-e-ty of Je-sus</t></phrase>
@@ -151344,7 +151340,7 @@
<w><t>spig-nel</t></w>
<w><t>spig-ot</t></w>
<w><t>spik</t></w>
-<w><t>spike</t></w>
+<w><t>spike</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>spike-dace</t></w>
<w><t>spike-dac-es</t></w>
<w><t>spike-fish</t></w>
@@ -152882,7 +152878,7 @@
<w><t>stealth-less</t></w>
<w><t>stealth-y</t></w>
<w><t>steam</t></w>
-<w><t>steam-boat</t></w>
+<w><t>steam-boat</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>steam=boil-er</t></w>
<w><t>steam=en-gine</t></w>
<w><t>steam-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
@@ -158089,7 +158085,7 @@
<w><t>sur-vive</t></w>
<w><t>sur-vived</t></w>
<w><t>sur-viv-ing</t></w>
-<w><t>sur-vi-vor</t></w>
+<w><t>sur-vi-vor</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>sur-vi-vor-ship</t></w>
<w><t>Su-sa</t></w>
<w><t>Susah</t></w>
@@ -162366,7 +162362,7 @@
<w><t>throm-bus</t></w>
<w><t>throne</t></w>
<w><t>throne-less</t></w>
-<w><t>throng</t></w>
+<w><t>throng</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>thron-ing</t></w>
<w><t>thro-nos</t></w>
<w><t>Throop</t></w>
@@ -163698,7 +163694,7 @@
<w><t>tor-re-fy</t></w>
<w><t>Tor-rens</t></w>
<phrase><t>Tor-rens ti-tle</t></phrase>
-<w><t>tor-rent</t></w>
+<w><t>tor-rent</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>tor-ren-tial</t></w>
<w><t>Tor-re-on</t></w>
<phrase><t>Tor-res Strait</t></phrase>
@@ -163762,7 +163758,7 @@
<w><t>Tos-ca-na</t></w>
<w><t>Tos-ca-ni-ni</t></w>
<w><t>tosh</t></w>
-<w><t>toss</t></w>
+<w><t>toss</t><verb><regular-root/></verb></w>
<w><t>toss-pot</t></w>
<w><t>toss-up</t></w>
<w><t>tost</t></w>
@@ -179818,7 +179814,7 @@
<w><t>Val-pa-ra-i-so</t></w>
<w><t>Val-pa-ra-í-so</t></w>
<w><t>valse</t></w>
-<w><t>val-u-a-ble</t></w>
+<w><t>val-u-a-ble</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective></adjective></w>
<w><t>val-u-a-ble-ness</t></w>
<w><t>val-u-a-bly</t></w>
<w><t>val-u-ate</t></w>
@@ -180728,7 +180724,7 @@
<w><t>ver-sa-tile-ness</t></w>
<w><t>ver-sa-til-i-ty</t></w>
<phrase><t>vers de so-ci-t</t></phrase>
-<w><t>verse</t></w>
+<w><t>verse</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>versed</t></w>
<w><t>vers-et</t></w>
<w><t>ver-si-cle</t></w>
@@ -181266,7 +181262,7 @@
<w><t>vin-dic-tive</t></w>
<w><t>vin-dic-tive-ly</t></w>
<w><t>vin-dic-tive-ness</t></w>
-<w><t>vine</t></w>
+<w><t>vine</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>vined</t></w>
<w><t>vine-dress-er</t></w>
<w><t>vin-e-gar</t></w>
@@ -182049,7 +182045,7 @@
<phrase><t>vox hu-ma-na</t></phrase>
<phrase><t>vox po-pu-li</t></phrase>
<w><t>voy-age</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
-<w><t>voy-ag-er</t></w>
+<w><t>voy-ag-er</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>vo-ya-geur</t></w>
<w><t>vo-yeur</t></w>
<w><t>vo-yeur-ism</t></w>
@@ -182208,7 +182204,7 @@
<w><t>waf-fle</t></w>
<w><t>waf-fled</t></w>
<w><t>waff-ness</t></w>
-<w><t>waft</t></w>
+<w><t>waft</t><verb><regular-root/></verb></w>
<w><t>waft-age</t></w>
<w><t>waft-er</t></w>
<w><t>waf-ture</t></w>
@@ -182247,7 +182243,7 @@
<w><t>Wag-ner-ite</t></w>
<w><t>Wag-ner=Jau-regg</t></w>
<w><t>Wag-on</t></w>
-<w><t>wag-on</t></w>
+<w><t>wag-on</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>wag-on-age</t></w>
<w><t>Wag-on-er</t></w>
<w><t>wag-on-er</t></w>
@@ -183016,7 +183012,7 @@
<w><t>Wau-sau</t></w>
<w><t>Wau-se-on</t></w>
<w><t>Wau-wa-to-sa</t></w>
-<w><t>wave</t></w>
+<w><t>wave</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>wave-band</t></w>
<phrase><t>wave=cut plat-form</t></phrase>
<w><t>waved</t></w>
@@ -184814,7 +184810,7 @@
<w><t>Wil-bur</t></w>
<w><t>wil-co</t></w>
<w><t>Wil-cox</t></w>
-<w><t>wild</t></w>
+<w><t>wild</t><noun><pluralizable/><convertible-to-possessive/></noun><adjective><extensible/></adjective></w>
<w><t>Wil-da</t></w>
<phrase><t>wild bri-er</t></phrase>
<phrase><t>wild car-rot</t></phrase>
@@ -185867,7 +185863,7 @@
<w><t>would</t></w>
<w><t>would-n’t</t></w>
<phrase><t>Woulfe bot-tle</t></phrase>
-<w><t>wound</t></w>
+<w><t>wound</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>wound-ed</t></w>
<w><t>wound-ed-ly</t></w>
<w><t>wound-ing-ly</t></w>
@@ -185922,7 +185918,7 @@
<w><t>wreath-less</t></w>
<w><t>wreath-like</t></w>
<w><t>wreath-piece</t></w>
-<w><t>wreck</t></w>
+<w><t>wreck</t><noun><pluralizable/><convertible-to-possessive/></noun><verb><regular-root/></verb></w>
<w><t>wreck-age</t></w>
<w><t>wreck-er</t></w>
<w><t>wreck-fish</t></w>
@@ -186634,7 +186630,7 @@
<w><t>young-ling</t></w>
<phrase><t>Young Pre-tend-er</t></phrase>
<phrase><t>Young’s mod-u-lus</t></phrase>
-<w><t>young-ster</t></w>
+<w><t>young-ster</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>Youngs-town</t></w>
<w><t>youn-ker</t></w>
<w><t>your</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/fre-Latn-ZZZ.dict.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -13,8 +13,10 @@
<w><t>de</t></w>
<w><t>en</t></w>
<w><t>France</t></w>
+<w><t>jér-é-mi-ade</t><noun><pluralizable/><convertible-to-possessive/></noun></w>
<w><t>la</t></w>
<w><t>masse</t></w>
+<w><t>route</t></w>
</axsl-dictionary>
Modified: trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/dictionaries/lat-Latn-ZZZ.dict.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -17,6 +17,7 @@
<w><t>et</t></w>
<w><t>fide</t></w>
<w><t>fit</t></w>
+<w><t>i.e</t><abbrev referenced-word="id est"/><comment>Until "i.e." can be handled properly.</comment></w>
<w><t>in</t></w>
<w><t>in-fi-del-i-um</t></w>
<w><t>nas-ci-tur</t></w>
Modified: trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml
===================================================================
--- trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-07-04 17:00:20 UTC (rev 12701)
+++ trunk/foray/foray-orthography/src/main/data/orthographies/foray-orthography-config.xml 2022-07-04 18:17:54 UTC (rev 12702)
@@ -8,17 +8,19 @@
<match-rule-list id="eng-Latn-match-rules">
<match desc="Arabic digits">^[0-9]+$</match>
- <match desc="Formatted Arabic digits">^[0-9]{1,3},([0-9]{3},)*[0-9]{3}(\.[0-9]*$)?</match>
+ <match desc="Formatted Arabic digits">^[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?$</match>
<match desc="Uppercase Roman numerals">^[IVXLCDM]+$</match>
<match desc="Lowercase Roman numerals">^[ivxlcdm]+$</match>
<match desc="Currency">^[$£][0-9]+[0-9,\.]*$</match>
<match desc="English ordinal ending in 1">^[0-9]*1st$</match>
<match desc="English ordinal ending in 2">^[0-9]*2n?d$</match>
- <match desc="English ordinal ending in 3">^[0-9]*3r?d$"</match>
+ <match desc="English ordinal ending in 3">^[0-9]*3r?d$</match>
<match desc="English ordinal ending in 0 or 4 thru 9">^[0-9]*[04-9]th$</match>
- <match desc="English ordinal ending in 11 or 12">^[0-9]*1[1-2]th$</match>
+ <match desc="English ordinal ending in 10 thru 19">^[0-9]*1[0-9]th$</match>
+ <match desc="English ordinal ending in multiple of 10">^[0-9]*[0-9]0th$</match>
<match desc="A single capital letter, such as a person's initial">^[A-Z]$</match>
<match desc="Contracted year">^’[0-9]+$</match>
+ <match desc="Two dimensions">^[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?×[0-9]{1,3}(,[0-9]{3})*(\.[0-9]*)?$</match>
</match-rule-list>
<derivative-pattern-list id="eng-Latn-derivative-patterns">
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|