foray-commit Mailing List for FOray (Page 76)
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
You can subscribe to this list here.
| 2006 | Jan | Feb | Mar (139) | Apr (98) | May (250) | Jun (394) | Jul (84) | Aug (13) | Sep (420) | Oct (186) | Nov (1) | Dec (3) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2007 | Jan (108) | Feb (202) | Mar (291) | Apr (247) | May (374) | Jun (227) | Jul (231) | Aug (60) | Sep (31) | Oct (45) | Nov (18) | Dec |
| 2008 | Jan (38) | Feb (71) | Mar (142) | Apr | May (59) | Jun (6) | Jul (10) | Aug | Sep | Oct | Nov | Dec |
| 2009 | Jan (12) | Feb (4) | Mar (88) | Apr (121) | May (17) | Jun (30) | Jul | Aug (5) | Sep | Oct (1) | Nov | Dec |
| 2010 | Jan (11) | Feb (76) | Mar (11) | Apr | May (11) | Jun | Jul | Aug (44) | Sep (14) | Oct (7) | Nov | Dec |
| 2011 | Jan | Feb | Mar | Apr | May (9) | Jun | Jul | Aug | Sep | Oct (10) | Nov | Dec |
| 2012 | Jan | Feb | Mar | Apr | May | Jun (3) | Jul (4) | Aug | Sep | Oct | Nov | Dec |
| 2016 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec (168) |
| 2017 | Jan (77) | Feb (11) | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
| 2018 | Jan | Feb | Mar (1) | Apr (6) | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
| 2019 | Jan | Feb (88) | Mar (118) | Apr (1) | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
| 2020 | Jan | Feb | Mar | Apr | May (6) | Jun | Jul | Aug | Sep | Oct | Nov | Dec (141) |
| 2021 | Jan (170) | Feb (20) | Mar | Apr | May | Jun | Jul (1) | Aug | Sep | Oct (62) | Nov (189) | Dec (162) |
| 2022 | Jan (201) | Feb (118) | Mar (8) | Apr | May (2) | Jun (47) | Jul (19) | Aug (14) | Sep (3) | Oct | Nov (28) | Dec (235) |
| 2023 | Jan (112) | Feb (23) | Mar (2) | Apr (2) | May | Jun (1) | Jul | Aug (70) | Sep (92) | Oct (20) | Nov (1) | Dec (1) |
| 2024 | Jan | Feb | Mar (1) | Apr (1) | May (14) | Jun (11) | Jul (1) | Aug | Sep | Oct | Nov | Dec |
| 2025 | Jan (10) | Feb (29) | Mar | Apr (162) | May (245) | Jun (83) | Jul | Aug (1) | Sep | Oct | Nov (4) | Dec |
|
From: <vic...@us...> - 2021-11-05 19:08:17
|
Revision: 12001
http://sourceforge.net/p/foray/code/12001
Author: victormote
Date: 2021-11-05 19:08:15 +0000 (Fri, 05 Nov 2021)
Log Message:
-----------
Add part-of-speech info for cardinals and ordinals.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
Modified: trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
===================================================================
--- trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-05 19:07:38 UTC (rev 12000)
+++ trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-05 19:08:15 UTC (rev 12001)
@@ -16155,9 +16155,9 @@
<w><t>Bil-lings</t></w>
<w><t>bil-lings-gate</t></w>
<w><t>Bil-lings-gate</t></w>
-<w><t>bil-lion</t></w>
+<w><t>bil-lion</t><cardinal/></w>
<w><t>bil-lion-aire</t></w>
-<w><t>bil-lionth</t></w>
+<w><t>bil-lionth</t><ordinal/></w>
<w><t>Bil-li-ton</t></w>
<w><t>bill-man</t></w>
<w><t>bil-lon</t></w>
@@ -47407,20 +47407,20 @@
<w><t>ei-gen-val-ue</t></w>
<w><t>ei-gen-vec-tor</t></w>
<w><t>Ei-ger</t></w>
-<w><t>eight</t></w>
+<w><t>eight</t><cardinal/></w>
<w><t>eight-ball</t></w>
-<w><t>eight-een</t></w>
+<w><t>eight-een</t><cardinal/></w>
<w><t>eight-een-mo</t></w>
-<w><t>eight-eenth</t></w>
+<w><t>eight-eenth</t><ordinal/></w>
<w><t>eight-fold</t></w>
-<w><t>eighth</t></w>
+<w><t>eighth</t><ordinal/></w>
<w><t>eighth-ly</t></w>
<w><t>eight-ies</t></w>
-<w><t>eight-i-eth</t></w>
+<w><t>eight-i-eth</t><ordinal/></w>
<w><t>eight-pen-ny</t></w>
<w><t>eight-some reel</t></w>
<w><t>eight-vo</t></w>
-<w><t>eight-y</t></w>
+<w><t>eight-y</t><cardinal/></w>
<w><t>eight-y=eight</t></w>
<w><t>eight-y=eighth</t></w>
<w><t>eight-y=fifth</t></w>
@@ -47983,12 +47983,12 @@
<w><t>el-e-vat-ing-ly</t></w>
<w><t>el-e-va-tion</t></w>
<w><t>el-e-va-tor</t></w>
-<w><t>e-lev-en</t></w>
+<w><t>e-lev-en</t><cardinal/></w>
<w><t>e-lev-en=plus</t></w>
<w><t>e-lev-ens</t></w>
<w><t>e-lev-ens-es</t></w>
<w><t>e-lev-en-ses</t></w>
-<w><t>e-lev-enth</t></w>
+<w><t>e-lev-enth</t><ordinal/></w>
<w><t>e-lev-enth chord</t></w>
<w><t>e-lev-enth hour</t></w>
<w><t>el-e-von</t></w>
@@ -55330,14 +55330,14 @@
<w><t>Fi-fine</t></w>
<w><t>fif-ing</t></w>
<w><t>FIFO</t></w>
-<w><t>fif-teen</t></w>
-<w><t>fif-teenth</t></w>
-<w><t>fifth</t></w>
+<w><t>fif-teen</t><cardinal/></w>
+<w><t>fif-teenth</t><ordinal/></w>
+<w><t>fifth</t><ordinal/></w>
<w><t>fifth col-umn</t></w>
<w><t>Fifth Re-pub-lic</t></w>
<w><t>fifth-ly</t></w>
-<w><t>fif-ti-eth</t></w>
-<w><t>fif-ty</t></w>
+<w><t>fif-ti-eth</t><ordinal/></w>
+<w><t>fif-ty</t><cardinal/></w>
<w><t>fif-ty=eight</t></w>
<w><t>fif-ty=eighth</t></w>
<w><t>fif-ty=fifth</t></w>
@@ -55836,7 +55836,7 @@
<w><t>firn</t></w>
<w><t>firn-i-fi-ca-tion</t></w>
<w><t>fir-ry</t></w>
-<w><t>first</t></w>
+<w><t>first</t><ordinal/></w>
<w><t>First Em-pire</t></w>
<w><t>first es-tate</t></w>
<w><t>First In-ter-na-tion-al</t></w>
@@ -55985,7 +55985,7 @@
<w><t>Fitz-roy</t></w>
<w><t>Fitz-sim-mons</t></w>
<w><t>Fiu-me</t></w>
-<w><t>five</t></w>
+<w><t>five</t><cardinal/></w>
<w><t>five hun-dred</t></w>
<w><t>Five Na-tions</t></w>
<w><t>five=faced bish-op</t></w>
@@ -57892,7 +57892,7 @@
<w><t>forth-right-ly</t></w>
<w><t>forth-right-ness</t></w>
<w><t>forth-with</t></w>
-<w><t>for-ti-eth</t></w>
+<w><t>for-ti-eth</t><ordinal/></w>
<w><t>for-ti-fi-a-ble</t></w>
<w><t>for-ti-fi-ca-tion</t></w>
<w><t>for-ti-fied</t></w>
@@ -57929,7 +57929,7 @@
<w><t>for-tune-tell-er</t></w>
<w><t>for-tune-tell-ing</t></w>
<w><t>for-tun-ing</t></w>
-<w><t>for-ty</t></w>
+<w><t>for-ty</t><cardinal/></w>
<w><t>for-ty winks</t></w>
<w><t>for-ty=eight</t></w>
<w><t>for-ty=eighth</t></w>
@@ -58065,7 +58065,7 @@
<w><t>Fou-quet</t></w>
<w><t>Fou-quier=Tin-ville</t></w>
<w><t>Fou-qué</t></w>
-<w><t>four</t></w>
+<w><t>four</t><cardinal/></w>
<w><t>Four Hun-dred</t></w>
<w><t>four=col-or</t></w>
<w><t>four=cy-cle</t></w>
@@ -58107,11 +58107,11 @@
<w><t>four-square</t></w>
<w><t>four-square-ly</t></w>
<w><t>four-square-ness</t></w>
-<w><t>four-teen</t></w>
+<w><t>four-teen</t><cardinal/></w>
<w><t>Four-teen Points</t></w>
<w><t>four-teen-er</t></w>
-<w><t>four-teenth</t></w>
-<w><t>fourth</t></w>
+<w><t>four-teenth</t><ordinal/></w>
+<w><t>fourth</t><ordinal/></w>
<w><t>fourth di-men-sion</t></w>
<w><t>fourth es-tate</t></w>
<w><t>Fourth In-ter-na-tion-al</t></w>
@@ -71804,7 +71804,7 @@
<w><t>hunch</t></w>
<w><t>hunch-back</t></w>
<w><t>hunch-backed</t></w>
-<w><t>hun-dred</t></w>
+<w><t>hun-dred</t><cardinal/></w>
<w><t>hun-dred days</t></w>
<w><t>Hun-dred Years' War</t></w>
<w><t>hund-red=per-cent-er</t></w>
@@ -71811,7 +71811,7 @@
<w><t>hun-dred=per-cent-er</t></w>
<w><t>hun-dred-fold</t></w>
<w><t>hun-dreds and thou-sands</t></w>
-<w><t>hun-dredth</t></w>
+<w><t>hun-dredth</t><ordinal/></w>
<w><t>hun-dred-weight</t></w>
<w><t>Hun-e-ker</t></w>
<w><t>hung</t></w>
@@ -95603,10 +95603,10 @@
<w><t>Mil-ling-ton</t></w>
<w><t>Mil-li-nock-et</t></w>
<w><t>mil-li-ohm</t></w>
-<w><t>mil-lion</t></w>
+<w><t>mil-lion</t><cardinal/></w>
<w><t>mil-lion-aire</t></w>
<w><t>mil-lion-naire</t></w>
-<w><t>mil-lionth</t></w>
+<w><t>mil-lionth</t><ordinal/></w>
<w><t>mil-li-pede</t></w>
<w><t>mil-li-phot</t></w>
<w><t>mil-li-poise</t></w>
@@ -101617,17 +101617,17 @@
<w><t>nin-com-poop-er-y</t></w>
<w><t>nin-com-poop-ish</t></w>
<w><t>Nine</t></w>
-<w><t>nine</t></w>
+<w><t>nine</t><cardinal/></w>
<w><t>nine-bark</t></w>
<w><t>nine-fold</t></w>
<w><t>nine-pence</t></w>
<w><t>nine-pins</t></w>
-<w><t>nine-teen</t></w>
-<w><t>nine-teenth</t></w>
+<w><t>nine-teen</t><cardinal/></w>
+<w><t>nine-teenth</t><ordinal/></w>
<w><t>nine-teenth hole</t></w>
<w><t>nine-teenth man</t></w>
-<w><t>nine-ti-eth</t></w>
-<w><t>nine-ty</t></w>
+<w><t>nine-ti-eth</t><ordinal/></w>
+<w><t>nine-ty</t><cardinal/></w>
<w><t>nine-ty=eight</t></w>
<w><t>nine-ty=eighth</t></w>
<w><t>nine-ty=fifth</t></w>
@@ -101657,7 +101657,7 @@
<w><t>nin-ny-ish</t></w>
<w><t>Ni-no</t></w>
<w><t>ni-non</t></w>
-<w><t>ninth</t></w>
+<w><t>ninth</t><ordinal/></w>
<w><t>ninth-ly</t></w>
<w><t>Ni-nus</t></w>
<w><t>Ni-o-be</t></w>
@@ -109370,7 +109370,7 @@
<w><t>on-do-graph</t></w>
<w><t>on-dom-e-ter</t></w>
<w><t>on-do-scope</t></w>
-<w><t>one</t></w>
+<w><t>one</t><cardinal/></w>
<w><t>one an-oth-er</t></w>
<w><t>One Thou-sand Guin-eas</t></w>
<w><t>one=act-er</t></w>
@@ -130363,7 +130363,8 @@
<w><t>quad-ri-lin-gual</t></w>
<w><t>quad-rille</t></w>
<w><t>qua-drille</t></w>
-<w><t>quad-ril-lion</t></w>
+<w><t>quad-ril-lion</t><cardinal/></w>
+<w><t>quad-ril-lionth</t><ordinal/></w>
<w><t>quad-ri-no-mi-al</t></w>
<w><t>quad-ri-par-tite</t></w>
<w><t>quad-ri-par-tite-ly</t></w>
@@ -143106,8 +143107,7 @@
<w><t>se-clu-sive-ly</t></w>
<w><t>se-clu-sive-ness</t></w>
<w><t>sec-o-bar-bi-tal</t></w>
-<w><t>sec-ond</t></w>
-<w><t>se-cond</t></w>
+<w><t>sec-ond</t><noun/><verb/><ordinal/></w>
<w><t>Sec-ond Ad-vent</t></w>
<w><t>sec-ond bal-lot</t></w>
<w><t>sec-ond ba-na-na</t></w>
@@ -145852,7 +145852,7 @@
<w><t>Seuss</t></w>
<w><t>Se-van</t></w>
<w><t>Se-vas-to-pol</t></w>
-<w><t>sev-en</t></w>
+<w><t>sev-en</t><cardinal/></w>
<w><t>Sev-en a-gainst Thebes</t></w>
<w><t>sev-en dead-ly sins</t></w>
<w><t>sev-en seas</t></w>
@@ -145866,10 +145866,10 @@
<w><t>Sev-en-er</t></w>
<w><t>sev-en-fold</t></w>
<w><t>sev-ens</t></w>
-<w><t>sev-en-teen</t></w>
+<w><t>sev-en-teen</t><cardinal/></w>
<w><t>sev-en-teen=year lo-cust</t></w>
-<w><t>sev-en-teenth</t></w>
-<w><t>sev-enth</t></w>
+<w><t>sev-en-teenth</t><ordinal/></w>
+<w><t>sev-enth</t><ordinal/></w>
<w><t>sev-enth chord</t></w>
<w><t>sev-enth heav-en</t></w>
<w><t>sev-enth=day</t></w>
@@ -145876,8 +145876,8 @@
<w><t>Sev-enth=Day</t></w>
<w><t>Sev-enth=Day Ad-ven-tist</t></w>
<w><t>sev-enth-ly</t></w>
-<w><t>sev-en-ti-eth</t></w>
-<w><t>sev-en-ty</t></w>
+<w><t>sev-en-ti-eth</t><ordinal/></w>
+<w><t>sev-en-ty</t><cardinal/></w>
<w><t>sev-en-ty=eight</t></w>
<w><t>sev-en-ty=eighth</t></w>
<w><t>sev-en-ty=fifth</t></w>
@@ -148139,7 +148139,7 @@
<w><t>si-ver</t></w>
<w><t>si-wash</t></w>
<w><t>Si-wash</t></w>
-<w><t>six</t></w>
+<w><t>six</t><cardinal/></w>
<w><t>Six</t></w>
<w><t>Six Coun-ties</t></w>
<w><t>Six Na-tions</t></w>
@@ -148153,16 +148153,16 @@
<w><t>six-penc-es</t></w>
<w><t>six-pen-ny</t></w>
<w><t>sixte</t></w>
-<w><t>six-teen</t></w>
+<w><t>six-teen</t><cardinal/></w>
<w><t>six-teen-mo</t></w>
<w><t>six-teen-pen-ny</t></w>
-<w><t>six-teenth</t></w>
+<w><t>six-teenth</t><ordinal/></w>
<w><t>six-teenth note</t></w>
-<w><t>sixth</t></w>
+<w><t>sixth</t><ordinal/></w>
<w><t>sixth-ly</t></w>
-<w><t>six-ti-eth</t></w>
+<w><t>six-ti-eth</t><ordinal/></w>
<w><t>Six-tus V</t></w>
-<w><t>six-ty</t></w>
+<w><t>six-ty</t><cardinal/></w>
<w><t>six-ty=eight</t></w>
<w><t>six-ty=first</t></w>
<w><t>six-ty=five</t></w>
@@ -160696,7 +160696,7 @@
<w><t>tem-pu-ra</t></w>
<w><t>tem-pus fu-git</t></w>
<w><t>Te-mu-co</t></w>
-<w><t>ten</t></w>
+<w><t>ten</t><cardinal/></w>
<w><t>Ten Com-mand-ments</t></w>
<w><t>ten=gal-lon hat</t></w>
<w><t>ten-a-bil-i-ty</t></w>
@@ -160865,7 +160865,7 @@
<w><t>tent-ed</t></w>
<w><t>ten-ter</t></w>
<w><t>ten-ter-hook</t></w>
-<w><t>tenth</t></w>
+<w><t>tenth</t><ordinal/></w>
<w><t>tenth-ly</t></w>
<w><t>tent-ie</t></w>
<w><t>tent-i-er</t></w>
@@ -161994,7 +161994,7 @@
<w><t>thi-o-u-re-a</t></w>
<w><t>thir</t></w>
<w><t>Thi-ra</t></w>
-<w><t>third</t></w>
+<w><t>third</t><ordinal/></w>
<w><t>third de-gree</t></w>
<w><t>third di-men-sion</t></w>
<w><t>third es-tate</t></w>
@@ -162021,11 +162021,11 @@
<w><t>thirst-less</t></w>
<w><t>thirst-less-ness</t></w>
<w><t>thirst-y</t></w>
-<w><t>thir-teen</t></w>
-<w><t>thir-teenth</t></w>
+<w><t>thir-teen</t><cardinal/></w>
+<w><t>thir-teenth</t><ordinal/></w>
<w><t>thir-teenth chord</t></w>
-<w><t>thir-ti-eth</t></w>
-<w><t>thir-ty</t></w>
+<w><t>thir-ti-eth</t><ordinal/></w>
+<w><t>thir-ty</t><cardinal/></w>
<w><t>Thir-ty Years' War</t></w>
<w><t>thir-ty=eight</t></w>
<w><t>thir-ty=eighth</t></w>
@@ -162165,13 +162165,13 @@
<w><t>thought-less</t></w>
<w><t>thought-less-ly</t></w>
<w><t>thought-less-ness</t></w>
-<w><t>thou-sand</t></w>
+<w><t>thou-sand</t><cardinal/></w>
<w><t>Thou-sand Guin-eas</t></w>
<w><t>Thou-sand Is-land dres-sing</t></w>
<w><t>Thou-sand Is-lands</t></w>
<w><t>thou-sand-fold</t></w>
<w><t>thou-sand-fold-ly</t></w>
-<w><t>thou-sandth</t></w>
+<w><t>thou-sandth</t><ordinal/></w>
<w><t>thow-less</t></w>
<w><t>Tho-ön</t></w>
<w><t>Thrace</t></w>
@@ -162214,7 +162214,7 @@
<w><t>threat-ful</t></w>
<w><t>threat-ful-ly</t></w>
<w><t>threat-less</t></w>
-<w><t>three</t></w>
+<w><t>three</t><cardinal/></w>
<w><t>Three Riv-ers</t></w>
<w><t>three=and=a=half-pen-ny</t></w>
<w><t>three=col-or</t></w>
@@ -165240,8 +165240,8 @@
<w><t>tri-lith</t></w>
<w><t>tri-lith-on</t></w>
<w><t>trill</t></w>
-<w><t>tril-lion</t></w>
-<w><t>tril-lionth</t></w>
+<w><t>tril-lion</t><cardinal/></w>
+<w><t>tril-lionth</t><ordinal/></w>
<w><t>tril-lium</t></w>
<w><t>tril-li-um</t></w>
<w><t>tri-lo-bate</t></w>
@@ -166505,9 +166505,9 @@
<w><t>tweez-ers</t></w>
<w><t>twee-zers</t></w>
<w><t>tweez-ing</t></w>
-<w><t>twelfth</t></w>
+<w><t>twelfth</t><ordinal/></w>
<w><t>Twelfth-tide</t></w>
-<w><t>twelve</t></w>
+<w><t>twelve</t><cardinal/></w>
<w><t>Twelve Ta-bles</t></w>
<w><t>twelve=mile lim-it</t></w>
<w><t>twelve-fold</t></w>
@@ -166515,9 +166515,9 @@
<w><t>twelve-month</t></w>
<w><t>twelve-pen-ny</t></w>
<w><t>Twelv-er</t></w>
-<w><t>twen-ti-eth</t></w>
+<w><t>twen-ti-eth</t><ordinal/></w>
<w><t>twen-ti-eth man</t></w>
-<w><t>twen-ty</t></w>
+<w><t>twen-ty</t><cardinal/></w>
<w><t>twen-ty=eight</t></w>
<w><t>twen-ty=eighth</t></w>
<w><t>twen-ty=fifth</t></w>
@@ -166594,7 +166594,7 @@
<w><t>twit-ter-ing-ly</t></w>
<w><t>twit-ter-y</t></w>
<w><t>twixt</t></w>
-<w><t>two</t></w>
+<w><t>two</t><cardinal/></w>
<w><t>Two Sic-i-lies</t></w>
<w><t>Two=and=a=half In-ter-na-tion-al</t></w>
<w><t>two=bit</t></w>
@@ -186844,7 +186844,7 @@
<w><t>Zep-pe-lin</t></w>
<w><t>Zer-matt</t></w>
<w><t>Zer-ni-ke</t></w>
-<w><t>ze-ro</t></w>
+<w><t>ze-ro</t><cardinal/></w>
<w><t>ze-ro grav-i-ty</t></w>
<w><t>ze-ro graz-ing</t></w>
<w><t>ze-ro hour</t></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-05 19:07:40
|
Revision: 12000
http://sourceforge.net/p/foray/code/12000
Author: victormote
Date: 2021-11-05 19:07:38 +0000 (Fri, 05 Nov 2021)
Log Message:
-----------
Conform parsers to aXSL DTD changes.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-05 17:52:22 UTC (rev 11999)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-05 19:07:38 UTC (rev 12000)
@@ -399,57 +399,57 @@
}
case "noun": {
this.currentPartOfSpeech = PartOfSpeech.NOUN;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "pronoun": {
this.currentPartOfSpeech = PartOfSpeech.PRONOUN;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "verb": {
this.currentPartOfSpeech = PartOfSpeech.VERB;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "adjective": {
this.currentPartOfSpeech = PartOfSpeech.ADJECTIVE;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "adverb": {
this.currentPartOfSpeech = PartOfSpeech.ADVERB;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "preposition": {
this.currentPartOfSpeech = PartOfSpeech.PREPOSITION;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "conjunction": {
this.currentPartOfSpeech = PartOfSpeech.CONJUNCTION;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "article": {
this.currentPartOfSpeech = PartOfSpeech.ARTICLE;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "interjection": {
this.currentPartOfSpeech = PartOfSpeech.INTERJECTION;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "cardinal": {
this.currentPartOfSpeech = PartOfSpeech.CARDINAL;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
case "ordinal": {
this.currentPartOfSpeech = PartOfSpeech.ORDINAL;
- this.currentRegularity = parseRegularAttribute(attributes);
+ this.currentRegularity = parseRegularRootAttribute(attributes);
return;
}
default: {
@@ -459,12 +459,12 @@
}
}
- private boolean parseRegularAttribute(final Attributes attributes) {
- final String regularString = attributes.getValue("type");
- if (regularString == null) {
+ private boolean parseRegularRootAttribute(final Attributes attributes) {
+ final String value = attributes.getValue("regular-root");
+ if (value == null) {
return false;
}
- return "true".equals(regularString);
+ return "true".equals(value);
}
/**
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-05 17:52:22 UTC (rev 11999)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-05 19:07:38 UTC (rev 12000)
@@ -321,6 +321,14 @@
this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.INTERJECTION);
break;
}
+ case "cardinal": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.CARDINAL);
+ break;
+ }
+ case "ordinal": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.ORDINAL);
+ break;
+ }
case "word-group": break;
case "axsl-dictionary": {
this.currentDictionary = new DictionaryElement();
@@ -413,7 +421,8 @@
case "conjunction": break;
case "article": break;
case "interjection": break;
- case "participle": break;
+ case "cardinal": break;
+ case "ordinal": break;
case "word-group": break;
case "axsl-dictionary": {
logger.info("End parsing for dictionary: " + this.currentDictionary.orthography.toString());
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-05 17:52:25
|
Revision: 11999
http://sourceforge.net/p/foray/code/11999
Author: victormote
Date: 2021-11-05 17:52:22 +0000 (Fri, 05 Nov 2021)
Log Message:
-----------
Conform to changes in aXSL parts-of-speech DTD.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
Modified: trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
===================================================================
--- trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-05 14:50:02 UTC (rev 11998)
+++ trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-05 17:52:22 UTC (rev 11999)
@@ -27274,7 +27274,7 @@
<w><t>chris-mon</t></w>
<w><t>chris-om</t></w>
<w><t>Chris-sie</t></w>
-<w><t>Christ</t><noun regular="true"/></w>
+<w><t>Christ</t><noun regular-root="true"/></w>
<w><t>Chris-ta-bel</t></w>
<w><t>Chris-ta-del-phi-an</t></w>
<w><t>Christ-church</t></w>
@@ -48702,7 +48702,7 @@
<w><t>emp-ti-ly</t></w>
<w><t>emp-ti-ness</t></w>
<w><t>emp-tor</t></w>
-<w><t>emp-ty</t><noun regular="true"/><verb regular="true"/><adjective/></w>
+<w><t>emp-ty</t><noun regular-root="true"/><verb regular-root="true"/><adjective/></w>
<w><t>Emp-ty Quar-ter</t></w>
<w><t>emp-ty=hand-ed</t></w>
<w><t>emp-ty=head-ed</t></w>
@@ -69565,7 +69565,7 @@
<w><t>hig-gle</t></w>
<w><t>hig-gle-dy=pig-gle-dy</t></w>
<w><t>hig-gler</t></w>
-<w><t>high</t><noun regular="true"/><adjective regular="true"/><adverb/></w>
+<w><t>high</t><noun regular-root="true"/><adjective regular-root="true"/><adverb/></w>
<w><t>high al-tar</t></w>
<w><t>high com-e-dy</t></w>
<w><t>high com-mand</t></w>
@@ -79286,7 +79286,7 @@
<w><t>in-vert-i-ble</t></w>
<w><t>in-ver-tin</t></w>
<w><t>in-ver-tor</t></w>
-<w><t>in-vest</t><verb regular="true"/></w>
+<w><t>in-vest</t><verb regular-root="true"/></w>
<w><t>in-vest-a-ble</t></w>
<w><t>in-vest-i-ble</t></w>
<w><t>in-ves-ti-ga-ble</t></w>
@@ -83488,7 +83488,7 @@
<w><t>kind-heart-ed-ly</t></w>
<w><t>kind-heart-ed-ness</t></w>
<w><t>kind-jal</t></w>
-<w><t>kin-dle</t><verb regular="true"/></w>
+<w><t>kin-dle</t><verb regular-root="true"/></w>
<w><t>kin-dler</t></w>
<w><t>kind-less</t></w>
<w><t>kind-less-ly</t></w>
@@ -89511,7 +89511,7 @@
<w><t>lov-age</t></w>
<w><t>lov-at</t></w>
<w><t>Love</t></w>
-<w><t>love</t><noun regular="true"/><verb regular="true"/></w>
+<w><t>love</t><noun regular-root="true"/><verb regular-root="true"/></w>
<w><t>love af-fair</t></w>
<w><t>love ap-ple</t></w>
<w><t>love let-ter</t></w>
@@ -89559,7 +89559,7 @@
<w><t>lov-ing-ly</t></w>
<w><t>lov-ing-ness</t></w>
<w><t>Lov-ing-ton</t></w>
-<w><t>low</t><noun regular="true"/><verb regular="true"/><adjective regular="true"/><adverb/></w>
+<w><t>low</t><noun regular-root="true"/><verb regular-root="true"/><adjective regular-root="true"/><adverb/></w>
<w><t>Low</t></w>
<w><t>Low Ar-chi-pel-a-go</t></w>
<w><t>low com-e-dy</t></w>
@@ -132972,7 +132972,7 @@
<w><t>re-al-ist</t></w>
<w><t>re-al-is-tic</t></w>
<w><t>re-al-is-ti-cal-ly</t></w>
-<w><t>re-al-i-ty</t><noun regular="true"/></w>
+<w><t>re-al-i-ty</t><noun regular-root="true"/></w>
<w><t>re-al-iz-a-bil-i-ty</t></w>
<w><t>re-al-iz-a-ble</t></w>
<w><t>re-al-iz-a-ble-ness</t></w>
@@ -151860,7 +151860,7 @@
<w><t>spray-ful-ly</t></w>
<w><t>spray-less</t></w>
<w><t>spray-like</t></w>
-<w><t>spread</t><noun/><verb regular="false"/></w>
+<w><t>spread</t><noun/><verb regular-root="false"/></w>
<w><t>spread ea-gle</t></w>
<w><t>spread=ea-gle</t></w>
<w><t>spread=ea-gle-ism</t></w>
@@ -163568,7 +163568,7 @@
<w><t>Tor-bay</t></w>
<w><t>tor-bern-ite</t></w>
<w><t>torc</t></w>
-<w><t>torch</t><noun regular="true"/><verb regular="true"/></w>
+<w><t>torch</t><noun regular-root="true"/><verb regular-root="true"/></w>
<w><t>torch-bear-er</t></w>
<w><t>torch-i-er</t></w>
<w><t>tor-chier</t></w>
@@ -182732,7 +182732,7 @@
<w><t>watch-tow-er</t></w>
<w><t>Wat-chung</t></w>
<w><t>watch-word</t></w>
-<w><t>wa-ter</t><verb regular="true"/></w>
+<w><t>wa-ter</t><verb regular-root="true"/></w>
<w><t>wa-ter back</t></w>
<w><t>wa-ter bear</t></w>
<w><t>wa-ter bed</t></w>
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-05 14:50:02 UTC (rev 11998)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-05 17:52:22 UTC (rev 11999)
@@ -275,7 +275,7 @@
}
case "noun": {
this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.NOUN);
- final String regularity = attributes.getValue("regular");
+ final String regularity = attributes.getValue("regular-root");
if ("true".equals(regularity)) {
this.currentPartsOfSpeech = PosUtils.encodeRegularNoun(this.currentPartsOfSpeech);
}
@@ -287,7 +287,7 @@
}
case "verb": {
this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.VERB);
- final String regularity = attributes.getValue("regular");
+ final String regularity = attributes.getValue("regular-root");
if ("true".equals(regularity)) {
this.currentPartsOfSpeech = PosUtils.encodeRegularVerb(this.currentPartsOfSpeech);
}
@@ -295,7 +295,7 @@
}
case "adjective": {
this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.ADJECTIVE);
- final String regularity = attributes.getValue("regular");
+ final String regularity = attributes.getValue("regular-root");
if ("true".equals(regularity)) {
this.currentPartsOfSpeech = PosUtils.encodeRegularAdjective(this.currentPartsOfSpeech);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-05 14:50:05
|
Revision: 11998
http://sourceforge.net/p/foray/code/11998
Author: victormote
Date: 2021-11-05 14:50:02 +0000 (Fri, 05 Nov 2021)
Log Message:
-----------
Move DerivativeType from FOray to aXSL, as it is now used in aXSL DTD.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
Removed Paths:
-------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-05 14:17:49 UTC (rev 11997)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-05 14:50:02 UTC (rev 11998)
@@ -38,6 +38,7 @@
import org.foray.common.resource.ResourceLocationClasspath;
import org.foray.common.resource.ResourceLocationUrl;
+import org.axsl.hyphen.DerivativeType;
import org.axsl.hyphen.HyphenationException;
import org.axsl.hyphen.PartOfSpeech;
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-05 14:17:49 UTC (rev 11997)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-05 14:50:02 UTC (rev 11998)
@@ -28,6 +28,7 @@
package org.foray.hyphen;
+import org.axsl.hyphen.DerivativeType;
import org.axsl.hyphen.PartOfSpeech;
import org.axsl.hyphen.PosRegularity;
import org.axsl.hyphen.Word;
Deleted: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java 2021-11-05 14:17:49 UTC (rev 11997)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java 2021-11-05 14:50:02 UTC (rev 11998)
@@ -1,110 +0,0 @@
-/*
- * Copyright 2021 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.hyphen;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Types of derivatives, a derivative being a word that is formed from another.
- * In English, this is often done by adding a suffix to the root.
- */
-public enum DerivativeType {
-
- /** The derivative is a plural of the root. English examples: "road(s)" derived from "road", "lunch(es)" derived
- * from "lunch", "part(ies)" derived from "part(y)". */
- PLURAL("plural"),
-
- /** The derivative is a possessive form of the root. English examples: "teacher(’s)" derived from "teacher",
- * "company(’s)" derived from "company". */
- POSSESSIVE("possessive"),
-
- /** The derivative is a different verb form of the root, which should also be a verb. English examples:
- * "empt(ies)" derived from "empt(y)", a change to the 3rd person singular form. */
- VERB_FORM("verb-form"),
-
- /** The derivative is a past participle. English examples: "finish(ed)" derived from "finish". */
- PAST_PARTICIPLE("past-participle"),
-
- /** The derivative is a present participle. English examples: "finish(ing)" derived from "finish". */
- PRESENT_PARTICIPLE("present-participle"),
-
- /** The derivative is a future participle. This is not used much in English, but is in some other languages, such
- * as Latin. */
- FUTURE_PARTICIPLE("future-participle"),
-
- /** The derivative is a gerund, a noun whose root is a verb. English example: "speaking well is an art" where the
- * "speaking" (noun) derived from "speak" (verb). */
- GERUND("gerund"),
-
- /** The derivative is a comparative, an adjective derived from another adjective. English examples: "great(er)"
- * derived from "great", "happ(ier)" derived from "happy". */
- COMPARATIVE("comparative"),
-
- /** The derivative is a superlative, an adjective derived from another adjective. English examples: "great(est)"
- * derived from "great", "happ(iest)" derived from "happy". */
- SUPERLATIVE("superlative");
-
- /** View of the underlying array that prevents it from being copied each time access is needed. */
- private static final List<DerivativeType> AS_LIST = Arrays.asList(DerivativeType.values());
-
- /** A descriptor suitable for use in configuration files. */
- private String token;
-
- /**
- * Constructor.
- * @param token A descriptor suitable for use in configuration files.
- */
- DerivativeType(final String token) {
- this.token = token;
- }
-
- /**
- * Returns a descriptor suitable for use in configuration files.
- * @return A descriptor suitable for use in configuration files.
- */
- public String getToken() {
- return this.token;
- }
-
- /**
- * Finds the instance of this enum, if any, matching {@code token}.
- * @param token The token for the instance of this enum that is wanted.
- * @return The instance of this enum matching {@code token}, or null if not found.
- */
- public static DerivativeType fromToken(final String token) {
- for (int index = 0; index < AS_LIST.size(); index ++) {
- final DerivativeType type = AS_LIST.get(index);
- if (type.token.equals(token)) {
- return type;
- }
- }
- return null;
- }
-}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-05 14:17:52
|
Revision: 11997
http://sourceforge.net/p/foray/code/11997
Author: victormote
Date: 2021-11-05 14:17:49 +0000 (Fri, 05 Nov 2021)
Log Message:
-----------
Conform to aXSL changes in orthography config.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/AxslDtdUtil.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java
Added Paths:
-----------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/AxslDtdUtil.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/AxslDtdUtil.java 2021-11-04 18:47:58 UTC (rev 11996)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/AxslDtdUtil.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -58,6 +58,9 @@
/** The public id of the "area tree" DTD. */
public static final String AREA_TREE_PUBLIC_ID = "-//aXSL//DTD Area Tree V0.1//EN";
+ /** The public id of the "parts of speech" DTD. */
+ public static final String PARTS_OF_SPEECH = "-//aXSL//DTD Parts of Speech V0.1//EN";
+
/**
* Private Constructor. This class is a utility class and should never be instantiated.
*/
@@ -94,6 +97,8 @@
inputStream = AxslDtdUtil.getAxslDtdAsInputStream("axsl-orthography-config.dtd");
} else if (AxslDtdUtil.AREA_TREE_PUBLIC_ID.equals(publicId)) {
inputStream = AxslDtdUtil.getAxslDtdAsInputStream("axsl-area-tree.dtd");
+ } else if (AxslDtdUtil.PARTS_OF_SPEECH.equals(publicId)) {
+ inputStream = AxslDtdUtil.getAxslDtdAsInputStream("axsl-parts-of-speech.dtd");
}
if (inputStream == null) {
return null;
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-04 18:47:58 UTC (rev 11996)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -39,6 +39,7 @@
import org.foray.common.resource.ResourceLocationUrl;
import org.axsl.hyphen.HyphenationException;
+import org.axsl.hyphen.PartOfSpeech;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -89,11 +90,20 @@
private List<Pattern> currentMatchRuleList;
/** Stateful variable. */
- private List<DerivativePattern> currentDerivativeRuleList;
+ private List<DerivativePattern> currentDerivativePatternList;
/** Stateful variable. */
- private DerivativePattern.Type currentDerivativeRuleType;
+ private List<DerivativeRule> currentDerivativeRuleList;
+ /** Component of: derivative-rule. */
+ private PartOfSpeech currentPartOfSpeech;
+
+ /** Component of: derivative-rule. */
+ private Boolean currentRegularity;
+
+ /** Component of: derivative-rule. */
+ private List<DerivativeType> currentDerivativeTypeList;
+
/** Stateful variable. */
private Pattern currentDerivativeRuleMatch;
@@ -228,17 +238,34 @@
this.hyphenationServer.registerMatchRules(id, currentMatchRuleList);
return;
}
- case "derivative-rule-list": {
+ case "derivative-pattern-list": {
final String id = attributes.getValue("id");
- this.currentDerivativeRuleList = new ArrayList<DerivativePattern>();
- this.hyphenationServer.registerDerivativeRules(id, currentDerivativeRuleList);
+ this.currentDerivativePatternList = new ArrayList<DerivativePattern>();
+ this.hyphenationServer.registerDerivativeRules(id, currentDerivativePatternList);
return;
}
+ case "derivative-pattern": {
+ this.currentDerivativeRuleList = new ArrayList<DerivativeRule>();
+ return;
+ }
case "derivative-rule": {
+ this.currentPartOfSpeech = null;
+ this.currentRegularity = null;
+ this.currentDerivativeTypeList = new ArrayList<DerivativeType>();
+ return;
+ }
+ case "derivative-type": {
final String typeString = attributes.getValue("type");
- this.currentDerivativeRuleType = DerivativePattern.Type.fromConfigDescriptor(typeString);
+ final DerivativeType type = DerivativeType.fromToken(typeString);
+ this.currentDerivativeTypeList.add(type);
return;
}
+ case "match": {
+ return;
+ }
+ case "replace": {
+ return;
+ }
case "derivative-factory-list": {
final String id = attributes.getValue("id");
this.currentDerivateFactoryList = new ArrayList<WordWrapperFactory<?>>();
@@ -304,7 +331,7 @@
}
case "derivative-rules": {
final String reference = attributes.getValue("reference");
- final List<DerivativePattern> rules = this.hyphenationServer.getDerivativeRules(reference);
+ final List<DerivativePattern> rules = this.hyphenationServer.getDerivativePatterns(reference);
if (rules == null) {
this.logger.error("derivative-rules not found: {}", reference);
this.logger.error(getContextMessage());
@@ -369,19 +396,76 @@
parseElementOrthography(attributes);
return;
}
- case "match": {
+ case "noun": {
+ this.currentPartOfSpeech = PartOfSpeech.NOUN;
+ this.currentRegularity = parseRegularAttribute(attributes);
return;
}
- case "replace": {
+ case "pronoun": {
+ this.currentPartOfSpeech = PartOfSpeech.PRONOUN;
+ this.currentRegularity = parseRegularAttribute(attributes);
return;
}
+ case "verb": {
+ this.currentPartOfSpeech = PartOfSpeech.VERB;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "adjective": {
+ this.currentPartOfSpeech = PartOfSpeech.ADJECTIVE;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "adverb": {
+ this.currentPartOfSpeech = PartOfSpeech.ADVERB;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "preposition": {
+ this.currentPartOfSpeech = PartOfSpeech.PREPOSITION;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "conjunction": {
+ this.currentPartOfSpeech = PartOfSpeech.CONJUNCTION;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "article": {
+ this.currentPartOfSpeech = PartOfSpeech.ARTICLE;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "interjection": {
+ this.currentPartOfSpeech = PartOfSpeech.INTERJECTION;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "cardinal": {
+ this.currentPartOfSpeech = PartOfSpeech.CARDINAL;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
+ case "ordinal": {
+ this.currentPartOfSpeech = PartOfSpeech.ORDINAL;
+ this.currentRegularity = parseRegularAttribute(attributes);
+ return;
+ }
default: {
- // Make sure user knows about unknown tag
+ /* Make sure user knows about unknown tag. */
this.logger.error("Unknown tag in orthography configuration: {}", localName);
}
}
}
+ private boolean parseRegularAttribute(final Attributes attributes) {
+ final String regularString = attributes.getValue("type");
+ if (regularString == null) {
+ return false;
+ }
+ return "true".equals(regularString);
+ }
+
/**
* Parses the "orthography" element.
* @param attributes The raw parsed attributes.
@@ -482,11 +566,36 @@
this.currentMatchRuleList = null;
return;
}
+ case "derivative-pattern-list": {
+ this.currentDerivativePatternList = null;
+ return;
+ }
+ case "derivative-pattern": {
+ final DerivativePattern pattern = new DerivativePattern(this.currentDerivativeRuleMatch,
+ this.currentDerivativeRuleReplace, this.currentDerivativeRuleList);
+ this.currentDerivativePatternList.add(pattern);
+ this.currentDerivativeRuleList = null;
+ this.currentDerivativeRuleMatch = null;
+ this.currentDerivativeRuleReplace = null;
+ return;
+ }
+ case "derivative-rule": {
+ final DerivativeRule rule = new DerivativeRule(this.currentPartOfSpeech, this.currentRegularity,
+ this.currentDerivativeTypeList);
+ this.currentDerivativeRuleList.add(rule);
+ this.currentPartOfSpeech = null;
+ this.currentRegularity = null;
+ this.currentDerivativeTypeList = null;
+ return;
+ }
+ case "derivative-type": {
+ return;
+ }
case "match": {
final String matchString = this.textAccumulator.toString();
StringUtils.clear(this.textAccumulator);
final Pattern pattern = Pattern.compile(matchString);
- if (this.currentDerivativeRuleType != null) {
+ if (this.currentDerivativeRuleList != null) {
this.currentDerivativeRuleMatch = pattern;
} else {
this.currentMatchRuleList.add(pattern);
@@ -493,19 +602,6 @@
}
return;
}
- case "derivative-rule": {
- final DerivativePattern rule = new DerivativePattern(this.currentDerivativeRuleType,
- this.currentDerivativeRuleMatch, this.currentDerivativeRuleReplace);
- this.currentDerivativeRuleList.add(rule);
- this.currentDerivativeRuleType = null;
- this.currentDerivativeRuleMatch = null;
- this.currentDerivativeRuleReplace = null;
- return;
- }
- case "derivative-rule-list": {
- this.currentDerivativeRuleList = null;
- return;
- }
case "replace": {
final String replaceString = this.textAccumulator.toString();
StringUtils.clear(this.textAccumulator);
@@ -531,6 +627,12 @@
case "hyphenation-patterns": {
return;
}
+ case "match-rules": {
+ return;
+ }
+ case "match-derivative": {
+ return;
+ }
case "derivative-factories": {
return;
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java 2021-11-04 18:47:58 UTC (rev 11996)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -28,11 +28,8 @@
package org.foray.hyphen;
-import org.axsl.hyphen.PartOfSpeech;
-import org.axsl.hyphen.PosRegularity;
import org.axsl.hyphen.Word;
-import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
@@ -44,65 +41,6 @@
*/
public class DerivativePattern {
- /**
- * Enumeration of the types of derivative rules.
- */
- public enum Type {
-
- /** Matching word can be of any type. */
- ANY("any"),
-
- /** Matching word must be a regular noun. */
- REGULAR_NOUN("regular-noun"),
-
- /** Matching word must be a regular verb. */
- REGULAR_VERB("regular-verb"),
-
- /** Matching word must be a regular adjective. */
- REGULAR_ADJECTIVE("regular-adjective");
-
- /** View of the underlying array that prevents it from being copied each time access is needed. */
- private static final List<Type> AS_LIST = Arrays.asList(Type.values());
- /** A descriptor suitable for use in configuration files. */
-
- private String configDescriptor;
-
- /**
- * Constructor.
- * @param configDescriptor A descriptor suitable for use in configuration files.
- */
- Type(final String configDescriptor) {
- this.configDescriptor = configDescriptor;
- }
-
- /**
- * Returns a descriptor suitable for use in configuration files.
- * @return A descriptor suitable for use in configuration files.
- */
- public String getDescriptor() {
- return this.configDescriptor;
- }
-
- /**
- * Finds the instance of this enum, if any, matching {@code configDescriptor}.
- * @param configDescriptor A descriptor suitable for use in configuration files.
- * @return The instance of this enum matching {@code configDescriptor}, or null if not found.
- */
- public static Type fromConfigDescriptor(final String configDescriptor) {
- for (int index = 0; index < AS_LIST.size(); index ++) {
- final Type type = AS_LIST.get(index);
- if (type.configDescriptor.equals(configDescriptor)) {
- return type;
- }
- }
- return null;
- }
-
- }
-
- /** The type of words to which this rule applies. */
- private Type type;
-
/** The regex pattern to which input words must "match". */
private Pattern match;
@@ -109,62 +47,46 @@
/** The regex replacement string to obtain a possible root. */
private String replace;
+ /** The rules that apply to this pattern. */
+ private List<DerivativeRule> rules;
+
/**
* Constructor.
- * @param type The type of words to which this rule applies.
* @param match The regex pattern to which input words must "match".
* @param replace The regex replacement string to obtain a possible root.
+ * @param rules The rules that apply to this pattern.
*/
- public DerivativePattern(final Type type, final Pattern match, final String replace) {
- /* Type is immutable. */
- this.type = type;
+ public DerivativePattern(final Pattern match, final String replace, final List<DerivativeRule> rules) {
/* Pattern is immutable. */
this.match = match;
/* String is immutable. */
this.replace = replace;
+ this.rules = rules;
}
/**
- * Returns the type of this derivative rule.
- * @return The type of this derivative rule.
- */
- public Type getType() {
- return this.type;
- }
-
- /**
* Applies the match and replace patterns to an input word, and returns the computed root of that input word if the
* rule applies.
* @param inputWord The input word being tested.
* @return The root, if any, indicated by this rule, for {@code inputWord}, or null if there is no match.
*/
- CharSequence applyRule(final CharSequence inputWord) {
+ public CharSequence applyRule(final CharSequence inputWord) {
return inputWord.toString().replaceAll(this.match.pattern(), replace);
}
/**
- * Indicates whether this rule applies to a given word.
+ * Checks the rules that are attached to this pattern, and returns the first one that matches.
* @param word The word to be tested.
- * @return True if and only if this rule applies to {@code word}.
+ * @return The first rule in the pattern that matches {@code word}, or null if none match.
*/
- boolean doesRulyApply(final Word word) {
- switch(this.type) {
- case ANY: {
- return true;
+ DerivativeRule doesRulyApply(final Word word) {
+ for (int index = 0; index < this.rules.size(); index ++) {
+ final DerivativeRule rule = this.rules.get(index);
+ if (rule.matches(word)) {
+ return rule;
+ }
}
- case REGULAR_NOUN: {
- return word.isOfType(PartOfSpeech.NOUN, PosRegularity.REGULAR);
- }
- case REGULAR_VERB: {
- return word.isOfType(PartOfSpeech.VERB, PosRegularity.REGULAR);
- }
- case REGULAR_ADJECTIVE: {
- return word.isOfType(PartOfSpeech.ADJECTIVE, PosRegularity.REGULAR);
- }
- default: {
- return false;
- }
- }
+ return null;
}
}
Added: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java (rev 0)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen;
+
+import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
+import org.axsl.hyphen.Word;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * One derivative rule that can be used to find the root of a derived word.
+ */
+public class DerivativeRule {
+
+ /** The part of speech to which the root must belong. */
+ private PartOfSpeech rootPos;
+
+ /** Indicates whether the {@link #rootPos} must be of regular form for this rule to apply. */
+ private boolean isRegular;
+
+ /** The (unmodifiable) list of derivative types which this rule applies.
+ * In other words, if this rule applies, identifies the types of derivative that the derivative could be. */
+ private List<DerivativeType> types;
+
+ public DerivativeRule(final PartOfSpeech rootPos, final boolean isRegular, final List<DerivativeType> types) {
+ this.rootPos = rootPos;
+ this.isRegular = isRegular;
+ final List<DerivativeType> defensiveCopy = new ArrayList<DerivativeType>(types.size());
+ Collections.copy(types, defensiveCopy);
+ this.types = Collections.unmodifiableList(defensiveCopy);
+ }
+
+ /**
+ * Indicates whether a given word matches the criteria for this rule.
+ * @param word The word to be tested.
+ * @return True if and only if {@code word} meets the criteria for this rule.
+ */
+ public boolean matches(final Word word) {
+ final PosRegularity regularity = this.isRegular ? PosRegularity.REGULAR : PosRegularity.IRREGULAR;
+ return word.isOfType(this.rootPos, regularity);
+ }
+
+ /**
+ * Returns the (unmodifiable) list of derivative types which this rule applies.
+ * @return The list of derivative types which this rule applies.
+ */
+ public List<DerivativeType> getTypes() {
+ return this.types;
+ }
+
+}
Property changes on: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Added: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java (rev 0)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Types of derivatives, a derivative being a word that is formed from another.
+ * In English, this is often done by adding a suffix to the root.
+ */
+public enum DerivativeType {
+
+ /** The derivative is a plural of the root. English examples: "road(s)" derived from "road", "lunch(es)" derived
+ * from "lunch", "part(ies)" derived from "part(y)". */
+ PLURAL("plural"),
+
+ /** The derivative is a possessive form of the root. English examples: "teacher(’s)" derived from "teacher",
+ * "company(’s)" derived from "company". */
+ POSSESSIVE("possessive"),
+
+ /** The derivative is a different verb form of the root, which should also be a verb. English examples:
+ * "empt(ies)" derived from "empt(y)", a change to the 3rd person singular form. */
+ VERB_FORM("verb-form"),
+
+ /** The derivative is a past participle. English examples: "finish(ed)" derived from "finish". */
+ PAST_PARTICIPLE("past-participle"),
+
+ /** The derivative is a present participle. English examples: "finish(ing)" derived from "finish". */
+ PRESENT_PARTICIPLE("present-participle"),
+
+ /** The derivative is a future participle. This is not used much in English, but is in some other languages, such
+ * as Latin. */
+ FUTURE_PARTICIPLE("future-participle"),
+
+ /** The derivative is a gerund, a noun whose root is a verb. English example: "speaking well is an art" where the
+ * "speaking" (noun) derived from "speak" (verb). */
+ GERUND("gerund"),
+
+ /** The derivative is a comparative, an adjective derived from another adjective. English examples: "great(er)"
+ * derived from "great", "happ(ier)" derived from "happy". */
+ COMPARATIVE("comparative"),
+
+ /** The derivative is a superlative, an adjective derived from another adjective. English examples: "great(est)"
+ * derived from "great", "happ(iest)" derived from "happy". */
+ SUPERLATIVE("superlative");
+
+ /** View of the underlying array that prevents it from being copied each time access is needed. */
+ private static final List<DerivativeType> AS_LIST = Arrays.asList(DerivativeType.values());
+
+ /** A descriptor suitable for use in configuration files. */
+ private String token;
+
+ /**
+ * Constructor.
+ * @param token A descriptor suitable for use in configuration files.
+ */
+ DerivativeType(final String token) {
+ this.token = token;
+ }
+
+ /**
+ * Returns a descriptor suitable for use in configuration files.
+ * @return A descriptor suitable for use in configuration files.
+ */
+ public String getToken() {
+ return this.token;
+ }
+
+ /**
+ * Finds the instance of this enum, if any, matching {@code token}.
+ * @param token The token for the instance of this enum that is wanted.
+ * @return The instance of this enum matching {@code token}, or null if not found.
+ */
+ public static DerivativeType fromToken(final String token) {
+ for (int index = 0; index < AS_LIST.size(); index ++) {
+ final DerivativeType type = AS_LIST.get(index);
+ if (type.token.equals(token)) {
+ return type;
+ }
+ }
+ return null;
+ }
+}
Property changes on: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeType.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java 2021-11-04 18:47:58 UTC (rev 11996)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -336,7 +336,7 @@
* @param id The id of the derivative rules to be returned.
* @return The derivative rules for {@code id}.
*/
- public List<DerivativePattern> getDerivativeRules(final String id) {
+ public List<DerivativePattern> getDerivativePatterns(final String id) {
return this.derivativeRuleLists.get(id);
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-04 18:47:58 UTC (rev 11996)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -320,13 +320,13 @@
private boolean isDerivativeFound(final Dictionary dictionary, final CharSequence wordChars) {
for (int listIndex = 0; listIndex < this.derivativeRuleListIds.size(); listIndex ++) {
final String ruleListKey = this.derivativeRuleListIds.get(listIndex);
- final List<DerivativePattern> ruleList = this.server.getDerivativeRules(ruleListKey);
- for (int ruleIndex = 0; ruleIndex < ruleList.size(); ruleIndex ++) {
- final DerivativePattern rule = ruleList.get(ruleIndex);
- final String root = rule.applyRule(wordChars).toString();
+ final List<DerivativePattern> patternList = this.server.getDerivativePatterns(ruleListKey);
+ for (int patternIndex = 0; patternIndex < patternList.size(); patternIndex ++) {
+ final DerivativePattern pattern = patternList.get(patternIndex);
+ final String root = pattern.applyRule(wordChars).toString();
final Word word = dictionary.getWord(root, null);
if (word != null) {
- if (rule.doesRulyApply(word)) {
+ if (pattern.doesRulyApply(word) != null) {
return true;
}
}
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java 2021-11-04 18:47:58 UTC (rev 11996)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java 2021-11-05 14:17:49 UTC (rev 11997)
@@ -31,6 +31,7 @@
import org.junit.Assert;
import org.junit.Test;
+import java.util.Collections;
import java.util.regex.Pattern;
/**
@@ -43,8 +44,8 @@
*/
@Test
public void testApplyRule() {
- final DerivativePattern out = new DerivativePattern(DerivativePattern.Type.REGULAR_VERB,
- Pattern.compile("^([a-zA-Z\\-]+)ed$"), "$1");
+ final DerivativePattern out = new DerivativePattern(Pattern.compile("^([a-zA-Z\\-]+)ed$"), "$1",
+ Collections.<DerivativeRule>emptyList());
Assert.assertEquals("trust", out.applyRule("trusted"));
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-04 18:48:01
|
Revision: 11996
http://sourceforge.net/p/foray/code/11996
Author: victormote
Date: 2021-11-04 18:47:58 +0000 (Thu, 04 Nov 2021)
Log Message:
-----------
Rename class to match configuration XML.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java
Added Paths:
-----------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java
Removed Paths:
-------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-04 18:43:31 UTC (rev 11995)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-04 18:47:58 UTC (rev 11996)
@@ -89,10 +89,10 @@
private List<Pattern> currentMatchRuleList;
/** Stateful variable. */
- private List<DerivativeRule> currentDerivativeRuleList;
+ private List<DerivativePattern> currentDerivativeRuleList;
/** Stateful variable. */
- private DerivativeRule.Type currentDerivativeRuleType;
+ private DerivativePattern.Type currentDerivativeRuleType;
/** Stateful variable. */
private Pattern currentDerivativeRuleMatch;
@@ -230,13 +230,13 @@
}
case "derivative-rule-list": {
final String id = attributes.getValue("id");
- this.currentDerivativeRuleList = new ArrayList<DerivativeRule>();
+ this.currentDerivativeRuleList = new ArrayList<DerivativePattern>();
this.hyphenationServer.registerDerivativeRules(id, currentDerivativeRuleList);
return;
}
case "derivative-rule": {
final String typeString = attributes.getValue("type");
- this.currentDerivativeRuleType = DerivativeRule.Type.fromConfigDescriptor(typeString);
+ this.currentDerivativeRuleType = DerivativePattern.Type.fromConfigDescriptor(typeString);
return;
}
case "derivative-factory-list": {
@@ -304,7 +304,7 @@
}
case "derivative-rules": {
final String reference = attributes.getValue("reference");
- final List<DerivativeRule> rules = this.hyphenationServer.getDerivativeRules(reference);
+ final List<DerivativePattern> rules = this.hyphenationServer.getDerivativeRules(reference);
if (rules == null) {
this.logger.error("derivative-rules not found: {}", reference);
this.logger.error(getContextMessage());
@@ -494,7 +494,7 @@
return;
}
case "derivative-rule": {
- final DerivativeRule rule = new DerivativeRule(this.currentDerivativeRuleType,
+ final DerivativePattern rule = new DerivativePattern(this.currentDerivativeRuleType,
this.currentDerivativeRuleMatch, this.currentDerivativeRuleReplace);
this.currentDerivativeRuleList.add(rule);
this.currentDerivativeRuleType = null;
Copied: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java (from rev 11981, trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java)
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java (rev 0)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativePattern.java 2021-11-04 18:47:58 UTC (rev 11996)
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen;
+
+import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
+import org.axsl.hyphen.Word;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * A pattern, attached to an orthography, that specifies one possible way that the root of a given word can be found,
+ * and how that affects hyphenation.
+ *
+ * Instances of this class are immutable.
+ */
+public class DerivativePattern {
+
+ /**
+ * Enumeration of the types of derivative rules.
+ */
+ public enum Type {
+
+ /** Matching word can be of any type. */
+ ANY("any"),
+
+ /** Matching word must be a regular noun. */
+ REGULAR_NOUN("regular-noun"),
+
+ /** Matching word must be a regular verb. */
+ REGULAR_VERB("regular-verb"),
+
+ /** Matching word must be a regular adjective. */
+ REGULAR_ADJECTIVE("regular-adjective");
+
+ /** View of the underlying array that prevents it from being copied each time access is needed. */
+ private static final List<Type> AS_LIST = Arrays.asList(Type.values());
+ /** A descriptor suitable for use in configuration files. */
+
+ private String configDescriptor;
+
+ /**
+ * Constructor.
+ * @param configDescriptor A descriptor suitable for use in configuration files.
+ */
+ Type(final String configDescriptor) {
+ this.configDescriptor = configDescriptor;
+ }
+
+ /**
+ * Returns a descriptor suitable for use in configuration files.
+ * @return A descriptor suitable for use in configuration files.
+ */
+ public String getDescriptor() {
+ return this.configDescriptor;
+ }
+
+ /**
+ * Finds the instance of this enum, if any, matching {@code configDescriptor}.
+ * @param configDescriptor A descriptor suitable for use in configuration files.
+ * @return The instance of this enum matching {@code configDescriptor}, or null if not found.
+ */
+ public static Type fromConfigDescriptor(final String configDescriptor) {
+ for (int index = 0; index < AS_LIST.size(); index ++) {
+ final Type type = AS_LIST.get(index);
+ if (type.configDescriptor.equals(configDescriptor)) {
+ return type;
+ }
+ }
+ return null;
+ }
+
+ }
+
+ /** The type of words to which this rule applies. */
+ private Type type;
+
+ /** The regex pattern to which input words must "match". */
+ private Pattern match;
+
+ /** The regex replacement string to obtain a possible root. */
+ private String replace;
+
+ /**
+ * Constructor.
+ * @param type The type of words to which this rule applies.
+ * @param match The regex pattern to which input words must "match".
+ * @param replace The regex replacement string to obtain a possible root.
+ */
+ public DerivativePattern(final Type type, final Pattern match, final String replace) {
+ /* Type is immutable. */
+ this.type = type;
+ /* Pattern is immutable. */
+ this.match = match;
+ /* String is immutable. */
+ this.replace = replace;
+ }
+
+ /**
+ * Returns the type of this derivative rule.
+ * @return The type of this derivative rule.
+ */
+ public Type getType() {
+ return this.type;
+ }
+
+ /**
+ * Applies the match and replace patterns to an input word, and returns the computed root of that input word if the
+ * rule applies.
+ * @param inputWord The input word being tested.
+ * @return The root, if any, indicated by this rule, for {@code inputWord}, or null if there is no match.
+ */
+ CharSequence applyRule(final CharSequence inputWord) {
+ return inputWord.toString().replaceAll(this.match.pattern(), replace);
+ }
+
+ /**
+ * Indicates whether this rule applies to a given word.
+ * @param word The word to be tested.
+ * @return True if and only if this rule applies to {@code word}.
+ */
+ boolean doesRulyApply(final Word word) {
+ switch(this.type) {
+ case ANY: {
+ return true;
+ }
+ case REGULAR_NOUN: {
+ return word.isOfType(PartOfSpeech.NOUN, PosRegularity.REGULAR);
+ }
+ case REGULAR_VERB: {
+ return word.isOfType(PartOfSpeech.VERB, PosRegularity.REGULAR);
+ }
+ case REGULAR_ADJECTIVE: {
+ return word.isOfType(PartOfSpeech.ADJECTIVE, PosRegularity.REGULAR);
+ }
+ default: {
+ return false;
+ }
+ }
+ }
+
+}
Deleted: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-04 18:43:31 UTC (rev 11995)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-04 18:47:58 UTC (rev 11996)
@@ -1,170 +0,0 @@
-/*
- * Copyright 2021 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-package org.foray.hyphen;
-
-import org.axsl.hyphen.PartOfSpeech;
-import org.axsl.hyphen.PosRegularity;
-import org.axsl.hyphen.Word;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Pattern;
-
-/**
- * A rule, attached to an orthography, that specifies one possible way that the root of a given word can be found, and
- * how that affects hyphenation.
- *
- * Instances of this class are immutable.
- */
-public class DerivativeRule {
-
- /**
- * Enumeration of the types of derivative rules.
- */
- public enum Type {
-
- /** Matching word can be of any type. */
- ANY("any"),
-
- /** Matching word must be a regular noun. */
- REGULAR_NOUN("regular-noun"),
-
- /** Matching word must be a regular verb. */
- REGULAR_VERB("regular-verb"),
-
- /** Matching word must be a regular adjective. */
- REGULAR_ADJECTIVE("regular-adjective");
-
- /** View of the underlying array that prevents it from being copied each time access is needed. */
- private static final List<Type> AS_LIST = Arrays.asList(Type.values());
- /** A descriptor suitable for use in configuration files. */
-
- private String configDescriptor;
-
- /**
- * Constructor.
- * @param configDescriptor A descriptor suitable for use in configuration files.
- */
- Type(final String configDescriptor) {
- this.configDescriptor = configDescriptor;
- }
-
- /**
- * Returns a descriptor suitable for use in configuration files.
- * @return A descriptor suitable for use in configuration files.
- */
- public String getDescriptor() {
- return this.configDescriptor;
- }
-
- /**
- * Finds the instance of this enum, if any, matching {@code configDescriptor}.
- * @param configDescriptor A descriptor suitable for use in configuration files.
- * @return The instance of this enum matching {@code configDescriptor}, or null if not found.
- */
- public static Type fromConfigDescriptor(final String configDescriptor) {
- for (int index = 0; index < AS_LIST.size(); index ++) {
- final Type type = AS_LIST.get(index);
- if (type.configDescriptor.equals(configDescriptor)) {
- return type;
- }
- }
- return null;
- }
-
- }
-
- /** The type of words to which this rule applies. */
- private Type type;
-
- /** The regex pattern to which input words must "match". */
- private Pattern match;
-
- /** The regex replacement string to obtain a possible root. */
- private String replace;
-
- /**
- * Constructor.
- * @param type The type of words to which this rule applies.
- * @param match The regex pattern to which input words must "match".
- * @param replace The regex replacement string to obtain a possible root.
- */
- public DerivativeRule(final Type type, final Pattern match, final String replace) {
- /* Type is immutable. */
- this.type = type;
- /* Pattern is immutable. */
- this.match = match;
- /* String is immutable. */
- this.replace = replace;
- }
-
- /**
- * Returns the type of this derivative rule.
- * @return The type of this derivative rule.
- */
- public Type getType() {
- return this.type;
- }
-
- /**
- * Applies the match and replace patterns to an input word, and returns the computed root of that input word if the
- * rule applies.
- * @param inputWord The input word being tested.
- * @return The root, if any, indicated by this rule, for {@code inputWord}, or null if there is no match.
- */
- CharSequence applyRule(final CharSequence inputWord) {
- return inputWord.toString().replaceAll(this.match.pattern(), replace);
- }
-
- /**
- * Indicates whether this rule applies to a given word.
- * @param word The word to be tested.
- * @return True if and only if this rule applies to {@code word}.
- */
- boolean doesRulyApply(final Word word) {
- switch(this.type) {
- case ANY: {
- return true;
- }
- case REGULAR_NOUN: {
- return word.isOfType(PartOfSpeech.NOUN, PosRegularity.REGULAR);
- }
- case REGULAR_VERB: {
- return word.isOfType(PartOfSpeech.VERB, PosRegularity.REGULAR);
- }
- case REGULAR_ADJECTIVE: {
- return word.isOfType(PartOfSpeech.ADJECTIVE, PosRegularity.REGULAR);
- }
- default: {
- return false;
- }
- }
- }
-
-}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java 2021-11-04 18:43:31 UTC (rev 11995)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationServer4a.java 2021-11-04 18:47:58 UTC (rev 11996)
@@ -84,7 +84,7 @@
private Map<String, List<Pattern>> matchRuleLists = new HashMap<String, List<Pattern>>();
/** The map of derivative rule lists, keyed by id. */
- private Map<String, List<DerivativeRule>> derivativeRuleLists = new HashMap<String, List<DerivativeRule>>();
+ private Map<String, List<DerivativePattern>> derivativeRuleLists = new HashMap<String, List<DerivativePattern>>();
/**
* Constructor.
@@ -324,7 +324,7 @@
* @param id The id of the derivative rules to be registered.
* @param derivativeRules The derivative rules being registered.
*/
- public void registerDerivativeRules(final String id, final List<DerivativeRule> derivativeRules) {
+ public void registerDerivativeRules(final String id, final List<DerivativePattern> derivativeRules) {
if (this.derivativeRuleLists.get(id) != null) {
throw new IllegalArgumentException("Derivative Rules already exist for id: " + id);
}
@@ -336,7 +336,7 @@
* @param id The id of the derivative rules to be returned.
* @return The derivative rules for {@code id}.
*/
- public List<DerivativeRule> getDerivativeRules(final String id) {
+ public List<DerivativePattern> getDerivativeRules(final String id) {
return this.derivativeRuleLists.get(id);
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-04 18:43:31 UTC (rev 11995)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-04 18:47:58 UTC (rev 11996)
@@ -320,9 +320,9 @@
private boolean isDerivativeFound(final Dictionary dictionary, final CharSequence wordChars) {
for (int listIndex = 0; listIndex < this.derivativeRuleListIds.size(); listIndex ++) {
final String ruleListKey = this.derivativeRuleListIds.get(listIndex);
- final List<DerivativeRule> ruleList = this.server.getDerivativeRules(ruleListKey);
+ final List<DerivativePattern> ruleList = this.server.getDerivativeRules(ruleListKey);
for (int ruleIndex = 0; ruleIndex < ruleList.size(); ruleIndex ++) {
- final DerivativeRule rule = ruleList.get(ruleIndex);
+ final DerivativePattern rule = ruleList.get(ruleIndex);
final String root = rule.applyRule(wordChars).toString();
final Word word = dictionary.getWord(root, null);
if (word != null) {
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java 2021-11-04 18:43:31 UTC (rev 11995)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/DerivativeRuleTests.java 2021-11-04 18:47:58 UTC (rev 11996)
@@ -34,16 +34,16 @@
import java.util.regex.Pattern;
/**
- * Tests of {@link DerivativeRule}.
+ * Tests of {@link DerivativePattern}.
*/
public class DerivativeRuleTests {
/**
- * Test of {@link DerivativeRule#applyRule(CharSequence)}.
+ * Test of {@link DerivativePattern#applyRule(CharSequence)}.
*/
@Test
public void testApplyRule() {
- final DerivativeRule out = new DerivativeRule(DerivativeRule.Type.REGULAR_VERB,
+ final DerivativePattern out = new DerivativePattern(DerivativePattern.Type.REGULAR_VERB,
Pattern.compile("^([a-zA-Z\\-]+)ed$"), "$1");
Assert.assertEquals("trust", out.applyRule("trusted"));
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-04 18:43:34
|
Revision: 11995
http://sourceforge.net/p/foray/code/11995
Author: victormote
Date: 2021-11-04 18:43:31 +0000 (Thu, 04 Nov 2021)
Log Message:
-----------
Normal dictionary editing.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
Modified: trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
===================================================================
--- trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-04 18:43:03 UTC (rev 11994)
+++ trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-04 18:43:31 UTC (rev 11995)
@@ -27274,8 +27274,7 @@
<w><t>chris-mon</t></w>
<w><t>chris-om</t></w>
<w><t>Chris-sie</t></w>
-<w><t>Christ</t></w>
-<w><t>Christ</t></w>
+<w><t>Christ</t><noun regular="true"/></w>
<w><t>Chris-ta-bel</t></w>
<w><t>Chris-ta-del-phi-an</t></w>
<w><t>Christ-church</t></w>
@@ -69566,7 +69565,7 @@
<w><t>hig-gle</t></w>
<w><t>hig-gle-dy=pig-gle-dy</t></w>
<w><t>hig-gler</t></w>
-<w><t>high</t></w>
+<w><t>high</t><noun regular="true"/><adjective regular="true"/><adverb/></w>
<w><t>high al-tar</t></w>
<w><t>high com-e-dy</t></w>
<w><t>high com-mand</t></w>
@@ -89512,7 +89511,7 @@
<w><t>lov-age</t></w>
<w><t>lov-at</t></w>
<w><t>Love</t></w>
-<w><t>love</t></w>
+<w><t>love</t><noun regular="true"/><verb regular="true"/></w>
<w><t>love af-fair</t></w>
<w><t>love ap-ple</t></w>
<w><t>love let-ter</t></w>
@@ -89560,7 +89559,7 @@
<w><t>lov-ing-ly</t></w>
<w><t>lov-ing-ness</t></w>
<w><t>Lov-ing-ton</t></w>
-<w><t>low</t></w>
+<w><t>low</t><noun regular="true"/><verb regular="true"/><adjective regular="true"/><adverb/></w>
<w><t>Low</t></w>
<w><t>Low Ar-chi-pel-a-go</t></w>
<w><t>low com-e-dy</t></w>
@@ -132973,7 +132972,7 @@
<w><t>re-al-ist</t></w>
<w><t>re-al-is-tic</t></w>
<w><t>re-al-is-ti-cal-ly</t></w>
-<w><t>re-al-i-ty</t></w>
+<w><t>re-al-i-ty</t><noun regular="true"/></w>
<w><t>re-al-iz-a-bil-i-ty</t></w>
<w><t>re-al-iz-a-ble</t></w>
<w><t>re-al-iz-a-ble-ness</t></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-04 18:43:06
|
Revision: 11994
http://sourceforge.net/p/foray/code/11994
Author: victormote
Date: 2021-11-04 18:43:03 +0000 (Thu, 04 Nov 2021)
Log Message:
-----------
Handle new parts of speech.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-04 13:19:16 UTC (rev 11993)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-04 18:43:03 UTC (rev 11994)
@@ -59,9 +59,9 @@
MASKS[PartOfSpeech.CONJUNCTION.getNumericValue()] = 0x0040; // Index 6. 64
MASKS[PartOfSpeech.ARTICLE.getNumericValue()] = 0x0080; // Index 7. 128
MASKS[PartOfSpeech.INTERJECTION.getNumericValue()] = 0x0100; // Index 8. 256
- /* Leave some room in the middle for exapnsion from either end. */
- MASKS[9] = 0x0200; // Index 9. 512
- MASKS[10] = 0x0400; // Index 10. 1,024
+ MASKS[PartOfSpeech.CARDINAL.getNumericValue()] = 0x0200; // Index 9. 512
+ MASKS[PartOfSpeech.ORDINAL.getNumericValue()] = 0x0400; // Index 10. 1,024
+ /* Leave some room in the middle for expansion from either end. */
MASKS[11] = 0x0800; // Index 11. 2,048
MASKS[12] = 0x1000; // Index 12. 4,096
MASKS[REGULAR_NOUN_INDEX] = 0x2000; // Index 13. 8,192
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-04 13:19:18
|
Revision: 11993
http://sourceforge.net/p/foray/code/11993
Author: victormote
Date: 2021-11-04 13:19:16 +0000 (Thu, 04 Nov 2021)
Log Message:
-----------
Update tests to match word-breaking changes.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/WordBreakerLatin1Tests.java
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/WordBreakerLatin1Tests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/WordBreakerLatin1Tests.java 2021-11-04 12:03:32 UTC (rev 11992)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/WordBreakerLatin1Tests.java 2021-11-04 13:19:16 UTC (rev 11993)
@@ -77,7 +77,7 @@
public void testMedium() {
final String testString = "39. It was the best of times. It was the worst of times. <----";
final List<CharSequence> actual = this.out.breakIntoWords(testString);
- Assert.assertEquals(28, actual.size());
+ Assert.assertEquals(27, actual.size());
Assert.assertEquals("39", actual.get(0));
Assert.assertEquals(". ", actual.get(1));
Assert.assertEquals("It", actual.get(2));
@@ -104,8 +104,7 @@
Assert.assertEquals(" ", actual.get(23));
Assert.assertEquals("times", actual.get(24));
Assert.assertEquals(". ", actual.get(25));
- Assert.assertEquals("<", actual.get(26));
- Assert.assertEquals("----", actual.get(27));
+ Assert.assertEquals("<----", actual.get(26));
}
/**
@@ -117,9 +116,8 @@
final String testString = "Gallop apace, you fiery-footed steeds,";
final List<CharSequence> actual = this.out.breakIntoWords(testString);
- /* Eventually, "fiery-footed" needs to be combined into a hyphenated compound word, but that comes farther
- * downstream. */
- Assert.assertEquals(12, actual.size());
+ /* Compound word "fiery-footed" treated as one word. */
+ Assert.assertEquals(10, actual.size());
Assert.assertEquals("Gallop", actual.get(0));
Assert.assertEquals(" ", actual.get(1));
Assert.assertEquals("apace", actual.get(2));
@@ -126,12 +124,10 @@
Assert.assertEquals(", ", actual.get(3));
Assert.assertEquals("you", actual.get(4));
Assert.assertEquals(" ", actual.get(5));
- Assert.assertEquals("fiery", actual.get(6));
- Assert.assertEquals("-", actual.get(7));
- Assert.assertEquals("footed", actual.get(8));
- Assert.assertEquals(" ", actual.get(9));
- Assert.assertEquals("steeds", actual.get(10));
- Assert.assertEquals(",", actual.get(11));
+ Assert.assertEquals("fiery-footed", actual.get(6));
+ Assert.assertEquals(" ", actual.get(7));
+ Assert.assertEquals("steeds", actual.get(8));
+ Assert.assertEquals(",", actual.get(9));
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-04 12:03:35
|
Revision: 11992
http://sourceforge.net/p/foray/code/11992
Author: victormote
Date: 2021-11-04 12:03:32 +0000 (Thu, 04 Nov 2021)
Log Message:
-----------
Progress handling compound words.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/BooleanUtils.java
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharSequenceUtils.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreaker.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreakerLatin1.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/BooleanUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/BooleanUtils.java 2021-11-04 00:13:21 UTC (rev 11991)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/BooleanUtils.java 2021-11-04 12:03:32 UTC (rev 11992)
@@ -48,4 +48,18 @@
return (byte) (input ? 1 : 0);
}
+ /**
+ * Indicates whether all elements in an array of booleans are true.
+ * @param input The array of booleans to be tested.
+ * @return True if and only if all elements in {@code input} are true;
+ */
+ public static boolean allTrue(final boolean[] input) {
+ for (int index = 0; index < input.length; index ++) {
+ if (! input[index]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharSequenceUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharSequenceUtils.java 2021-11-04 00:13:21 UTC (rev 11991)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/CharSequenceUtils.java 2021-11-04 12:03:32 UTC (rev 11992)
@@ -393,4 +393,20 @@
return builder;
}
+ /**
+ * Tests a sequence of chars to see if it contains a given char.
+ * @param sequence The sequence of chars to be tested.
+ * @param charToFind The char being tested for.
+ * @return True if and only if {@code sequence} contains at least one {@code charToFind}.
+ */
+ public static boolean contains(final CharSequence sequence, final char charToFind) {
+ for (int index = 0; index < sequence.length(); index ++) {
+ final char c = sequence.charAt(index);
+ if (c == charToFind) {
+ return true;
+ }
+ }
+ return false;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-04 00:13:21 UTC (rev 11991)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-04 12:03:32 UTC (rev 11992)
@@ -28,6 +28,9 @@
package org.foray.hyphen;
+import org.foray.common.primitive.BooleanUtils;
+import org.foray.common.primitive.CharSequenceUtils;
+
import org.axsl.hyphen.Dictionary;
import org.axsl.hyphen.OrthographyConfig;
import org.axsl.hyphen.PartOfSpeech;
@@ -64,6 +67,14 @@
/** The parent hyphenation server. */
private HyphenationServer4a server;
+ /* TODO: Following orthography-specific config needs to be moved to XML or subclass. */
+ /** Character marking a compound word. */
+ private char compoundWordMarker = '-';
+
+ /* TODO: Following orthography-specific config needs to be moved to XML or subclass. */
+ /** Regex pattern used to break compound words into their components. */
+ private Pattern compoundWordBreaker = Pattern.compile(Character.toString(compoundWordMarker));
+
/**
* Constructor.
* @param server The parent hyphenation server.
@@ -275,7 +286,19 @@
return true;
}
- /* 4. Check derivative matches in adhoc dictionaries. */
+ /* 4. Check for compound word. */
+ if (CharSequenceUtils.contains(wordChars, '-')) {
+ final String[] components = this.compoundWordBreaker.split(wordChars);
+ final boolean[] componentsValid = new boolean[components.length];
+ for (int index = 0; index < components.length; index ++) {
+ componentsValid[index] = isValidWord(components[index], pos, adhocDictionaries);
+ }
+ if (BooleanUtils.allTrue(componentsValid)) {
+ return true;
+ }
+ }
+
+ /* 5. Check derivative matches in adhoc dictionaries. */
if (adhocDictionaries != null) {
for (int dictIndex = 0; dictIndex < adhocDictionaries.size(); dictIndex ++) {
final Dictionary adhocDictionary = adhocDictionaries.get(dictIndex);
@@ -285,7 +308,7 @@
}
}
- /* 5. Check derivative matches in standard dictionaries for the orthography. */
+ /* 6. Check derivative matches in standard dictionaries for the orthography. */
if (orthoDictionary != null) {
return isDerivativeFound(orthoDictionary, wordChars);
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreaker.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreaker.java 2021-11-04 00:13:21 UTC (rev 11991)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreaker.java 2021-11-04 12:03:32 UTC (rev 11992)
@@ -224,7 +224,7 @@
/**
* Indicates whether a char is a possible word character, depending on context.
* For example, in English, a single quote or typographic apostrophe could be the end of a quotation (not a word
- * character, or mark a contraction or possession (is a word character).
+ * character), or mark a contraction or possession (is a word character).
* @param c The character to be tested.
* @return True if and only if {@code c} is sometimes (but not always) a word character.
*/
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreakerLatin1.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreakerLatin1.java 2021-11-04 00:13:21 UTC (rev 11991)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordBreakerLatin1.java 2021-11-04 12:03:32 UTC (rev 11992)
@@ -36,6 +36,14 @@
public class WordBreakerLatin1 extends WordBreaker {
@Override
+ public CharType isWordChar(final char c) {
+ switch (c) {
+ case '-': return CharType.ALWAYS_WORD_CHAR;
+ default: return super.isWordChar(c);
+ }
+ }
+
+ @Override
public boolean isSometimesWordChar(final char c) {
switch (c) {
/* Typographic apostrophe. */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-04 00:10:35
|
Revision: 11990
http://sourceforge.net/p/foray/code/11990
Author: victormote
Date: 2021-11-04 00:10:32 +0000 (Thu, 04 Nov 2021)
Log Message:
-----------
Use specialized Comparator to do the sorting.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java 2021-11-03 23:31:22 UTC (rev 11989)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java 2021-11-04 00:10:32 UTC (rev 11990)
@@ -28,7 +28,6 @@
package org.foray.hyphen.util;
-import org.foray.common.primitive.StringUtils;
import org.foray.hyphen.SpellChecker;
import org.apache.commons.cli.CommandLine;
@@ -50,6 +49,8 @@
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -56,8 +57,7 @@
/**
* Reads a dictionary XML file <em>as a text file</em> and sorts the word lines in it.
- * This only switches adjacent lines that are out of order, and therefore needs to be run multiple (perhaps many) times
- * to get the entire file sorted.
+ * It assumes that each word is on exactly one line.
*/
public class DictionarySorter {
@@ -67,7 +67,46 @@
/** The logger. */
private Logger logger = LoggerFactory.getLogger(this.getClass());
+ /**
+ * Comparator that knows how to sort the lines of a dictionary file.
+ * It assumes that words are one per line.
+ */
+ private class WordComparator implements Comparator<String> {
+ /** Pattern to find a line containing a {@code <w>} element. */
+ private Pattern linePattern = Pattern.compile("^<w><t>(.*)</t>(.*)</w>$");
+
+ /** Pattern used to remove hyphens. */
+ private Pattern hyphenPattern = Pattern.compile("-");
+
+ /** Pattern used to convert equal signs to hyphens. */
+ private Pattern equalPattern = Pattern.compile("=");
+
+ @Override
+ public int compare(final String string1, final String string2) {
+ final Matcher matcher1 = linePattern.matcher(string1);
+ final Matcher matcher2 = linePattern.matcher(string2);
+
+ /* If either one of them is not a word line, no switching should be done. Return 0 indicating equality. */
+ if (! matcher1.matches()
+ || ! matcher2.matches()) {
+ return 0;
+ }
+
+ final String word1 = computeWord(matcher1);
+ final String word2 = computeWord(matcher2);
+ return word1.compareTo(word2);
+ }
+
+ private String computeWord(final Matcher matcher) {
+ String word = matcher.group(1).toLowerCase();
+ word = hyphenPattern.matcher(word).replaceAll("");
+ word = equalPattern.matcher(word).replaceAll("-");
+ return word;
+ }
+
+ }
+
/**
* Reads an input stream, sorts adjacent lines and writes them to an output stream.
* @param inputStream The input stream to read.
@@ -77,9 +116,9 @@
public void sort(final InputStream inputStream, final OutputStream outputStream) throws IOException {
logger.info("Begin dictionary sorting.");
+ /* Read the lines into a List. */
final InputStreamReader isReader = new InputStreamReader(inputStream);
final BufferedReader reader = new BufferedReader(isReader);
-
final List<String> lines = new ArrayList<String>();
String inputLine = reader.readLine();
while (inputLine != null) {
@@ -87,61 +126,20 @@
inputLine = reader.readLine();
}
+ /* Sort them. */
+ Collections.sort(lines, new WordComparator());
+
+ /* Write them out in sorted order. */
final PrintStream writer = new PrintStream(outputStream);
-
- final Pattern linePattern = Pattern.compile("^<w><t>(.*)</t>(.*)</w>$");
- final Pattern hyphenPattern = Pattern.compile("-");
- final Pattern equalPattern = Pattern.compile("=");
-
- String lastLine = StringUtils.EMPTY_STRING;
- String lastWord = StringUtils.EMPTY_STRING;
- String currentWord = null;
- int changeCount = 0;
-
for (int index = 0; index < lines.size(); index ++) {
- final String currentLine = lines.get(index);
- final Matcher matcher = linePattern.matcher(currentLine);
- if (matcher.matches()) {
- final String hyphenatedWord = matcher.group(1).toLowerCase();
- currentWord = hyphenPattern.matcher(hyphenatedWord).replaceAll("");
- currentWord = equalPattern.matcher(currentWord).replaceAll("-");
- if (currentWord.compareTo(lastWord) < 0) {
- /* Switch them. */
- this.logger.info("Line " + (index + 1) + " moved to line " + index);
- changeCount ++;
- print(writer, currentLine);
- /* Leave lastLine alone. */
- /* Leave lastWord alone. */
-
- } else {
- if (StringUtils.EMPTY_STRING.equals(lastLine)
- && changeCount == 0) {
- } else {
- print(writer, lastLine);
- }
- lastLine = currentLine;
- lastWord = currentWord;
- }
- } else {
- if ("</axsl-dictionary>".equals(currentLine)) {
- print(writer, lastLine);
- lastLine = currentLine;
- } else {
- print(writer, currentLine);
- }
- }
+ final String line = lines.get(index);
+ writer.print(line);
+ writer.print(eol);
}
- print(writer, lastLine);
logger.info("End dictionary sorting. Lines read: " + lines.size());
- logger.info("Qty of lines moved: " + changeCount);
}
- private void print(final PrintStream writer, final String line) {
- writer.print(line);
- writer.print(eol);
- }
-
/**
* Returns the command-line options for the {@link #main(String[])} method.
* @return Command-line options.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 23:31:24
|
Revision: 11989
http://sourceforge.net/p/foray/code/11989
Author: victormote
Date: 2021-11-03 23:31:22 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Read entire file into memory.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java 2021-11-03 23:18:23 UTC (rev 11988)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java 2021-11-03 23:31:22 UTC (rev 11989)
@@ -49,6 +49,8 @@
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -77,6 +79,14 @@
final InputStreamReader isReader = new InputStreamReader(inputStream);
final BufferedReader reader = new BufferedReader(isReader);
+
+ final List<String> lines = new ArrayList<String>();
+ String inputLine = reader.readLine();
+ while (inputLine != null) {
+ lines.add(inputLine);
+ inputLine = reader.readLine();
+ }
+
final PrintStream writer = new PrintStream(outputStream);
final Pattern linePattern = Pattern.compile("^<w><t>(.*)</t>(.*)</w>$");
@@ -86,13 +96,11 @@
String lastLine = StringUtils.EMPTY_STRING;
String lastWord = StringUtils.EMPTY_STRING;
String currentWord = null;
- int lineNumber = 0;
int changeCount = 0;
- String inputLine = reader.readLine();
- while (inputLine != null) {
- lineNumber ++;
- final Matcher matcher = linePattern.matcher(inputLine);
+ for (int index = 0; index < lines.size(); index ++) {
+ final String currentLine = lines.get(index);
+ final Matcher matcher = linePattern.matcher(currentLine);
if (matcher.matches()) {
final String hyphenatedWord = matcher.group(1).toLowerCase();
currentWord = hyphenPattern.matcher(hyphenatedWord).replaceAll("");
@@ -99,9 +107,9 @@
currentWord = equalPattern.matcher(currentWord).replaceAll("-");
if (currentWord.compareTo(lastWord) < 0) {
/* Switch them. */
- this.logger.info("Line " + lineNumber + " moved to line " + (lineNumber - 1));
+ this.logger.info("Line " + (index + 1) + " moved to line " + index);
changeCount ++;
- print(writer, inputLine);
+ print(writer, currentLine);
/* Leave lastLine alone. */
/* Leave lastWord alone. */
@@ -111,22 +119,21 @@
} else {
print(writer, lastLine);
}
- lastLine = inputLine;
+ lastLine = currentLine;
lastWord = currentWord;
}
} else {
- if ("</axsl-dictionary>".equals(inputLine)) {
+ if ("</axsl-dictionary>".equals(currentLine)) {
print(writer, lastLine);
- lastLine = inputLine;
+ lastLine = currentLine;
} else {
- print(writer, inputLine);
+ print(writer, currentLine);
}
}
- inputLine = reader.readLine();
}
print(writer, lastLine);
- logger.info("End dictionary sorting. Lines read: " + lineNumber);
+ logger.info("End dictionary sorting. Lines read: " + lines.size());
logger.info("Qty of lines moved: " + changeCount);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 23:18:26
|
Revision: 11988
http://sourceforge.net/p/foray/code/11988
Author: victormote
Date: 2021-11-03 23:18:23 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Add class for sorting the Dictionary XML as a text file.
Added Paths:
-----------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
Added: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java (rev 0)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java 2021-11-03 23:18:23 UTC (rev 11988)
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen.util;
+
+import org.foray.common.primitive.StringUtils;
+import org.foray.hyphen.SpellChecker;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Reads a dictionary XML file <em>as a text file</em> and sorts the word lines in it.
+ * This only switches adjacent lines that are out of order, and therefore needs to be run multiple (perhaps many) times
+ * to get the entire file sorted.
+ */
+public class DictionarySorter {
+
+ /** The line-ending to write to output. */
+ private String eol = "\n";
+
+ /** The logger. */
+ private Logger logger = LoggerFactory.getLogger(this.getClass());
+
+
+ /**
+ * Reads an input stream, sorts adjacent lines and writes them to an output stream.
+ * @param inputStream The input stream to read.
+ * @param outputStream The output stream to write.
+ * @throws IOException For IO errors during parsing.
+ */
+ public void sort(final InputStream inputStream, final OutputStream outputStream) throws IOException {
+ logger.info("Begin dictionary sorting.");
+
+ final InputStreamReader isReader = new InputStreamReader(inputStream);
+ final BufferedReader reader = new BufferedReader(isReader);
+ final PrintStream writer = new PrintStream(outputStream);
+
+ final Pattern linePattern = Pattern.compile("^<w><t>(.*)</t>(.*)</w>$");
+ final Pattern hyphenPattern = Pattern.compile("-");
+ final Pattern equalPattern = Pattern.compile("=");
+
+ String lastLine = StringUtils.EMPTY_STRING;
+ String lastWord = StringUtils.EMPTY_STRING;
+ String currentWord = null;
+ int lineNumber = 0;
+ int changeCount = 0;
+
+ String inputLine = reader.readLine();
+ while (inputLine != null) {
+ lineNumber ++;
+ final Matcher matcher = linePattern.matcher(inputLine);
+ if (matcher.matches()) {
+ final String hyphenatedWord = matcher.group(1).toLowerCase();
+ currentWord = hyphenPattern.matcher(hyphenatedWord).replaceAll("");
+ currentWord = equalPattern.matcher(currentWord).replaceAll("-");
+ if (currentWord.compareTo(lastWord) < 0) {
+ /* Switch them. */
+ this.logger.info("Line " + lineNumber + " moved to line " + (lineNumber - 1));
+ changeCount ++;
+ print(writer, inputLine);
+ /* Leave lastLine alone. */
+ /* Leave lastWord alone. */
+
+ } else {
+ if (StringUtils.EMPTY_STRING.equals(lastLine)
+ && changeCount == 0) {
+ } else {
+ print(writer, lastLine);
+ }
+ lastLine = inputLine;
+ lastWord = currentWord;
+ }
+ } else {
+ if ("</axsl-dictionary>".equals(inputLine)) {
+ print(writer, lastLine);
+ lastLine = inputLine;
+ } else {
+ print(writer, inputLine);
+ }
+ }
+ inputLine = reader.readLine();
+ }
+ print(writer, lastLine);
+
+ logger.info("End dictionary sorting. Lines read: " + lineNumber);
+ logger.info("Qty of lines moved: " + changeCount);
+ }
+
+ private void print(final PrintStream writer, final String line) {
+ writer.print(line);
+ writer.print(eol);
+ }
+
+ /**
+ * Returns the command-line options for the {@link #main(String[])} method.
+ * @return Command-line options.
+ */
+ private static Options getCommandLineOptions() {
+ final Options clOptions = new Options();
+ final Option input = new Option("i", "input", true, "path to the input file");
+ input.setRequired(true);
+ final Option output = new Option("o", "output", true, "path to the output file");
+ output.setRequired(true);
+
+ clOptions.addOption(input);
+ clOptions.addOption(output);
+ return clOptions;
+ }
+
+ /**
+ * Command line interface.
+ * @param args The command-line arguments. There are two:
+ * <ol>
+ * <li>--input [input file path]</li>
+ * <li>--output [output file path]</li>
+ * </ol>
+ */
+ public static void main(final String[] args) {
+ final Logger logger = LoggerFactory.getLogger(DictionarySorter.class);
+
+ final Options commandLineOptions = DictionarySorter.getCommandLineOptions();
+ final CommandLineParser commandLineParser = new DefaultParser();
+ CommandLine parsedCommandLine = null;
+ try {
+ parsedCommandLine = commandLineParser.parse(commandLineOptions, args);
+ } catch (final ParseException e) {
+ logger.error(e.getMessage(), e);
+ final HelpFormatter helpFormatter = new HelpFormatter();
+ helpFormatter.printHelp("java -cp $FORAY_CLASSPATH " + DictionarySorter.class.getName(), commandLineOptions,
+ true);
+ /* CheckStyle: Allow System.exit() in main method. */
+ System.exit(SpellChecker.STATUS_COMMAND_LINE_ERROR);
+ }
+
+ final String input = parsedCommandLine.getOptionValue("input");
+ final String output = parsedCommandLine.getOptionValue("output");
+
+ try (InputStream is = new FileInputStream(input);
+ OutputStream os = new FileOutputStream(output)) {
+ final DictionarySorter sorter = new DictionarySorter();
+ sorter.sort(is, os);
+ } catch (final IOException e) {
+ logger.error(e.getMessage());
+ }
+ }
+
+}
Property changes on: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/util/DictionarySorter.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 19:24:23
|
Revision: 11987
http://sourceforge.net/p/foray/code/11987
Author: victormote
Date: 2021-11-03 19:24:20 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Normal dictionary editing.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
Modified: trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml
===================================================================
--- trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-03 19:23:01 UTC (rev 11986)
+++ trunk/foray/foray-hyphen/src/main/data/dictionaries/eng-999-Latn.dict.xml 2021-11-03 19:24:20 UTC (rev 11987)
@@ -39,6 +39,13 @@
# We have changed the encoding from MacRomanEncoding to UTF-8, changed the
# hard- and soft-hyphenation characters, wrapped the content in XML tags,
# and made many, many other changes.
+#
+#
+# It is desirable that the entries in this dictionary should be in
+# case-insensitive alphabetical order: 1) alphabetical to assist human authors
+# and users, and 2) case-insensitive to keep similar words together, to clarify
+# the effects of these similar words.
+# TODO: Sort the words in this dictionary as described above.
-->
<w><t>a</t></w>
@@ -47549,28 +47556,6 @@
<w><t>El-ze-vier</t></w>
<w><t>El-ze-vir</t></w>
<w><t>El-ze-vir-i-an</t></w>
-<w><t>eme</t></w>
-<w><t>EMF</t></w>
-<w><t>emf</t></w>
-<w><t>Emp</t></w>
-<w><t>empt</t></w>
-<w><t>Emp-son</t></w>
-<w><t>emp-tied</t></w>
-<w><t>emp-ti-a-ble</t></w>
-<w><t>emp-ti-er</t></w>
-<w><t>emp-ti-est</t></w>
-<w><t>emp-ti-ly</t></w>
-<w><t>emp-ti-ness</t></w>
-<w><t>emp-tor</t></w>
-<w><t>emp-ty</t></w>
-<w><t>Emp-ty Quar-ter</t></w>
-<w><t>emp-ty=hand-ed</t></w>
-<w><t>emp-ty=head-ed</t></w>
-<w><t>emp-ty-ing</t></w>
-<w><t>Ems</t></w>
-<w><t>Ems-worth</t></w>
-<w><t>EMU</t></w>
-<w><t>emu</t></w>
<w><t>em-a-gram</t></w>
<w><t>em-a-nant</t></w>
<w><t>em-a-nate</t></w>
@@ -47749,6 +47734,7 @@
<w><t>em-ceed</t></w>
<w><t>em-cee-ing</t></w>
<w><t>Em-den</t></w>
+<w><t>eme</t></w>
<w><t>Em-er-ald</t></w>
<w><t>em-er-ald</t></w>
<w><t>Em-er-ald Isle</t></w>
@@ -47767,6 +47753,8 @@
<w><t>em-e-sis</t></w>
<w><t>em-e-tin</t></w>
<w><t>em-e-tine</t></w>
+<w><t>EMF</t></w>
+<w><t>emf</t></w>
<w><t>em-is-sar-ies</t></w>
<w><t>em-is-sar-y</t></w>
<w><t>em-is-siv-i-ty</t></w>
@@ -47822,6 +47810,7 @@
<w><t>em-ol-li-tion</t></w>
<w><t>Em-o-gene</t></w>
<w><t>Em-o-ry</t></w>
+<w><t>Emp</t></w>
<w><t>em-paes-tic</t></w>
<w><t>em-pai-stic</t></w>
<w><t>em-pale</t></w>
@@ -47909,6 +47898,19 @@
<w><t>em-presse-ment</t></w>
<w><t>em-prise</t></w>
<w><t>em-prize</t></w>
+<w><t>Emp-son</t></w>
+<w><t>emp-tied</t></w>
+<w><t>emp-ti-a-ble</t></w>
+<w><t>emp-ti-er</t></w>
+<w><t>emp-ti-est</t></w>
+<w><t>emp-ti-ly</t></w>
+<w><t>emp-ti-ness</t></w>
+<w><t>emp-tor</t></w>
+<w><t>emp-ty</t><noun regular="true"/><verb regular="true"/><adjective/></w>
+<w><t>Emp-ty Quar-ter</t></w>
+<w><t>emp-ty=hand-ed</t></w>
+<w><t>emp-ty=head-ed</t></w>
+<w><t>emp-ty-ing</t></w>
<w><t>em-pur-ple</t></w>
<w><t>Em-pu-sae</t></w>
<w><t>em-pyr-e-al</t></w>
@@ -47916,6 +47918,10 @@
<w><t>em-py-e-mic</t></w>
<w><t>em-py-reu-ma</t></w>
<w><t>em-py-re-an</t></w>
+<w><t>Ems</t></w>
+<w><t>Ems-worth</t></w>
+<w><t>EMU</t></w>
+<w><t>emu</t></w>
<w><t>em-ul-gens</t></w>
<w><t>em-u-late</t></w>
<w><t>em-u-lat-ed</t></w>
@@ -83538,7 +83544,7 @@
<w><t>kin-der-gart-ner</t></w>
<w><t>kin-der-gar-ten</t></w>
<w><t>kin-der-gar-ten-er</t></w>
-<w><t>kin-dle</t></w>
+<w><t>kin-dle</t><verb regular="true"/></w>
<w><t>kin-dler</t></w>
<w><t>kin-dling</t></w>
<w><t>kin-dred</t></w>
@@ -151855,12 +151861,13 @@
<w><t>spray-ful-ly</t></w>
<w><t>spray-less</t></w>
<w><t>spray-like</t></w>
-<w><t>spread</t></w>
+<w><t>spread</t><noun/><verb regular="false"/></w>
<w><t>spread ea-gle</t></w>
<w><t>spread=ea-gle</t></w>
<w><t>spread=ea-gle-ism</t></w>
<w><t>spread=ea-gle-ist</t></w>
<w><t>spread-er</t></w>
+<w><t>spread-ing</t><verb/><adjective/></w>
<w><t>sprech-ge-sang</t></w>
<w><t>Sprech-ge-sang</t></w>
<w><t>sprech-stim-me</t></w>
@@ -163434,7 +163441,7 @@
<w><t>toque</t></w>
<w><t>tor</t></w>
<w><t>torc</t></w>
-<w><t>torch</t></w>
+<w><t>torch</t><noun regular="true"/><verb regular="true"/></w>
<w><t>torch-bear-er</t></w>
<w><t>torch-i-er</t></w>
<w><t>torch-i-est</t></w>
@@ -163455,7 +163462,6 @@
<w><t>tor-ban-ite</t></w>
<w><t>Tor-bay</t></w>
<w><t>tor-bern-ite</t></w>
-<w><t>tor-ch</t></w>
<w><t>tor-chier</t></w>
<w><t>tor-chiere</t></w>
<w><t>tor-chon lace</t></w>
@@ -182843,7 +182849,7 @@
<w><t>wa-skied</t></w>
<w><t>wa-ski-ing</t></w>
<w><t>wa-tap</t></w>
-<w><t>wa-ter</t></w>
+<w><t>wa-ter</t><verb regular="true"/></w>
<w><t>wa-ter back</t></w>
<w><t>wa-ter bear</t></w>
<w><t>wa-ter bed</t></w>
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 19:23:03
|
Revision: 11986
http://sourceforge.net/p/foray/code/11986
Author: victormote
Date: 2021-11-03 19:23:01 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Add ability to log dictionary problems.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryResource.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-03 19:10:09 UTC (rev 11985)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-03 19:23:01 UTC (rev 11986)
@@ -133,6 +133,12 @@
/** Buffer in which to capture parsed element content. */
private StringBuilder charBuffer = new StringBuilder(MAX_EXPECTED_WORD_LENGTH);
+ /** Indicates whether this parser should log dictionary problems. */
+ private boolean logDictionaryProblems = false;
+
+ /** The last parsed word, used to verify alphabetical order. */
+ private String lastWord = StringUtils.EMPTY_STRING;
+
/**
* Parses a given InputStream and places the parsed information into the dictionary.
* @param inputSource The input source to parse.
@@ -348,7 +354,15 @@
switch(localName) {
case "w": {
final StringWord word = new StringWord(this.currentPartsOfSpeech, this.currentSegments);
- wordMap.put(word.getActualContent().toString(), word);
+ final String actualContent = word.getActualContent().toString();
+ if (this.logDictionaryProblems) {
+ final String actualContentLowercase = actualContent.toLowerCase();
+ if (actualContentLowercase.compareTo(this.lastWord) < 0) {
+ this.logger.warn("Out of alphabetical sequence: " + actualContent + " " + locationString());
+ }
+ this.lastWord = actualContentLowercase;
+ }
+ wordMap.put(actualContent, word);
break;
}
case "t": {
@@ -439,4 +453,12 @@
return null;
}
+ /**
+ * Sets flag that tells parser to log warnings about problems found in the dictionary input.
+ * @param logDictionaryProblems The logDictionaryProblems to set.
+ */
+ public void setLogDictionaryProblems(final boolean logDictionaryProblems) {
+ this.logDictionaryProblems = logDictionaryProblems;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryResource.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryResource.java 2021-11-03 19:10:09 UTC (rev 11985)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryResource.java 2021-11-03 19:23:01 UTC (rev 11986)
@@ -135,6 +135,7 @@
getLogger().debug("Parsing {}", rawResource.toExternalForm());
final DictionaryParserXml parser = new DictionaryParserXml();
+ parser.setLogDictionaryProblems(true);
SegmentDictionary dictionary = null;
try {
final InputSource source = new InputSource(inputStream);
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java 2021-11-03 19:10:09 UTC (rev 11985)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java 2021-11-03 19:23:01 UTC (rev 11986)
@@ -196,6 +196,7 @@
if (adhocDictionaryPaths != null) {
for (URL adhocDictionaryPath : adhocDictionaryPaths) {
final DictionaryParserXml dictParser = new DictionaryParserXml();
+ dictParser.setLogDictionaryProblems(true);
final InputStream dictInput = adhocDictionaryPath.openStream();
final InputSource source = new InputSource(dictInput);
final List<SegmentDictionary> dictionaries = dictParser.parse(source, adhocDictionaryPath.toString());
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 19:10:12
|
Revision: 11985
http://sourceforge.net/p/foray/code/11985
Author: victormote
Date: 2021-11-03 19:10:09 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
1. Handle new elements. 2. Use switch/case instead of if statements.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-03 14:40:11 UTC (rev 11984)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/ConfigParser.java 2021-11-03 19:10:09 UTC (rev 11985)
@@ -217,34 +217,35 @@
public void startElement(final String uri, final String localName, final String qName,
final Attributes attributes) throws SAXException {
this.elementStack.push(localName);
- if (localName.equals("axsl-orthography-config")) {
+ switch(localName) {
+ case "axsl-orthography-config": {
/* Nothing to do here. */
return;
}
- if (localName.equals("match-rule-list")) {
+ case "match-rule-list": {
final String id = attributes.getValue("id");
this.currentMatchRuleList = new ArrayList<Pattern>();
this.hyphenationServer.registerMatchRules(id, currentMatchRuleList);
return;
}
- if (localName.equals("derivative-rule-list")) {
+ case "derivative-rule-list": {
final String id = attributes.getValue("id");
this.currentDerivativeRuleList = new ArrayList<DerivativeRule>();
this.hyphenationServer.registerDerivativeRules(id, currentDerivativeRuleList);
return;
}
- if (localName.equals("derivative-rule")) {
+ case "derivative-rule": {
final String typeString = attributes.getValue("type");
this.currentDerivativeRuleType = DerivativeRule.Type.fromConfigDescriptor(typeString);
return;
}
- if (localName.equals("derivative-factory-list")) {
+ case "derivative-factory-list": {
final String id = attributes.getValue("id");
this.currentDerivateFactoryList = new ArrayList<WordWrapperFactory<?>>();
this.derivativeLists.put(id, currentDerivateFactoryList);
return;
}
- if (localName.equals("derivative-factory")) {
+ case "derivative-factory": {
final String factoryClassName = attributes.getValue("class");
final WordWrapperFactory<?> factory = instantiate(factoryClassName, WordWrapperFactory.class);
if (factory == null) {
@@ -253,7 +254,7 @@
this.currentDerivateFactoryList.add(factory);
return;
}
- if (localName.equals("word-breaker")) {
+ case "word-breaker": {
final String className = attributes.getValue("class");
final WordBreaker breaker = instantiate(className, WordBreaker.class);
if (breaker == null) {
@@ -262,13 +263,13 @@
this.currentOrthographyConfig.setWordBreaker(breaker);
return;
}
- if (localName.equals("exclusion")) {
+ case "exclusion": {
final String regexPatternString = attributes.getValue("regex-pattern");
final Pattern regexPattern = Pattern.compile(regexPatternString);
this.currentWordListElement.addExclusionPattern(regexPattern);
return;
}
- if (localName.equals("dictionary")) {
+ case "dictionary": {
final String reference = attributes.getValue("reference");
final DictionaryResource resource = this.dictionaries.get(reference);
if (resource == null) {
@@ -279,7 +280,7 @@
}
return;
}
- if (localName.equals("hyphenation-patterns")) {
+ case "hyphenation-patterns": {
final String reference = attributes.getValue("reference");
final HyphenationPatternsResource resource = this.hyphenationPatterns.get(reference);
if (resource == null) {
@@ -290,7 +291,7 @@
}
return;
}
- if (localName.equals("match-rules")) {
+ case "match-rules": {
final String reference = attributes.getValue("reference");
final List<Pattern> patterns = this.hyphenationServer.getMatchRules(reference);
if (patterns == null) {
@@ -301,7 +302,7 @@
}
return;
}
- if (localName.equals("derivative-rules")) {
+ case "derivative-rules": {
final String reference = attributes.getValue("reference");
final List<DerivativeRule> rules = this.hyphenationServer.getDerivativeRules(reference);
if (rules == null) {
@@ -312,7 +313,7 @@
}
return;
}
- if (localName.equals("derivative-factories")) {
+ case "derivative-factories": {
final String reference = attributes.getValue("reference");
final List<WordWrapperFactory<?>> factories = this.derivativeLists.get(reference);
if (factories == null) {
@@ -323,23 +324,23 @@
}
return;
}
- if (localName.equals("dictionary-resource")) {
+ case "dictionary-resource": {
final String id = attributes.getValue("id");
this.currentDictionaryResource = new DictionaryResource(id);
this.dictionaries.put(id, this.currentDictionaryResource);
return;
}
- if (localName.equals("hyphenation-patterns-resource")) {
+ case "hyphenation-patterns-resource": {
final String id = attributes.getValue("id");
this.currentHyphenationPatternsResource = new HyphenationPatternsResource(id);
this.hyphenationPatterns.put(id, this.currentHyphenationPatternsResource);
return;
}
- if (localName.equals("parsed-resource")) {
+ case "parsed-resource": {
/* All processing is done at endElement. */
return;
}
- if (localName.equals("resource-location")) {
+ case "resource-location": {
final String typeString = attributes.getValue("type");
this.currentResourceLocationType = ResourceLocation.Type.fromId(typeString);
if (this.currentResourceLocationType == null) {
@@ -347,29 +348,38 @@
}
return;
}
- if (localName.equals("unparsed-dictionary")) {
+ case "unparsed-dictionary": {
/* All processing is done at endElement. */
return;
}
- if (localName.equals("dictionary-element")) {
+ case "dictionary-element": {
this.currentWordListElement = this.currentDictionaryResource.new WordListElement();
this.currentDictionaryResource.addWordListElement(this.currentWordListElement);
return;
}
- if (localName.equals("unparsed-hyphenation-patterns")) {
+ case "unparsed-hyphenation-patterns": {
/* All processing is done at endElement. */
return;
}
- if (localName.equals("configuration")) {
+ case "configuration": {
this.currentOrthographyConfig = new OrthographyConfig4a(this.hyphenationServer);
return;
}
- if (localName.equals("orthography")) {
+ case "orthography": {
parseElementOrthography(attributes);
return;
}
- // Make sure user knows about unknown tag
- this.logger.error("Unknown tag in orthography configuration: {}", localName);
+ case "match": {
+ return;
+ }
+ case "replace": {
+ return;
+ }
+ default: {
+ // Make sure user knows about unknown tag
+ this.logger.error("Unknown tag in orthography configuration: {}", localName);
+ }
+ }
}
/**
@@ -464,14 +474,15 @@
* @param qName See {@link DefaultHandler#endElement(String, String, String)}.
*/
private void endElementInside(final String uri, final String localName, final String qName) {
- if (localName.equals("axsl-orthography-config")) {
+ switch(localName) {
+ case "axsl-orthography-config": {
return;
}
- if (localName.equals("match-rule-list")) {
+ case "match-rule-list": {
this.currentMatchRuleList = null;
return;
}
- if (localName.equals("match")) {
+ case "match": {
final String matchString = this.textAccumulator.toString();
StringUtils.clear(this.textAccumulator);
final Pattern pattern = Pattern.compile(matchString);
@@ -482,7 +493,7 @@
}
return;
}
- if (localName.equals("derivative-rule")) {
+ case "derivative-rule": {
final DerivativeRule rule = new DerivativeRule(this.currentDerivativeRuleType,
this.currentDerivativeRuleMatch, this.currentDerivativeRuleReplace);
this.currentDerivativeRuleList.add(rule);
@@ -491,50 +502,50 @@
this.currentDerivativeRuleReplace = null;
return;
}
- if (localName.equals("derivative-rule-list")) {
+ case "derivative-rule-list": {
this.currentDerivativeRuleList = null;
return;
}
- if (localName.equals("replace")) {
+ case "replace": {
final String replaceString = this.textAccumulator.toString();
StringUtils.clear(this.textAccumulator);
this.currentDerivativeRuleReplace = replaceString;
return;
}
- if (localName.equals("derivative-factory-list")) {
+ case "derivative-factory-list": {
this.currentDerivateFactoryList = null;
return;
}
- if (localName.equals("derivative-factory")) {
+ case "derivative-factory": {
return;
}
- if (localName.equals("word-breaker")) {
+ case "word-breaker": {
return;
}
- if (localName.equals("exclusion")) {
+ case "exclusion": {
return;
}
- if (localName.equals("dictionary")) {
+ case "dictionary": {
return;
}
- if (localName.equals("hyphenation-patterns")) {
+ case "hyphenation-patterns": {
return;
}
- if (localName.equals("derivative-factories")) {
+ case "derivative-factories": {
return;
}
- if (localName.equals("dictionary-resource")) {
+ case "dictionary-resource": {
this.currentDictionaryResource = null;
return;
}
- if (localName.equals("hyphenation-patterns-resource")) {
+ case "hyphenation-patterns-resource": {
this.currentHyphenationPatternsResource = null;
return;
}
- if (localName.equals("parsed-resource")) {
+ case "parsed-resource": {
return;
}
- if (localName.equals("resource-location")) {
+ case "resource-location": {
final String content = this.textAccumulator.toString();
StringUtils.clear(this.textAccumulator);
switch (this.currentResourceLocationType) {
@@ -567,23 +578,24 @@
this.currentResourceLocation = null;
return;
}
- if (localName.equals("unparsed-dictionary")) {
+ case "unparsed-dictionary": {
return;
}
- if (localName.equals("dictionary-element")) {
+ case "dictionary-element": {
this.currentWordListElement = null;
return;
}
- if (localName.equals("unparsed-hyphenation-patterns")) {
+ case "unparsed-hyphenation-patterns": {
return;
}
- if (localName.equals("configuration")) {
+ case "configuration": {
this.currentOrthographyConfig = null;
return;
}
- if (localName.equals("orthography")) {
+ case "orthography": {
return;
}
+ }
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 14:40:13
|
Revision: 11984
http://sourceforge.net/p/foray/code/11984
Author: victormote
Date: 2021-11-03 14:40:11 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Get part-of-speech data transferred to SegmentDictionaryWord.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionary.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionary.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionary.java 2021-11-03 14:38:45 UTC (rev 11983)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionary.java 2021-11-03 14:40:11 UTC (rev 11984)
@@ -87,7 +87,8 @@
/* Size of this.wordSegments was checked at construction time, so this cast should be safe. */
dictionarySegmentIndexes[segmentIndex] = (char) dictionarySegmentIndex;
}
- final SegmentDictionaryWord dictWord = new SegmentDictionaryWord(this, dictionarySegmentIndexes);
+ final SegmentDictionaryWord dictWord = new SegmentDictionaryWord(
+ word.getPartsOfSpeech(), this, dictionarySegmentIndexes);
this.wordMap.put(rawWord, dictWord);
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java 2021-11-03 14:38:45 UTC (rev 11983)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java 2021-11-03 14:40:11 UTC (rev 11984)
@@ -47,12 +47,17 @@
/** Array of the indexes into the dictionary segments. */
private char[] segments;
+ /** The encoded part(s) of speech for this word. */
+ private char partsOfSpeech;
+
/**
* Constructor.
+ * @param partsOfSpeech The encoded part(s) of speech for this word.
* @param dictionary The parent dictionary that contains the character data.
* @param segments Array of the indexes into the dictionary segments.
*/
- public SegmentDictionaryWord(final SegmentDictionary dictionary, final char[] segments) {
+ public SegmentDictionaryWord(final int partsOfSpeech, final SegmentDictionary dictionary, final char[] segments) {
+ this.partsOfSpeech = (char) partsOfSpeech;
this.dictionary = dictionary;
this.segments = segments;
}
@@ -70,8 +75,7 @@
@Override
public Boolean isOfType(final PartOfSpeech pos, final PosRegularity regularity) {
- /* TODO: Implement this. */
- return Boolean.FALSE;
+ return PosUtils.isOfType(this.partsOfSpeech, pos, regularity);
}
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-03 14:38:45 UTC (rev 11983)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-03 14:40:11 UTC (rev 11984)
@@ -109,4 +109,12 @@
return PosUtils.isOfType(this.partsOfSpeech, pos, regularity);
}
+ /**
+ * Returns the encoded parts of speech data for this word.
+ * @return The encoded parts of speech data for this word.
+ */
+ public char getPartsOfSpeech() {
+ return this.partsOfSpeech;
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 14:38:48
|
Revision: 11983
http://sourceforge.net/p/foray/code/11983
Author: victormote
Date: 2021-11-03 14:38:45 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Fix cut-and-paste error in evaluating regularity.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-03 14:37:59 UTC (rev 11982)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-03 14:38:45 UTC (rev 11983)
@@ -179,10 +179,15 @@
*/
public static boolean isOfType(final char flags, final PartOfSpeech pos, final PosRegularity regularity) {
final boolean isPosMatch = PosUtils.isPartOfSpeech(flags, pos);
+ if (isPosMatch == false) {
+ return false;
+ }
+
if (regularity == null
|| regularity == PosRegularity.IRREGULAR) {
- return isPosMatch;
+ return true;
}
+
boolean isRegularityMatch = false;
switch(pos) {
case NOUN: {
@@ -190,11 +195,11 @@
break;
}
case VERB: {
- isRegularityMatch = PosUtils.isRegularNoun(flags);
+ isRegularityMatch = PosUtils.isRegularVerb(flags);
break;
}
case ADJECTIVE: {
- isRegularityMatch = PosUtils.isRegularNoun(flags);
+ isRegularityMatch = PosUtils.isRegularAdjective(flags);
break;
}
default: {
@@ -201,7 +206,7 @@
isRegularityMatch = true;
}
}
- return isPosMatch && isRegularityMatch;
+ return isRegularityMatch;
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 14:38:02
|
Revision: 11982
http://sourceforge.net/p/foray/code/11982
Author: victormote
Date: 2021-11-03 14:37:59 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Create the word after part-of-speech data has been parsed.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-03 13:31:25 UTC (rev 11981)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-03 14:37:59 UTC (rev 11982)
@@ -64,7 +64,7 @@
*/
public class DictionaryParserXml extends DefaultHandler2 {
- private class Dictionary {
+ private class DictionaryElement {
/** The orthography for this dictionary. */
private Orthography4a orthography;
@@ -101,8 +101,11 @@
private List<SegmentDictionary> parsedDictionaries = new ArrayList<SegmentDictionary>();
/** The current dictionary being parsed. */
- private Dictionary currentDictionary;
+ private DictionaryElement currentDictionary;
+ /** The current word content being parsed. */
+ private StringWordSegment[] currentSegments;
+
/** The current parts of speech being parsed. */
private char currentPartsOfSpeech;
@@ -314,7 +317,7 @@
}
case "word-group": break;
case "axsl-dictionary": {
- this.currentDictionary = new Dictionary();
+ this.currentDictionary = new DictionaryElement();
final String language = attributes.getValue(StringUtils.EMPTY_STRING, "language");
final String country = attributes.getValue(StringUtils.EMPTY_STRING, "country");
final String script = attributes.getValue(StringUtils.EMPTY_STRING, "script");
@@ -343,7 +346,11 @@
@Override
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
switch(localName) {
- case "w": break;
+ case "w": {
+ final StringWord word = new StringWord(this.currentPartsOfSpeech, this.currentSegments);
+ wordMap.put(word.getActualContent().toString(), word);
+ break;
+ }
case "t": {
final String inputLine = this.charBuffer.toString().trim();
StringUtils.clear(this.charBuffer);
@@ -379,10 +386,8 @@
if (segmentList.size() < 1) {
throw new SAXException("0-syllable word: " + this.locationString());
}
- final StringWordSegment[] segments = new StringWordSegment[segmentList.size()];
- segmentList.toArray(segments);
- final StringWord word = new StringWord(this.currentPartsOfSpeech, segments);
- wordMap.put(word.getActualContent().toString(), word);
+ this.currentSegments = new StringWordSegment[segmentList.size()];
+ segmentList.toArray(this.currentSegments);
break;
}
case "noun": break;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-03 13:31:28
|
Revision: 11981
http://sourceforge.net/p/foray/code/11981
Author: victormote
Date: 2021-11-03 13:31:25 +0000 (Wed, 03 Nov 2021)
Log Message:
-----------
Improvements in dictionary/spell-checking logic.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParser.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationConsumer4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/MutatingWord4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PatternTree.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/SegmentDictionaryTests.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/StringWordTests.java
trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -28,6 +28,8 @@
package org.foray.hyphen;
+import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
import org.axsl.hyphen.Word;
import java.util.Arrays;
@@ -146,8 +148,23 @@
* @return True if and only if this rule applies to {@code word}.
*/
boolean doesRulyApply(final Word word) {
- /* TODO: Complete this method after word knows how to tell its parts-of-speech. */
- return false;
+ switch(this.type) {
+ case ANY: {
+ return true;
+ }
+ case REGULAR_NOUN: {
+ return word.isOfType(PartOfSpeech.NOUN, PosRegularity.REGULAR);
+ }
+ case REGULAR_VERB: {
+ return word.isOfType(PartOfSpeech.VERB, PosRegularity.REGULAR);
+ }
+ case REGULAR_ADJECTIVE: {
+ return word.isOfType(PartOfSpeech.ADJECTIVE, PosRegularity.REGULAR);
+ }
+ default: {
+ return false;
+ }
+ }
}
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParser.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParser.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParser.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -141,7 +141,7 @@
}
final StringWordSegment[] segments = new StringWordSegment[segmentList.size()];
segmentList.toArray(segments);
- final StringWord word = new StringWord(segments);
+ final StringWord word = new StringWord(0, segments);
wordMap.put(word.getActualContent().toString(), word);
inputLine = reader.readLine();
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DictionaryParserXml.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -32,6 +32,8 @@
import org.foray.common.i18n.Orthography4a;
import org.foray.common.primitive.StringUtils;
+import org.axsl.hyphen.PartOfSpeech;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
@@ -101,6 +103,9 @@
/** The current dictionary being parsed. */
private Dictionary currentDictionary;
+ /** The current parts of speech being parsed. */
+ private char currentPartsOfSpeech;
+
/* TODO: The following List and contents are oriented toward English & Western European languages.
* They should be moved to the orthography configuration. */
/** The list of string factories that should be tried when building the strings. */
@@ -252,20 +257,61 @@
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes)
throws SAXException {
switch(localName) {
- case "w": break;
+ case "w": {
+ this.currentPartsOfSpeech = 0;
+ break;
+ }
case "t": {
break;
}
- case "noun": break;
- case "pronoun": break;
- case "verb": break;
- case "adjective": break;
- case "adverb": break;
- case "preposition": break;
- case "conjunction": break;
- case "article": break;
- case "interjection": break;
- case "participle": break;
+ case "noun": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.NOUN);
+ final String regularity = attributes.getValue("regular");
+ if ("true".equals(regularity)) {
+ this.currentPartsOfSpeech = PosUtils.encodeRegularNoun(this.currentPartsOfSpeech);
+ }
+ break;
+ }
+ case "pronoun": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.PRONOUN);
+ break;
+ }
+ case "verb": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.VERB);
+ final String regularity = attributes.getValue("regular");
+ if ("true".equals(regularity)) {
+ this.currentPartsOfSpeech = PosUtils.encodeRegularVerb(this.currentPartsOfSpeech);
+ }
+ break;
+ }
+ case "adjective": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.ADJECTIVE);
+ final String regularity = attributes.getValue("regular");
+ if ("true".equals(regularity)) {
+ this.currentPartsOfSpeech = PosUtils.encodeRegularAdjective(this.currentPartsOfSpeech);
+ }
+ break;
+ }
+ case "adverb": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.ADVERB);
+ break;
+ }
+ case "preposition": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.PREPOSITION);
+ break;
+ }
+ case "conjunction": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.CONJUNCTION);
+ break;
+ }
+ case "article": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.ARTICLE);
+ break;
+ }
+ case "interjection": {
+ this.currentPartsOfSpeech = PosUtils.encodePosInfo(this.currentPartsOfSpeech, PartOfSpeech.INTERJECTION);
+ break;
+ }
case "word-group": break;
case "axsl-dictionary": {
this.currentDictionary = new Dictionary();
@@ -335,7 +381,7 @@
}
final StringWordSegment[] segments = new StringWordSegment[segmentList.size()];
segmentList.toArray(segments);
- final StringWord word = new StringWord(segments);
+ final StringWord word = new StringWord(this.currentPartsOfSpeech, segments);
wordMap.put(word.getActualContent().toString(), word);
break;
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationConsumer4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationConsumer4a.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/HyphenationConsumer4a.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -155,7 +155,7 @@
/* Chunk is a word. */
Word word = hyphenate(chunk, 0, chunk.length(), paraConfig.getOrthography());
if (word == null) {
- word = new StringWord(chunk);
+ word = new StringWord(0, chunk);
}
wordSequence.add(word);
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/MutatingWord4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/MutatingWord4a.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/MutatingWord4a.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -48,20 +48,22 @@
/**
* Constructor.
+ * @param partsOfSpeech The encoded part(s) of speech for this word.
* @param word The word being hyphenated.
*/
- public MutatingWord4a(final StringWordSegmentUtf16[] word) {
- super(word);
+ public MutatingWord4a(final int partsOfSpeech, final StringWordSegmentUtf16[] word) {
+ super(partsOfSpeech, word);
this.breaks = new DiscretionaryHyphenMutating4a[word.length - 1];
}
/**
* Constructor for already-computed hyphenation points.
+ * @param partsOfSpeech The encoded part(s) of speech for this word.
* @param wordSequence The word being hyphenated.
* @param points The hyphenation points for the word.
*/
- public MutatingWord4a(final CharSequence wordSequence, final byte[] points) {
- super(wordSequence, points);
+ public MutatingWord4a(final int partsOfSpeech, final CharSequence wordSequence, final byte[] points) {
+ super(partsOfSpeech, wordSequence, points);
this.breaks = new DiscretionaryHyphenMutating4a[points.length];
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -286,9 +286,9 @@
}
/* 5. Check derivative matches in standard dictionaries for the orthography. */
-// if (orthoDictionary != null) {
-// return isDerivativeFound(orthoDictionary, wordChars);
-// }
+ if (orthoDictionary != null) {
+ return isDerivativeFound(orthoDictionary, wordChars);
+ }
/* Not found in any dictionary. */
return false;
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PatternTree.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PatternTree.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PatternTree.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -307,9 +307,9 @@
final byte[] returnValues = new byte[k];
System.arraycopy(values, 0, returnValues, 0, k);
if (qtyMorphExceptions > 0) {
- return new MutatingWord4a(theWord, returnPoints);
+ return new MutatingWord4a(0, theWord, returnPoints);
} else {
- return new StringWord(theWord, returnPoints);
+ return new StringWord(0, theWord, returnPoints);
}
}
return null;
@@ -476,9 +476,9 @@
final StringWord exception;
if (qtyMorphExceptions > 0) {
- exception = new MutatingWord4a(unhyphenatedWord, points);
+ exception = new MutatingWord4a(0, unhyphenatedWord, points);
} else {
- exception = new StringWord(unhyphenatedWord, points);
+ exception = new StringWord(0, unhyphenatedWord, points);
}
this.exceptions.put(unhyphenatedWord, exception);
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -29,6 +29,7 @@
package org.foray.hyphen;
import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
/**
* Utilities related to {@link PartOfSpeech}.
@@ -169,4 +170,38 @@
return (flags & mask) != 0;
}
+ /**
+ * Indicates whether a given set of flags matches both a given part of speech and regularity indicator.
+ * @param flags The encoded flags to be tested.
+ * @param pos The part of speech being tested for.
+ * @param regularity The regularity being tested for.
+ * @return True if and only if the flags match both the part of speech and regularity indicator.
+ */
+ public static boolean isOfType(final char flags, final PartOfSpeech pos, final PosRegularity regularity) {
+ final boolean isPosMatch = PosUtils.isPartOfSpeech(flags, pos);
+ if (regularity == null
+ || regularity == PosRegularity.IRREGULAR) {
+ return isPosMatch;
+ }
+ boolean isRegularityMatch = false;
+ switch(pos) {
+ case NOUN: {
+ isRegularityMatch = PosUtils.isRegularNoun(flags);
+ break;
+ }
+ case VERB: {
+ isRegularityMatch = PosUtils.isRegularNoun(flags);
+ break;
+ }
+ case ADJECTIVE: {
+ isRegularityMatch = PosUtils.isRegularNoun(flags);
+ break;
+ }
+ default: {
+ isRegularityMatch = true;
+ }
+ }
+ return isPosMatch && isRegularityMatch;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -45,11 +45,16 @@
/** The segments being hyphenated, stored as an array of string segments. */
private StringWordSegmentUtf16[] segments;
+ /** The encoded part(s) of speech for this word. */
+ private char partsOfSpeech;
+
/**
* Constructor for a sequence of segments.
+ * @param partsOfSpeech The encoded part(s) of speech for this word.
* @param segments The segments being hyphenated.
*/
- public StringWord(final StringWordSegmentUtf16... segments) {
+ public StringWord(final int partsOfSpeech, final StringWordSegmentUtf16... segments) {
+ this.partsOfSpeech = (char) partsOfSpeech;
this.segments = segments;
}
@@ -56,9 +61,11 @@
/**
* Constructor for a sequence of Strings.
* Each String is converted to a WordSegment.
+ * @param partsOfSpeech The encoded part(s) of speech for this word.
* @param segments The segments being hyphenated.
*/
- public StringWord(final CharSequence... segments) {
+ public StringWord(final int partsOfSpeech, final CharSequence... segments) {
+ this.partsOfSpeech = (char) partsOfSpeech;
this.segments = new StringWordSegmentUtf16[segments.length];
for (int index = 0; index < segments.length; index ++) {
final StringWordSegmentUtf16 segment = new StringWordSegmentUtf16(segments[index]);
@@ -68,10 +75,12 @@
/**
* Constructor for already-computed hyphenation points.
+ * @param partsOfSpeech The encoded part(s) of speech for this word.
* @param wordSequence The segments being hyphenated.
* @param points The hyphenation points for the segments.
*/
- public StringWord(final CharSequence wordSequence, final byte[] points) {
+ public StringWord(final int partsOfSpeech, final CharSequence wordSequence, final byte[] points) {
+ this.partsOfSpeech = (char) partsOfSpeech;
final char[][] segmentChars = CharSequenceUtils.split(wordSequence, points);
final StringWordSegmentUtf16[] word = new StringWordSegmentUtf16[segmentChars.length];
for (int index = 0; index < segmentChars.length; index ++) {
@@ -97,8 +106,7 @@
@Override
public Boolean isOfType(final PartOfSpeech pos, final PosRegularity regularity) {
- /* TODO: Implement this or document why not. */
- return Boolean.FALSE;
+ return PosUtils.isOfType(this.partsOfSpeech, pos, regularity);
}
}
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -153,7 +153,7 @@
}
/**
- * Test of {@link PosUtils#isPartOfSpeech(char, PartOfSpeech)} and related encoding methods.
+ * Test of {@link PosUtils#isPartOfSpeech(char, PartOfSpeech)} and related decoding methods.
*/
@Test
public void testIsPartOfSpeech() {
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/SegmentDictionaryTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/SegmentDictionaryTests.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/SegmentDictionaryTests.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -42,19 +42,19 @@
public class SegmentDictionaryTests {
/** A test word: at-ten-tion. */
- public static final StringWord WORD_ATTENTION = new StringWord("at", "ten", "tion");
+ public static final StringWord WORD_ATTENTION = new StringWord(0, "at", "ten", "tion");
/** A test word: in-ten-tion. */
- public static final StringWord WORD_INTENTION = new StringWord("in", "ten", "tion");
+ public static final StringWord WORD_INTENTION = new StringWord(0, "in", "ten", "tion");
/** A test word: am-bi-tion. */
- public static final StringWord WORD_AMBITION = new StringWord("am", "bi", "tion");
+ public static final StringWord WORD_AMBITION = new StringWord(0, "am", "bi", "tion");
/** A test word: in-trep-id. */
- public static final StringWord WORD_INTREPID = new StringWord("in", "trep", "id");
+ public static final StringWord WORD_INTREPID = new StringWord(0, "in", "trep", "id");
/** A test word: har-mo-ni-ous. */
- public static final StringWord WORD_HARMONIOUS = new StringWord("har", "mo", "ni", "ous");
+ public static final StringWord WORD_HARMONIOUS = new StringWord(0, "har", "mo", "ni", "ous");
/** The object under test. */
private SegmentDictionary out;
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/StringWordTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/StringWordTests.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/StringWordTests.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -41,20 +41,20 @@
public class StringWordTests {
/** A test word: phi-los-o-phy. */
- public static final StringWord WORD_PHILOSOPHY = new StringWord("phi", "los", "o", "phy");
+ public static final StringWord WORD_PHILOSOPHY = new StringWord(0, "phi", "los", "o", "phy");
/** A test word: daugh-ter. */
- public static final StringWord WORD_DAUGHTER = new StringWord("daugh", "ter");
+ public static final StringWord WORD_DAUGHTER = new StringWord(0, "daugh", "ter");
/** A possessive test word: build-ing. */
- public static final StringWord WORD_BUILDING = new StringWord("build", "ing");
+ public static final StringWord WORD_BUILDING = new StringWord(0, "build", "ing");
/** A word that, if pluralized, changes the spelling of part of the root: com-pa-ny becomes com-pa-nies. */
- public static final StringWord WORD_COMPANY = new StringWord("com", "pa", "ny");
+ public static final StringWord WORD_COMPANY = new StringWord(0, "com", "pa", "ny");
/** A word that, if converted to past tense or converted to adjective form, adds the syllable "ed" as a suffix:
* as-ton-ish becomes as-ton-ish-ed. */
- public static final StringWord WORD_ASTONISH = new StringWord("as", "ton", "ish");
+ public static final StringWord WORD_ASTONISH = new StringWord(0, "as", "ton", "ish");
/**
* Test of {@link StringWord#getWordComponent(int)}.
@@ -77,7 +77,7 @@
public void testCharAt() {
final String input = "a12b12345c1";
final byte[] hyphenationPoints = new byte[] {3, 9};
- final StringWord out = new StringWord(input, hyphenationPoints);
+ final StringWord out = new StringWord(0, input, hyphenationPoints);
Assert.assertEquals('a', out.charAt(0));
Assert.assertEquals('1', out.charAt(1));
Modified: trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java
===================================================================
--- trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java 2021-11-02 18:06:44 UTC (rev 11980)
+++ trunk/foray/foray-linebreak/src/test/java/org/foray/linebreak/TotalFitLbTests.java 2021-11-03 13:31:25 UTC (rev 11981)
@@ -105,14 +105,14 @@
/* Make manual changes to get the paragraph features identical to our baseline paragraph, as documented
* in the Knuth-Plass article. These are all cases where the native hyphenation opportunities violate the
* "leave at least 2, and push at least 3" general rule. */
- paragraph.setParaNodeChild(2, new StringWord("olden")); // Native = old-en
- paragraph.setParaNodeChild(66, new StringWord("aston", "ished")); // Native = as-ton-ish-ed
- paragraph.setParaNodeChild(68, new StringWord("when", "ever")); // Native = when-ev-er
- paragraph.setParaNodeChild(88, new StringWord("castle")); // Native = cas-tle
- paragraph.setParaNodeChild(134, new StringWord("very")); // Native = ver-y
- paragraph.setParaNodeChild(148, new StringWord("into")); // Native = in-to
- paragraph.setParaNodeChild(190, new StringWord("golden")); // Native = gold-en
- paragraph.setParaNodeChild(222, new StringWord("favor", "ite")); // Native = fa-vor-ite
+ paragraph.setParaNodeChild(2, new StringWord(0, "olden")); // Native = old-en
+ paragraph.setParaNodeChild(66, new StringWord(0, "aston", "ished")); // Native = as-ton-ish-ed
+ paragraph.setParaNodeChild(68, new StringWord(0, "when", "ever")); // Native = when-ev-er
+ paragraph.setParaNodeChild(88, new StringWord(0, "castle")); // Native = cas-tle
+ paragraph.setParaNodeChild(134, new StringWord(0, "very")); // Native = ver-y
+ paragraph.setParaNodeChild(148, new StringWord(0, "into")); // Native = in-to
+ paragraph.setParaNodeChild(190, new StringWord(0, "golden")); // Native = gold-en
+ paragraph.setParaNodeChild(222, new StringWord(0, "favor", "ite")); // Native = fa-vor-ite
final LineBreakControl lbControl = Mockito.mock(LineBreakControl.class);
final OutputLine lineOutput = Mockito.mock(OutputLine.class);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-02 18:06:47
|
Revision: 11980
http://sourceforge.net/p/foray/code/11980
Author: victormote
Date: 2021-11-02 18:06:44 +0000 (Tue, 02 Nov 2021)
Log Message:
-----------
Handle "regular" flags. Add more tests.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-02 17:19:13 UTC (rev 11979)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-02 18:06:44 UTC (rev 11980)
@@ -49,23 +49,23 @@
/** Mask suitable for accumulating multiple parts of speech in one char. */
private static final char[] MASKS = new char[16];
static {
- MASKS[PartOfSpeech.NOUN.getNumericValue()] = 0x0001; // Index 0.
- MASKS[PartOfSpeech.PRONOUN.getNumericValue()] = 0x0002; // Index 1.
- MASKS[PartOfSpeech.VERB.getNumericValue()] = 0x0004; // Index 2.
- MASKS[PartOfSpeech.ADJECTIVE.getNumericValue()] = 0x0008; // Index 3.
- MASKS[PartOfSpeech.ADVERB.getNumericValue()] = 0x0010; // Index 4.
- MASKS[PartOfSpeech.PREPOSITION.getNumericValue()] = 0x0020; // Index 5.
- MASKS[PartOfSpeech.CONJUNCTION.getNumericValue()] = 0x0040; // Index 6.
- MASKS[PartOfSpeech.ARTICLE.getNumericValue()] = 0x0080; // Index 7.
- MASKS[PartOfSpeech.INTERJECTION.getNumericValue()] = 0x0100; // Index 8.
+ MASKS[PartOfSpeech.NOUN.getNumericValue()] = 0x0001; // Index 0. 1
+ MASKS[PartOfSpeech.PRONOUN.getNumericValue()] = 0x0002; // Index 1. 2
+ MASKS[PartOfSpeech.VERB.getNumericValue()] = 0x0004; // Index 2. 4
+ MASKS[PartOfSpeech.ADJECTIVE.getNumericValue()] = 0x0008; // Index 3. 8
+ MASKS[PartOfSpeech.ADVERB.getNumericValue()] = 0x0010; // Index 4. 16
+ MASKS[PartOfSpeech.PREPOSITION.getNumericValue()] = 0x0020; // Index 5. 32
+ MASKS[PartOfSpeech.CONJUNCTION.getNumericValue()] = 0x0040; // Index 6. 64
+ MASKS[PartOfSpeech.ARTICLE.getNumericValue()] = 0x0080; // Index 7. 128
+ MASKS[PartOfSpeech.INTERJECTION.getNumericValue()] = 0x0100; // Index 8. 256
/* Leave some room in the middle for expansion from either end. */
- MASKS[9] = 0x0200; // Index 9.
- MASKS[10] = 0x0400; // Index 10.
- MASKS[11] = 0x0800; // Index 11.
- MASKS[12] = 0x1000; // Index 12.
- MASKS[REGULAR_NOUN_INDEX] = 0x2000; // Index 13.
- MASKS[REGULAR_VERB_INDEX] = 0x4000; // Index 14.
- MASKS[REGULAR_ADJECTIVE_INDEX] = 0x8000; // Index 15.
+ MASKS[9] = 0x0200; // Index 9. 512
+ MASKS[10] = 0x0400; // Index 10. 1,024
+ MASKS[11] = 0x0800; // Index 11. 2,048
+ MASKS[12] = 0x1000; // Index 12. 4,096
+ MASKS[REGULAR_NOUN_INDEX] = 0x2000; // Index 13. 8,192
+ MASKS[REGULAR_VERB_INDEX] = 0x4000; // Index 14. 16,384
+ MASKS[REGULAR_ADJECTIVE_INDEX] = 0x8000; // Index 15. 32,768
/* */
}
@@ -88,19 +88,85 @@
return (char) (existing | mask);
}
+ /**
+ * Adds the "regular noun" flag to the existing flags.
+ * @param existing The existing flags.
+ * @return The new flags, consisting of all existing flags plus the one just added.
+ */
public static char encodeRegularNoun(final char existing) {
+ if (! PosUtils.isPartOfSpeech(existing, PartOfSpeech.NOUN)) {
+ throw new IllegalStateException("Cannot set \"regular noun\" flag unless already a noun.");
+ }
final char mask = MASKS[REGULAR_NOUN_INDEX];
return (char) (existing | mask);
}
+ /**
+ * Adds the "regular verb" flag to the existing flags.
+ * @param existing The existing flags.
+ * @return The new flags, consisting of all existing flags plus the one just added.
+ */
public static char encodeRegularVerb(final char existing) {
+ if (! PosUtils.isPartOfSpeech(existing, PartOfSpeech.VERB)) {
+ throw new IllegalStateException("Cannot set \"regular verb\" flag unless already a verb.");
+ }
final char mask = MASKS[REGULAR_VERB_INDEX];
return (char) (existing | mask);
}
+ /**
+ * Adds the "regular adjective" flag to the existing flags.
+ * @param existing The existing flags.
+ * @return The new flags, consisting of all existing flags plus the one just added.
+ */
public static char encodeRegularAdjective(final char existing) {
+ if (! PosUtils.isPartOfSpeech(existing, PartOfSpeech.ADJECTIVE)) {
+ throw new IllegalStateException("Cannot set \"regular adjective\" flag unless already an adjective.");
+ }
final char mask = MASKS[REGULAR_ADJECTIVE_INDEX];
return (char) (existing | mask);
}
+ /**
+ * Indicates whether a given part of speech flag is set.
+ * @param flags The flags being tested.
+ * @param pos The part of speech being tested for.
+ * @return True if and only if the flag is set for {@code pos}.
+ */
+ public static boolean isPartOfSpeech(final char flags, final PartOfSpeech pos) {
+ final int index = pos.getNumericValue();
+ final char mask = MASKS[index];
+ return (flags & mask) != 0;
+ }
+
+ /**
+ * Indicates whether the "regular noun" flag is set.
+ * @param flags The flags being tested.
+ * @return True if and only if the "regular noun" flag is set.
+ */
+ public static boolean isRegularNoun(final char flags) {
+ final char mask = MASKS[REGULAR_NOUN_INDEX];
+ return (flags & mask) != 0;
+ }
+
+ /**
+ * Indicates whether the "regular verb" flag is set.
+ * @param flags The flags being tested.
+ * @return True if and only if the "regular verb" flag is set.
+ */
+ public static boolean isRegularVerb(final char flags) {
+ final char mask = MASKS[REGULAR_VERB_INDEX];
+ return (flags & mask) != 0;
+ }
+
+ /**
+ * Indicates whether the "regular adjective" flag is set.
+ * @param flags The flags being tested.
+ * @return True if and only if the "regular adjective" flag is set.
+ */
+ public static boolean isRegularAdjective(final char flags) {
+ final char mask = MASKS[REGULAR_ADJECTIVE_INDEX];
+ return (flags & mask) != 0;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java 2021-11-02 17:19:13 UTC (rev 11979)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java 2021-11-02 18:06:44 UTC (rev 11980)
@@ -69,17 +69,17 @@
/* Add 8192. */
running = PosUtils.encodeRegularNoun(running);
- Assert.assertEquals(511 + 8192, running);
+ Assert.assertEquals(511 + 8_192, running);
/* Add 16,384. */
running = PosUtils.encodeRegularVerb(running);
- Assert.assertEquals(511 + 8192 + 16384, running);
+ Assert.assertEquals(511 + 8_192 + 16_384, running);
/* Add 32,768. */
running = PosUtils.encodeRegularAdjective(running);
- Assert.assertEquals(511 + 8192 + 16384 + 32768, running);
+ Assert.assertEquals(511 + 8_192 + 16_384 + 32_768, running);
}
/**
- * Test of {@link PosUtils#encodePosInfo(char, org.axsl.hyphen.PartOfSpeech)} and related encoding methods.
+ * Test of {@link PosUtils#encodePosInfo(char, org.axsl.hyphen.PartOfSpeech)}.
*/
@Test
public void testOfOrInsteadOfAdd() {
@@ -91,4 +91,86 @@
Assert.assertEquals(256, running);
}
+ /**
+ * Test of {@link PosUtils#encodeRegularNoun(char)} and related, to ensure that an exception is thrown if the
+ * related part-of-speech flag is not set.
+ */
+ @Test
+ public void testOfIllegalStates() {
+ char flags = 0;
+ try {
+ flags = PosUtils.encodeRegularNoun(flags);
+ Assert.fail("Exception expected.");
+ } catch (final IllegalStateException e) {
+ /* This is the expected case. */
+ Assert.assertEquals("Cannot set \"regular noun\" flag unless already a noun.", e.getMessage());
+ }
+
+ flags = 0;
+ try {
+ flags = PosUtils.encodeRegularVerb(flags);
+ Assert.fail("Exception expected.");
+ } catch (final IllegalStateException e) {
+ /* This is the expected case. */
+ Assert.assertEquals("Cannot set \"regular verb\" flag unless already a verb.", e.getMessage());
+ }
+
+ flags = 0;
+ try {
+ flags = PosUtils.encodeRegularAdjective(flags);
+ Assert.fail("Exception expected.");
+ } catch (final IllegalStateException e) {
+ /* This is the expected case. */
+ Assert.assertEquals("Cannot set \"regular adjective\" flag unless already an adjective.", e.getMessage());
+ }
+
+ /* Now make sure the normal cases work correctly. */
+ flags = 0;
+ flags = PosUtils.encodePosInfo(flags, PartOfSpeech.NOUN);
+ Assert.assertEquals(1, flags);
+ flags = PosUtils.encodeRegularNoun(flags);
+ Assert.assertEquals(1 + 8_192, flags);
+ Assert.assertTrue(PosUtils.isPartOfSpeech(flags, PartOfSpeech.NOUN));
+ Assert.assertTrue(PosUtils.isRegularNoun(flags));
+
+ /* Now make sure the normal case works correctly for verbs. */
+ flags = 0;
+ flags = PosUtils.encodePosInfo(flags, PartOfSpeech.VERB);
+ Assert.assertEquals(4, flags);
+ flags = PosUtils.encodeRegularVerb(flags);
+ Assert.assertEquals(4 + 16_384, flags);
+ Assert.assertTrue(PosUtils.isPartOfSpeech(flags, PartOfSpeech.VERB));
+ Assert.assertTrue(PosUtils.isRegularVerb(flags));
+
+ /* Now make sure the normal case works correctly for adjectives. */
+ flags = 0;
+ flags = PosUtils.encodePosInfo(flags, PartOfSpeech.ADJECTIVE);
+ Assert.assertEquals(8, flags);
+ flags = PosUtils.encodeRegularAdjective(flags);
+ Assert.assertEquals(8 + 32_768, flags);
+ Assert.assertTrue(PosUtils.isPartOfSpeech(flags, PartOfSpeech.ADJECTIVE));
+ Assert.assertTrue(PosUtils.isRegularAdjective(flags));
+ }
+
+ /**
+ * Test of {@link PosUtils#isPartOfSpeech(char, PartOfSpeech)} and related encoding methods.
+ */
+ @Test
+ public void testIsPartOfSpeech() {
+ char flags = 0;
+ flags = PosUtils.encodePosInfo(flags, PartOfSpeech.PREPOSITION);
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.NOUN));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.PRONOUN));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.VERB));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.ADJECTIVE));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.ADVERB));
+ Assert.assertTrue(PosUtils.isPartOfSpeech(flags, PartOfSpeech.PREPOSITION));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.CONJUNCTION));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.ARTICLE));
+ Assert.assertFalse(PosUtils.isPartOfSpeech(flags, PartOfSpeech.INTERJECTION));
+ Assert.assertFalse(PosUtils.isRegularNoun(flags));
+ Assert.assertFalse(PosUtils.isRegularVerb(flags));
+ Assert.assertFalse(PosUtils.isRegularAdjective(flags));
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-02 17:19:15
|
Revision: 11979
http://sourceforge.net/p/foray/code/11979
Author: victormote
Date: 2021-11-02 17:19:13 +0000 (Tue, 02 Nov 2021)
Log Message:
-----------
Progress on encoding part-of-speech data.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
Added Paths:
-----------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
Added: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java (rev 0)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java 2021-11-02 17:19:13 UTC (rev 11979)
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen;
+
+import org.axsl.hyphen.PartOfSpeech;
+
+/**
+ * Utilities related to {@link PartOfSpeech}.
+ */
+public final class PosUtils {
+
+ /* Checkstyle: Allow Magic Numbers that are hard-coded data. */
+
+ /** Index to the flag indicating whether the word is a regular noun. */
+ private static final int REGULAR_NOUN_INDEX = 13;
+
+ /** Index to the flag indicating whether the word is a regular verb. */
+ private static final int REGULAR_VERB_INDEX = 14;
+
+ /** Index to the flag indicating whether the word is a regular adjective. */
+ private static final int REGULAR_ADJECTIVE_INDEX = 15;
+
+ /** Mask suitable for accumulating multiple parts of speech in one char. */
+ private static final char[] MASKS = new char[16];
+ static {
+ MASKS[PartOfSpeech.NOUN.getNumericValue()] = 0x0001; // Index 0.
+ MASKS[PartOfSpeech.PRONOUN.getNumericValue()] = 0x0002; // Index 1.
+ MASKS[PartOfSpeech.VERB.getNumericValue()] = 0x0004; // Index 2.
+ MASKS[PartOfSpeech.ADJECTIVE.getNumericValue()] = 0x0008; // Index 3.
+ MASKS[PartOfSpeech.ADVERB.getNumericValue()] = 0x0010; // Index 4.
+ MASKS[PartOfSpeech.PREPOSITION.getNumericValue()] = 0x0020; // Index 5.
+ MASKS[PartOfSpeech.CONJUNCTION.getNumericValue()] = 0x0040; // Index 6.
+ MASKS[PartOfSpeech.ARTICLE.getNumericValue()] = 0x0080; // Index 7.
+ MASKS[PartOfSpeech.INTERJECTION.getNumericValue()] = 0x0100; // Index 8.
+ /* Leave some room in the middle for expansion from either end. */
+ MASKS[9] = 0x0200; // Index 9.
+ MASKS[10] = 0x0400; // Index 10.
+ MASKS[11] = 0x0800; // Index 11.
+ MASKS[12] = 0x1000; // Index 12.
+ MASKS[REGULAR_NOUN_INDEX] = 0x2000; // Index 13.
+ MASKS[REGULAR_VERB_INDEX] = 0x4000; // Index 14.
+ MASKS[REGULAR_ADJECTIVE_INDEX] = 0x8000; // Index 15.
+ /* */
+ }
+
+ /* Checkstyle: Restart Magic Number checking. */
+
+ /**
+ * Private constructor. This is a utility class, and should never be instantiated.
+ */
+ private PosUtils() { }
+
+ /**
+ * Adds the flag for a given part of speech to the existing flags.
+ * @param existing The existing flags.
+ * @param pos The part of speech to be added to existing flags.
+ * @return The new flags, consisting of all existing flags plus the one just added.
+ */
+ public static char encodePosInfo(final char existing, final PartOfSpeech pos) {
+ final int index = pos.getNumericValue();
+ final char mask = MASKS[index];
+ return (char) (existing | mask);
+ }
+
+ public static char encodeRegularNoun(final char existing) {
+ final char mask = MASKS[REGULAR_NOUN_INDEX];
+ return (char) (existing | mask);
+ }
+
+ public static char encodeRegularVerb(final char existing) {
+ final char mask = MASKS[REGULAR_VERB_INDEX];
+ return (char) (existing | mask);
+ }
+
+ public static char encodeRegularAdjective(final char existing) {
+ final char mask = MASKS[REGULAR_ADJECTIVE_INDEX];
+ return (char) (existing | mask);
+ }
+
+}
Property changes on: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/PosUtils.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-02 15:13:08 UTC (rev 11978)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-02 17:19:13 UTC (rev 11979)
@@ -26,11 +26,6 @@
* $LastChangedBy$
*/
-/*
- * Known contributors:
- * Carlos Villegas <ca...@un...> (original author)
- */
-
package org.foray.hyphen;
import org.foray.common.primitive.CharSequenceUtils;
Added: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
===================================================================
--- trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java (rev 0)
+++ trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java 2021-11-02 17:19:13 UTC (rev 11979)
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2016 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen;
+
+import org.axsl.hyphen.PartOfSpeech;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests of {@link PosUtils}.
+ */
+public class PosUtilsTests {
+
+ /**
+ * Test of {@link PosUtils#encodePosInfo(char, org.axsl.hyphen.PartOfSpeech)} and related encoding methods.
+ */
+ @Test
+ public void testEncoding() {
+ char running = 0;
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.NOUN);
+ Assert.assertEquals(1, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.PRONOUN);
+ Assert.assertEquals(3, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.VERB);
+ Assert.assertEquals(7, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.ADJECTIVE);
+ Assert.assertEquals(15, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.ADVERB);
+ Assert.assertEquals(31, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.PREPOSITION);
+ Assert.assertEquals(63, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.CONJUNCTION);
+ Assert.assertEquals(127, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.ARTICLE);
+ Assert.assertEquals(255, running);
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.INTERJECTION);
+ Assert.assertEquals(511, running);
+ /* Index 9, if used, would add 512. */
+ /* Index 10, if used, would add 1024. */
+ /* Index 11, if used, would add 2048. */
+ /* Index 12, if used, would add 4096. */
+
+ /* Add 8192. */
+ running = PosUtils.encodeRegularNoun(running);
+ Assert.assertEquals(511 + 8192, running);
+ /* Add 16,384. */
+ running = PosUtils.encodeRegularVerb(running);
+ Assert.assertEquals(511 + 8192 + 16384, running);
+ /* Add 32,768. */
+ running = PosUtils.encodeRegularAdjective(running);
+ Assert.assertEquals(511 + 8192 + 16384 + 32768, running);
+ }
+
+ /**
+ * Test of {@link PosUtils#encodePosInfo(char, org.axsl.hyphen.PartOfSpeech)} and related encoding methods.
+ */
+ @Test
+ public void testOfOrInsteadOfAdd() {
+ char running = 0;
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.INTERJECTION);
+ Assert.assertEquals(256, running);
+ /* Deliberately set the same one again, which should not change the value. */
+ running = PosUtils.encodePosInfo(running, PartOfSpeech.INTERJECTION);
+ Assert.assertEquals(256, running);
+ }
+
+}
Property changes on: trunk/foray/foray-hyphen/src/test/java/org/foray/hyphen/PosUtilsTests.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-02 15:13:11
|
Revision: 11978
http://sourceforge.net/p/foray/code/11978
Author: victormote
Date: 2021-11-02 15:13:08 +0000 (Tue, 02 Nov 2021)
Log Message:
-----------
Conform to aXSL changes regarding Word implementations returning information about parts of speech and regularity.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordWrapper.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java 2021-11-02 12:24:23 UTC (rev 11977)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SegmentDictionaryWord.java 2021-11-02 15:13:08 UTC (rev 11978)
@@ -28,6 +28,8 @@
package org.foray.hyphen;
+import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
import org.axsl.hyphen.WordSegment;
/**
@@ -66,4 +68,10 @@
return this.dictionary.getWordSegment(dictionaryIndex);
}
+ @Override
+ public Boolean isOfType(final PartOfSpeech pos, final PosRegularity regularity) {
+ /* TODO: Implement this. */
+ return Boolean.FALSE;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-02 12:24:23 UTC (rev 11977)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/StringWord.java 2021-11-02 15:13:08 UTC (rev 11978)
@@ -35,6 +35,9 @@
import org.foray.common.primitive.CharSequenceUtils;
+import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
+
/**
* A word implementation that wraps a set of {@link StringWordSegmentUtf16}, a thin wrapper around a {@link String}.
* Instances of this class are immutable.
@@ -97,4 +100,10 @@
return this.segments[segmentIndex];
}
+ @Override
+ public Boolean isOfType(final PartOfSpeech pos, final PosRegularity regularity) {
+ /* TODO: Implement this or document why not. */
+ return Boolean.FALSE;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordWrapper.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordWrapper.java 2021-11-02 12:24:23 UTC (rev 11977)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/WordWrapper.java 2021-11-02 15:13:08 UTC (rev 11978)
@@ -28,6 +28,8 @@
package org.foray.hyphen;
+import org.axsl.hyphen.PartOfSpeech;
+import org.axsl.hyphen.PosRegularity;
import org.axsl.hyphen.Word;
import org.axsl.hyphen.WordSegment;
@@ -74,4 +76,9 @@
return this.wrappedWord.getWordSegment(segmentIndex);
}
+ @Override
+ public Boolean isOfType(final PartOfSpeech pos, final PosRegularity regularity) {
+ return this.wrappedWord.isOfType(pos, regularity);
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-02 12:24:25
|
Revision: 11977
http://sourceforge.net/p/foray/code/11977
Author: victormote
Date: 2021-11-02 12:24:23 +0000 (Tue, 02 Nov 2021)
Log Message:
-----------
Rough-in use of derivative logic.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-02 12:05:43 UTC (rev 11976)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-02 12:24:23 UTC (rev 11977)
@@ -28,6 +28,8 @@
package org.foray.hyphen;
+import org.axsl.hyphen.Word;
+
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
@@ -138,4 +140,14 @@
return inputWord.toString().replaceAll(this.match.pattern(), replace);
}
+ /**
+ * Indicates whether this rule applies to a given word.
+ * @param word The word to be tested.
+ * @return True if and only if this rule applies to {@code word}.
+ */
+ boolean doesRulyApply(final Word word) {
+ /* TODO: Complete this method after word knows how to tell its parts-of-speech. */
+ return false;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-02 12:05:43 UTC (rev 11976)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/OrthographyConfig4a.java 2021-11-02 12:24:23 UTC (rev 11977)
@@ -265,7 +265,8 @@
/* 2. Check exact matches in standard dictionaries for the orthography. */
final Dictionary orthoDictionary = getDictionary();
- if (orthoDictionary.getWord(wordChars, null) != null) {
+ if (orthoDictionary != null
+ && orthoDictionary.getWord(wordChars, null) != null) {
return true;
}
@@ -274,23 +275,41 @@
return true;
}
- /* 4. Derivative matches in adhoc dictionaries. */
+ /* 4. Check derivative matches in adhoc dictionaries. */
if (adhocDictionaries != null) {
- for (int index = 0; index < adhocDictionaries.size(); index ++) {
- final Dictionary adhocDictionary = adhocDictionaries.get(index);
- if (adhocDictionary.getWord(wordChars, null) != null) {
+ for (int dictIndex = 0; dictIndex < adhocDictionaries.size(); dictIndex ++) {
+ final Dictionary adhocDictionary = adhocDictionaries.get(dictIndex);
+ if (isDerivativeFound(adhocDictionary, wordChars)) {
return true;
}
}
}
- /* 5. Derivative matches in standard dictionaries for the orthography. */
- if (orthoDictionary.getWord(wordChars, null) != null) {
- return true;
- }
+ /* 5. Check derivative matches in standard dictionaries for the orthography. */
+// if (orthoDictionary != null) {
+// return isDerivativeFound(orthoDictionary, wordChars);
+// }
/* Not found in any dictionary. */
return false;
}
+ private boolean isDerivativeFound(final Dictionary dictionary, final CharSequence wordChars) {
+ for (int listIndex = 0; listIndex < this.derivativeRuleListIds.size(); listIndex ++) {
+ final String ruleListKey = this.derivativeRuleListIds.get(listIndex);
+ final List<DerivativeRule> ruleList = this.server.getDerivativeRules(ruleListKey);
+ for (int ruleIndex = 0; ruleIndex < ruleList.size(); ruleIndex ++) {
+ final DerivativeRule rule = ruleList.get(ruleIndex);
+ final String root = rule.applyRule(wordChars).toString();
+ final Word word = dictionary.getWord(root, null);
+ if (word != null) {
+ if (rule.doesRulyApply(word)) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
}
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java 2021-11-02 12:05:43 UTC (rev 11976)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java 2021-11-02 12:24:23 UTC (rev 11977)
@@ -475,11 +475,7 @@
/* TODO: This is all eng-us-lat specific. Need to think about how plurals and other almost-the-same cases should
* be handled. */
- /* Is the word itself directly in the dictionary. */
Word dictWord = null;
-
- /* Starting here, we will change the word content in various ways, looking for a match. The changes are
- * cumulative. */
final StringBuilder builder = new StringBuilder(word);
/* If the first character is capitalized, convert to lowercase & check again. */
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <vic...@us...> - 2021-11-02 12:05:45
|
Revision: 11976
http://sourceforge.net/p/foray/code/11976
Author: victormote
Date: 2021-11-02 12:05:43 +0000 (Tue, 02 Nov 2021)
Log Message:
-----------
Allow a derivative rule to apply to any word.
Modified Paths:
--------------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
Modified: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-01 21:05:42 UTC (rev 11975)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/DerivativeRule.java 2021-11-02 12:05:43 UTC (rev 11976)
@@ -45,6 +45,9 @@
*/
public enum Type {
+ /** Matching word can be of any type. */
+ ANY("any"),
+
/** Matching word must be a regular noun. */
REGULAR_NOUN("regular-noun"),
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|