Revision: 12678
http://sourceforge.net/p/foray/code/12678
Author: victormote
Date: 2022-06-21 16:24:03 +0000 (Tue, 21 Jun 2022)
Log Message:
-----------
Process text at the end of terminal elements, to avoid the appearance of continuity of words between them.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-06-21 13:44:29 UTC (rev 12677)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-06-21 16:24:03 UTC (rev 12678)
@@ -149,6 +149,9 @@
/** The list of elements having no content but that can be placed in the middle of a word. */
private List<String> elementIgnoreList = Arrays.asList(new String[] {"Page", "MendOut", "Comment", "ToDo"});
+ /** The list of elements that should never be straddled by a word. */
+ private List<String> elementTerminalList = Arrays.asList(new String[] {"Para", "Sidenote"});
+
/** Map whose key is elements that are actually tokens for text, and whose value is that text. */
private Map<String, String> textElementMap = new HashMap<String, String>();
{
@@ -248,8 +251,10 @@
final WritingSystem4a oldWritingSystem = getCurrentWritingSystem();
final WritingSystem4a newWritingSystem = parseWritingSystem(attributes);
element.writingSystem = newWritingSystem;
- checkWords(oldWritingSystem, newWritingSystem);
+ if (! ObjectUtils.safeEquals(oldWritingSystem, newWritingSystem)) {
+ checkWords(oldWritingSystem);
+ }
this.elementStack.push(element);
}
@@ -278,7 +283,15 @@
}
final WritingSystem4a newWritingSystem = getCurrentWritingSystem();
- checkWords(oldWritingSystem, newWritingSystem);
+
+ if (! ObjectUtils.safeEquals(oldWritingSystem, newWritingSystem)) {
+ checkWords(oldWritingSystem);
+ }
+
+ /* Whether the writing system has changed or not, if we are at the end of a terminal element, process text. */
+ if (this.elementTerminalList.contains(localName)) {
+ checkWords(oldWritingSystem);
+ }
}
@@ -285,20 +298,12 @@
/**
* Check for a change in writing system, and, if there is one, flush the text accumulator and spell-check the words
* in it.
- * @param oldWritingSystem The old writing system, which will be used to spell-check the accumulated text if the
- * writing system has changed.
- * @param newWritingSystem The new writing system.
+ * @param writingSystem The writing system to be used to spell-check the accumulated text.
*/
- private void checkWords(final WritingSystem4a oldWritingSystem, final WritingSystem4a newWritingSystem) {
- if (ObjectUtils.safeEquals(oldWritingSystem, newWritingSystem)) {
- /* Either both are null or they are equal. */
- return;
- }
+ private void checkWords(final WritingSystem4a writingSystem) {
+ final Orthography4a orthography = writingSystem == null ? null : this.server.getOrthography(writingSystem);
+ final Lexer lexer = orthography == null ? new LexerJavaBreakIterator(writingSystem) : orthography.getLexer();
- final Orthography4a orthography = oldWritingSystem == null ?
- null : this.server.getOrthography(oldWritingSystem);
- final Lexer lexer = orthography == null ? new LexerJavaBreakIterator(oldWritingSystem) : orthography.getLexer();
-
final List<CharSequence> words = lexer.tokenize(getAndClearText());
if (words == null) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|