Revision: 12677
http://sourceforge.net/p/foray/code/12677
Author: victormote
Date: 2022-06-21 13:44:29 +0000 (Tue, 21 Jun 2022)
Log Message:
-----------
Check for change in writing-system at both startElement and endElement, and process text accordingly.
Modified Paths:
--------------
trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
Modified: trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java
===================================================================
--- trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-06-21 12:49:30 UTC (rev 12676)
+++ trunk/foray/foray-orthography/src/main/java/org/foray/orthography/util/SpellChecker.java 2022-06-21 13:44:29 UTC (rev 12677)
@@ -31,6 +31,7 @@
import org.foray.common.i18n.WritingSystem4a;
import org.foray.common.primitive.ObjectUtils;
import org.foray.common.primitive.XmlUtils;
+import org.foray.orthography.LexerJavaBreakIterator;
import org.foray.orthography.Orthography4a;
import org.foray.orthography.OrthographyServer4a;
import org.foray.orthography.OrthographyServerConfig;
@@ -39,9 +40,9 @@
import org.foray.xml.dtd.DtdAttribute;
import org.axsl.i18n.WritingSystem;
-import org.axsl.orthography.Orthography;
import org.axsl.orthography.OrthographyException;
import org.axsl.orthography.optional.Dictionary;
+import org.axsl.orthography.optional.Lexer;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
@@ -243,38 +244,15 @@
element.localName = localName;
element.qName = qName;
- parseOrthography(attributes, element);
+ /* Is there a change in writing system? */
+ final WritingSystem4a oldWritingSystem = getCurrentWritingSystem();
+ final WritingSystem4a newWritingSystem = parseWritingSystem(attributes);
+ element.writingSystem = newWritingSystem;
+ checkWords(oldWritingSystem, newWritingSystem);
+
this.elementStack.push(element);
}
- /**
- * For a given element and its attributes, parses the {@link WritingSystem} from them and then finds the
- * {@link Orthogrpahy} for that {@link WritingSystem}.
- * @param attributes The attributes that possibly contain language, country, and script data.
- * @param element The element to which the {@link Orthography} instance should be attached.
- */
- private void parseOrthography(final Attributes attributes, final Element element) {
- final String languageAttr = XML_LANG_ATTRIBUTE.getValue(attributes);
- if (languageAttr == null) {
- if (getCurrentWritingSystem() == null) {
- this.output.println("Orthography not specified. " + getLocationString(getLocator()));
- }
- return;
- }
-
- element.writingSystem = WritingSystem4a.find(languageAttr);
- if (element.writingSystem == null) {
- final String message = String.format("Writing system not found for: %s", languageAttr);
- this.output.println(message + getLocationString(getLocator()));
- } else {
- final Orthography4a config = this.server.getOrthography(element.writingSystem);
- if (config == null) {
- final String message = String.format("Unconfigured orthography: %s", languageAttr);
- this.output.println(message + getLocationString(getLocator()));
- }
- }
- }
-
@Override
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
if (this.elementStack.size() < 1) {
@@ -292,33 +270,37 @@
return;
}
- final Element element = this.elementStack.peek();
+ final WritingSystem4a oldWritingSystem = this.getCurrentWritingSystem();
- List<CharSequence> words = null;
- if (getCurrentWritingSystem() != null) {
- final Orthography4a orthography = this.server.getOrthography(getCurrentWritingSystem());
- words = orthography.getLexer().tokenize(getAndClearText());
+ final Element element = this.elementStack.pop();
+ if (! element.matches(uri, localName, qName)) {
+ throw new SAXException("Closing element does not match top of stack.");
}
- final WritingSystem4a writingSystem = getCurrentWritingSystem();
- final Orthography4a orthography = this.server.getOrthography(writingSystem);
- checkWords(orthography, words);
-
- /* This element should match the top of the element stack. Pop it. */
- if (element.matches(uri, localName, qName)) {
- this.elementStack.pop();
- } else {
- throw new SAXException("Closing element does not match top of stack.");
- }
+ final WritingSystem4a newWritingSystem = getCurrentWritingSystem();
+ checkWords(oldWritingSystem, newWritingSystem);
}
/**
- * Spell-check each word in a sequence of words.
- * @param orthography The orthography to be used to spell-check {@code words}.
- * @param words The words to be checked.
+ * Check for a change in writing system, and, if there is one, flush the text accumulator and spell-check the words
+ * in it.
+ * @param oldWritingSystem The old writing system, which will be used to spell-check the accumulated text if the
+ * writing system has changed.
+ * @param newWritingSystem The new writing system.
*/
- private void checkWords(final Orthography4a orthography, final List<CharSequence> words) {
+ private void checkWords(final WritingSystem4a oldWritingSystem, final WritingSystem4a newWritingSystem) {
+ if (ObjectUtils.safeEquals(oldWritingSystem, newWritingSystem)) {
+ /* Either both are null or they are equal. */
+ return;
+ }
+
+ final Orthography4a orthography = oldWritingSystem == null ?
+ null : this.server.getOrthography(oldWritingSystem);
+ final Lexer lexer = orthography == null ? new LexerJavaBreakIterator(oldWritingSystem) : orthography.getLexer();
+
+ final List<CharSequence> words = lexer.tokenize(getAndClearText());
+
if (words == null) {
return;
}
@@ -358,6 +340,36 @@
}
/**
+ * For a given set of attributes, parses the {@link WritingSystem} from them and then checks the
+ * {@link Orthography4a} for that {@link WritingSystem}.
+ * @param attributes The attributes that possibly contain language, country, and script data.
+ * @return The writing system parsed from {@code attributes}.
+ */
+ private WritingSystem4a parseWritingSystem(final Attributes attributes) {
+ final String languageAttr = XML_LANG_ATTRIBUTE.getValue(attributes);
+ if (languageAttr == null) {
+ if (getCurrentWritingSystem() == null) {
+ this.output.println("Orthography not specified. " + getLocationString(getLocator()));
+ }
+ return null;
+ }
+
+ final WritingSystem4a writingSystem = WritingSystem4a.find(languageAttr);
+ if (writingSystem == null) {
+ final String message = String.format("Writing system not found for: %s", languageAttr);
+ this.output.println(message + getLocationString(getLocator()));
+ } else {
+ final Orthography4a config = this.server.getOrthography(writingSystem);
+ if (config == null) {
+ final String message = String.format("Unconfigured orthography: %s", languageAttr);
+ this.output.println(message + getLocationString(getLocator()));
+ }
+ }
+
+ return writingSystem;
+ }
+
+ /**
* Returns the command-line options for the {@link #main(String[])} method.
* @return Command-line options.
*/
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|