[FOray-commit] SF.net SVN: foray:[11905] trunk/foray
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
|
From: <vic...@us...> - 2021-10-18 22:27:04
|
Revision: 11905
http://sourceforge.net/p/foray/code/11905
Author: victormote
Date: 2021-10-18 22:27:01 +0000 (Mon, 18 Oct 2021)
Log Message:
-----------
Rough-in the start of a spell-checker class.
Modified Paths:
--------------
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/ObjectUtils.java
trunk/foray/foray-common/src/main/java/org/foray/common/primitive/XmlUtils.java
Added Paths:
-----------
trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/ObjectUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/ObjectUtils.java 2021-10-15 12:21:25 UTC (rev 11904)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/ObjectUtils.java 2021-10-18 22:27:01 UTC (rev 11905)
@@ -48,7 +48,7 @@
/**
* Adds a factor to an existing hash code computation.
* @param currentHash The value of the hash before adding the new factor.
- * @param hashIncrement The value to be added to the existing hash. This represents the hash or other semi-uniqe
+ * @param hashIncrement The value to be added to the existing hash. This represents the hash or other semi-unique
* value of a significant component of the item being hashed.
* @return The new hash value.
* @see "Effective Java, Item 9"
@@ -59,4 +59,37 @@
return newHash;
}
+    /**
+     * Null-safe equality test for two objects.
+     * @param object1 The first object to be tested. May be null.
+     * @param object2 The second object to be tested. May be null.
+     * @return True if 1) both objects are null, or 2) neither object is null and they are equal using
+     * {@link Object#equals(Object)}, invoked on {@code object1}. False in every other case, including the case
+     * where exactly one of the two is null.
+     */
+    public static boolean safeEquals(final Object object1, final Object object2) {
+        /* When either side is null, the two can be equal only if both are null. */
+        if (object1 == null || object2 == null) {
+            return object1 == object2;
+        }
+        return object1.equals(object2);
+    }
+
+ /**
+ * A null operation: accepts any object and deliberately does nothing with it.
+ * This is useful (arguably) for the case where stub or incomplete code needs to avoid compiler or style warnings
+ * because an object is not used.
+ * @param object The object to be protected from the stigma of not being used. It is neither read nor modified.
+ */
+ public static void noOperation(final Object object) {
+ /* Does absolutely nothing, by design. */
+ }
+
}
Modified: trunk/foray/foray-common/src/main/java/org/foray/common/primitive/XmlUtils.java
===================================================================
--- trunk/foray/foray-common/src/main/java/org/foray/common/primitive/XmlUtils.java 2021-10-15 12:21:25 UTC (rev 11904)
+++ trunk/foray/foray-common/src/main/java/org/foray/common/primitive/XmlUtils.java 2021-10-18 22:27:01 UTC (rev 11905)
@@ -41,13 +41,17 @@
public final class XmlUtils {
/**
- * The namespace declaration to be assigned to the namespace definition
- * in XML documents.
+ * The namespace declaration to be assigned to the namespace definition in XML documents.
* @see "http://www.w3.org/2000/xmlns/"
*/
public static final String XMLNS_NAMESPACE_URI = "http://www.w3.org/2000/xmlns/";
/**
+ * The namespace URI bound to the reserved prefix "xml" (the prefix of attributes such as "xml:lang").
+ */
+ public static final String XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
+
+ /**
* This class should never be instantiated.
*/
private XmlUtils() { }
Added: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
===================================================================
--- trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java (rev 0)
+++ trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java 2021-10-18 22:27:01 UTC (rev 11905)
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2021 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.hyphen;
+
+import org.foray.common.i18n.Country4a;
+import org.foray.common.i18n.Language4a;
+import org.foray.common.i18n.Orthography4a;
+import org.foray.common.i18n.Script4a;
+import org.foray.common.primitive.ObjectUtils;
+import org.foray.common.primitive.XmlUtils;
+
+import org.axsl.common.i18n.Orthography;
+import org.axsl.hyphen.HyphenationException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.ext.DefaultHandler2;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
+
+/**
+ * Parses a document, looking for spelling errors.
+ * The specified natural language can change at any time, and must be tracked
+ * General plan is to capture all of the text data in one CharSequence, then parse and compare to dictionary entries.
+ */
+public class SpellChecker extends DefaultHandler2 {
+
+ private class Element {
+
+ /** The namespace URI, if specified. */
+ private String namespace;
+
+ /** The local name, if specified. */
+ private String localName;
+
+ /** The qualified name, if specified. */
+ private String qName;
+
+ /** The natural language specified in an xml:lang attribute, if specified. */
+// private Orthography orthography;
+
+ /**
+ * Checks whether a set of element descriptor items match this instance.
+ * @param namespace The namespace URI.
+ * @param localName The local name.
+ * @param qName The qualified name.
+ * @return True all element descriptor items match this instance.
+ */
+ boolean matches(final String namespace, final String localName, final String qName) {
+ boolean returnValue = true;
+ returnValue &= ObjectUtils.safeEquals(namespace, this.namespace);
+ returnValue &= ObjectUtils.safeEquals(localName, this.localName);
+ returnValue &= ObjectUtils.safeEquals(qName, this.qName);
+ return returnValue;
+ }
+ }
+
+ /** Command-line status constant indicating that the command line itself was not properly formed. */
+ public static final byte STATUS_COMMAND_LINE_ERROR = 1;
+
+ /** Command-line return status constant indicating that a file was not found. */
+ public static final byte STATUS_FILE_NOT_FOUND = 2;
+
+ /** Command-line return status constant indicating that there was an error parsing the input file. */
+ public static final byte STATUS_PARSING_ERROR = 3;
+
+ /** The input source to be pretty-printed. */
+ private InputSource input;
+
+ /** The output stream to which the pretty-printed output should be sent. */
+ private PrintStream output;
+
+ /** The locator instance for identifying the document, line, and column
+ * number of specific elements. */
+// private Locator locator;
+
+ /** The entity resolver to be used for resolving Dtd catalogs and other
+ * entities. */
+// private EntityResolver entityResolver;
+
+ /** A reusable buffer. */
+ private StringBuilder charBuffer = new StringBuilder();
+
+ /** The element stack. */
+// private Stack<Element> elementStack = new Stack<Element>();
+
+ /** The stack of orthographies. */
+// private Stack<Orthography> orthographyStack = new Stack<Orthography>();
+
+ /** The logger. */
+ private Logger logger = LoggerFactory.getLogger(SpellChecker.class);
+
+ /** The Hyphenation server. */
+// private HyphenationServer4a server;
+
+ /** The word-breaker instance. */
+ private WordBreaker wordBreaker;
+
+ /**
+ * Constructor.
+ * @param input The input source encapsulating the document to be spell-checked.
+ * @param output The output stream to which the spelling errors should be written.
+ * @throws HyphenationException
+ */
+ public SpellChecker(final InputSource input, final PrintStream output) throws HyphenationException {
+ this.input = input;
+ this.output = output;
+// this.server = new HyphenationServer4a(null);
+ this.wordBreaker = new WordBreakerLatin1();
+ }
+
+ public void start() throws SAXException, ParserConfigurationException, IOException {
+ final XMLReader parser = createParser();
+
+ /* Bind the LexicalHandler to the XMLReader if possible. */
+ try {
+ parser.setProperty("http://xml.org/sax/properties/lexical-handler", this);
+ } catch (final SAXNotSupportedException e1) {
+ this.logger.error("Parser does not support LexicalHandler.");
+ }
+
+ /* Bind the DeclHandler to the XMLReader if possible. */
+ try {
+ parser.setProperty("http://xml.org/sax/properties/declaration-handler", this);
+ } catch (final SAXNotSupportedException e) {
+ this.logger.error("Parser does not support Declaration Handler.");
+ }
+
+ /* Turn on namespace-prefixes so that we get the namespace declarations
+ * returned with other attributes and can therefore write them out
+ * along with them. */
+ try {
+ parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
+ } catch (final SAXNotRecognizedException e1) {
+ this.logger.error("Parser does not recognize the \"namespace-prefixes\" feature.");
+ } catch (final SAXNotSupportedException e1) {
+ this.logger.error("Parser unable to supply namespace-prefixes.");
+ }
+
+ /* Turn on validation if it is available. */
+ try {
+ parser.setFeature("http://xml.org/sax/features/validation", true);
+ } catch (final SAXNotRecognizedException e1) {
+ this.logger.error("Parser does not recognize the \"validation\" feature.");
+ } catch (final SAXNotSupportedException e1) {
+ this.logger.error("Parser unable to validate.");
+ }
+
+ /* Turn on "notify-char-refs" feature.
+ * Sadly, this only works with Xerces.
+ * This feature, or something like it is very important.
+ * Without it, character entities get transformed into characters
+ * without notification.
+ * When notified, we can (and do) ignore the transformed characters
+ * and use the character entities instead.
+ * We do NOT want to change the user's content. */
+ try {
+ parser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
+ } catch (final SAXNotRecognizedException e) {
+ /* Make this a fatal error. */
+ this.logger.error("Parser cannot report character entities. Aborting.");
+ cleanup();
+ return;
+ } catch (final SAXNotSupportedException e) {
+ /* Make this a fatal error. */
+ this.logger.error("Parser cannot report character entities. Aborting.");
+ cleanup();
+ return;
+ }
+ parser.setContentHandler(this);
+ parser.parse(this.input);
+ cleanup();
+ }
+
+ /**
+ * Creates a SAX parser.
+ * @return The created SAX parser.
+ * @throws SAXException For error creating parser.
+ * @throws ParserConfigurationException For error configuring parser.
+ */
+ public XMLReader createParser() throws SAXException, ParserConfigurationException {
+ final SAXParserFactory spf = javax.xml.parsers.SAXParserFactory.newInstance();
+ spf.setNamespaceAware(true);
+ final XMLReader xmlReader = spf.newSAXParser().getXMLReader();
+// if (this.entityResolver != null) {
+// xmlReader.setEntityResolver(this.entityResolver);
+// }
+ return xmlReader;
+ }
+
+ /**
+ * Finalize the processing.
+ */
+ private void cleanup() {
+ this.output.close();
+ }
+
+ @Override
+ public void startDocument() throws SAXException {
+ }
+
+
+ @Override
+ public void endDocument() throws SAXException {
+ }
+
+
+ @Override
+ public void setDocumentLocator(final Locator locator) {
+// this.locator = locator;
+ }
+
+ @Override
+ public void startElement(final String uri, final String localName, final String qName, final Attributes attributes)
+ throws SAXException {
+ final Element element = new Element();
+ element.namespace = uri;
+ element.localName = localName;
+ element.qName = qName;
+ /* TODO: Remove nonsense operation below for incomplete code. */
+ ObjectUtils.noOperation(element.matches(qName, localName, qName));
+
+ String languageAttr = null;
+ languageAttr = attributes.getValue("xml:lang");
+ if (languageAttr == null) {
+ languageAttr = attributes.getValue(XmlUtils.XML_NAMESPACE_URI, "lang");
+ }
+
+ if (languageAttr != null) {
+ /* Is there a country code? */
+ final String[] languageAndCountry = languageAttr.split("-");
+ final String languageString = languageAndCountry[0];
+ String countryString = null;
+ if (languageAndCountry.length > 1) {
+ countryString = languageAndCountry[1];
+ }
+
+ final Language4a language = Language4a.findFromAlpha(languageString);
+ final Country4a country = Country4a.findFromAlpha(countryString);
+ /* TODO: The following should not be hard-coded this way.
+ * Instead, either specify in the XML document (i.e. as an attribute on the element itself), or implied by
+ * the text content. */
+ final Script4a script = Script4a.LATIN;
+
+ final Orthography orthography = Orthography4a.find(language, country, script);
+
+ /* TODO: Fix this. */
+ ObjectUtils.noOperation(orthography);
+// final OrthographyConfig config = this.server.getOrthographyConfig(orthography);
+ }
+ }
+
+
+ @Override
+ public void endElement(final String uri, final String localName, final String qName) throws SAXException {
+ final List<CharSequence> words = this.wordBreaker.breakIntoWords(this.charBuffer);
+ this.charBuffer.delete(0, this.charBuffer.length() - 1);
+ for (int index = 0; index < words.size(); index ++) {
+ if (index % 2 == 0) {
+ final CharSequence word = words.get(index);
+ this.output.println(word);
+ }
+ }
+ }
+
+
+ @Override
+ public void characters(final char[] buffer, final int offset, final int length) {
+ this.charBuffer.append(buffer, offset, length);
+ }
+
+ public static void main(final String[] args) {
+ final Logger logger = LoggerFactory.getLogger(SpellChecker.class);
+
+ final String input = args[0];
+ InputStream inputStream = null;
+ try {
+ FileInputStream fis = null;
+ fis = new FileInputStream(input);
+ inputStream = new BufferedInputStream(fis);
+ } catch (final FileNotFoundException e) {
+ logger.error("File cannot be opened for input: " + input, e);
+ /* CheckStyle: Allow System.exit() in main method. */
+ System.exit(SpellChecker.STATUS_FILE_NOT_FOUND);
+ }
+ final InputSource inputSource = new InputSource(inputStream);
+
+ final PrintStream output = System.out;
+ try {
+ final SpellChecker checker = new SpellChecker(inputSource, output);
+ checker.start();
+ } catch (final HyphenationException | IOException | SAXException | ParserConfigurationException e) {
+ /* CheckStyle: Allow System.exit() in main method. */
+ System.exit(SpellChecker.STATUS_PARSING_ERROR);
+ }
+
+ }
+
+}
Property changes on: trunk/foray/foray-hyphen/src/main/java/org/foray/hyphen/SpellChecker.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Rev
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|