|
From: <jer...@us...> - 2014-05-09 19:07:12
|
Revision: 8256
http://sourceforge.net/p/bigdata/code/8256
Author: jeremy_carroll
Date: 2014-05-09 19:07:09 +0000 (Fri, 09 May 2014)
Log Message:
-----------
Addressing trac 915 by documenting the current behavior, deprecating DefaultAnalyzerFactory, and suggesting the use of ConfigurableAnalyzerFactory instead
Modified Paths:
--------------
branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java
branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java
Modified: branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java
===================================================================
--- branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-09 19:07:02 UTC (rev 8255)
+++ branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-09 19:07:09 UTC (rev 8256)
@@ -90,7 +90,7 @@
* Properties from {@link Options} apply to the factory.
* <p>
*
- * If there are no such properties at all then the property {@link Options#INCLUDE_DEFAULTS} is set to true,
+ * If there are no such properties at all then the property {@link Options#NATURAL_LANGUAGE_SUPPORT} is set to true,
* and the behavior of this class is the same as the legacy {@link DefaultAnalyzerFactory}.
* <p>
* Other properties, from {@link AnalyzerOptions} start with
@@ -117,7 +117,7 @@
* <dd>This suppresses the functionality, by treating every expression as a stop word.</dd>
* </dl>
* there are in addition the language specific analyzers that are included
- * by using the option {@link Options#INCLUDE_DEFAULTS}
+ * by using the option {@link Options#NATURAL_LANGUAGE_SUPPORT}
*
*
* @author jeremycarroll
@@ -265,18 +265,13 @@
*
*
*/
- String INCLUDE_DEFAULTS = ConfigurableAnalyzerFactory.class.getName() + ".includeDefaults";
+ String NATURAL_LANGUAGE_SUPPORT = ConfigurableAnalyzerFactory.class.getName() + ".includeDefaults";
/**
* This is the prefix to all properties configuring the individual analyzers.
*/
String ANALYZER = ConfigurableAnalyzerFactory.class.getName() + ".analyzer.";
-/**
- * If there is no configuration at all, then the defaults are included,
- * but any configuration at all totally replaces the defaults, unless
- * {@link #INCLUDE_DEFAULTS}
- * is explicitly set to true.
- */
- String DEFAULT_INCLUDE_DEFAULTS = "false";
+
+ String DEFAULT_NATURAL_LAMGUAGE_SUPPORT = "false";
}
/**
* Options understood by analyzers created by {@link ConfigurableAnalyzerFactory}.
@@ -810,7 +805,7 @@
while (en.hasMoreElements()) {
String prop = (String)en.nextElement();
- if (prop.equals(Options.INCLUDE_DEFAULTS)) continue;
+ if (prop.equals(Options.NATURAL_LANGUAGE_SUPPORT)) continue;
if (prop.startsWith(Options.ANALYZER)) {
String languageRangeAndProperty[] = prop.substring(Options.ANALYZER.length()).replaceAll("_","*").split("[.]");
if (languageRangeAndProperty.length == 2) {
@@ -838,7 +833,7 @@
protected Properties initProperties() {
final Properties parentProperties = fullTextIndex.getProperties();
Properties myProps;
- if (Boolean.getBoolean(parentProperties.getProperty(Options.INCLUDE_DEFAULTS, Options.DEFAULT_INCLUDE_DEFAULTS))) {
+ if (Boolean.getBoolean(parentProperties.getProperty(Options.NATURAL_LANGUAGE_SUPPORT, Options.DEFAULT_NATURAL_LAMGUAGE_SUPPORT))) {
myProps = defaultProperties();
} else {
myProps = new Properties();
Modified: branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java
===================================================================
--- branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2014-05-09 19:07:02 UTC (rev 8255)
+++ branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2014-05-09 19:07:09 UTC (rev 8256)
@@ -29,7 +29,6 @@
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@@ -52,11 +51,24 @@
import com.bigdata.btree.keys.KeyBuilder;
/**
+ * This is the default implementation but could be regarded as legacy since
+ * it fails to use the correct {@link Analyzer} for almost all languages (other than
+ * English). It uses the correct natural language analyzer for literals tagged with
+ * "por", "deu", "ger", "zho", "chi", "jpn", "kor", "ces", "cze", "dut", "nld", "gre", "ell",
+ * "fra", "fre", "rus" and "tha".
+ * These codes do not work if they are used with subtags, e.g. "ger-AT" is treated as English.
+ * No two letter code works correctly: note that the W3C and
+ * IETF recommend the use of the two letter forms instead of the three letter forms.
+ * <p>
* Default implementation registers a bunch of {@link Analyzer}s for various
* language codes and then serves the appropriate {@link Analyzer} based on
* the specified language code.
*
* @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @deprecated Using {@link ConfigurableAnalyzerFactory} with
+ * the {@link ConfigurableAnalyzerFactory.Options#NATURAL_LANGUAGE_SUPPORT} option
+ * uses the appropriate natural language analyzers for the two letter codes
+ * and for tags which include sub-tags.
* @version $Id$
*/
public class DefaultAnalyzerFactory implements IAnalyzerFactory {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|