From: <jer...@us...> - 2014-05-09 19:07:12
|
Revision: 8256 http://sourceforge.net/p/bigdata/code/8256 Author: jeremy_carroll Date: 2014-05-09 19:07:09 +0000 (Fri, 09 May 2014) Log Message: ----------- Addressing trac 915 by documenting the current behavior and deprecating DefaultAnalyzerFactory and suggesting the use of ConfigurableAnalyzerFactory instead Modified Paths: -------------- branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java Modified: branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java =================================================================== --- branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-09 19:07:02 UTC (rev 8255) +++ branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/ConfigurableAnalyzerFactory.java 2014-05-09 19:07:09 UTC (rev 8256) @@ -90,7 +90,7 @@ * Properties from {@link Options} apply to the factory. * <p> * - * If there are no such properties at all then the property {@link Options#INCLUDE_DEFAULTS} is set to true, + * If there are no such properties at all then the property {@link Options#NATURAL_LANGUAGE_SUPPORT} is set to true, * and the behavior of this class is the same as the legacy {@link DefaultAnalyzerFactory}. 
* <p> * Other properties, from {@link AnalyzerOptions} start with @@ -117,7 +117,7 @@ * <dd>This suppresses the functionality, by treating every expression as a stop word.</dd> * </dl> * there are in addition the language specific analyzers that are included - * by using the option {@link Options#INCLUDE_DEFAULTS} + * by using the option {@link Options#NATURAL_LANGUAGE_SUPPORT} * * * @author jeremycarroll @@ -265,18 +265,13 @@ * * */ - String INCLUDE_DEFAULTS = ConfigurableAnalyzerFactory.class.getName() + ".includeDefaults"; + String NATURAL_LANGUAGE_SUPPORT = ConfigurableAnalyzerFactory.class.getName() + ".includeDefaults"; /** * This is the prefix to all properties configuring the individual analyzers. */ String ANALYZER = ConfigurableAnalyzerFactory.class.getName() + ".analyzer."; -/** - * If there is no configuration at all, then the defaults are included, - * but any configuration at all totally replaces the defaults, unless - * {@link #INCLUDE_DEFAULTS} - * is explicitly set to true. - */ - String DEFAULT_INCLUDE_DEFAULTS = "false"; + + String DEFAULT_NATURAL_LAMGUAGE_SUPPORT = "false"; } /** * Options understood by analyzers created by {@link ConfigurableAnalyzerFactory}. 
@@ -810,7 +805,7 @@ while (en.hasMoreElements()) { String prop = (String)en.nextElement(); - if (prop.equals(Options.INCLUDE_DEFAULTS)) continue; + if (prop.equals(Options.NATURAL_LANGUAGE_SUPPORT)) continue; if (prop.startsWith(Options.ANALYZER)) { String languageRangeAndProperty[] = prop.substring(Options.ANALYZER.length()).replaceAll("_","*").split("[.]"); if (languageRangeAndProperty.length == 2) { @@ -838,7 +833,7 @@ protected Properties initProperties() { final Properties parentProperties = fullTextIndex.getProperties(); Properties myProps; - if (Boolean.getBoolean(parentProperties.getProperty(Options.INCLUDE_DEFAULTS, Options.DEFAULT_INCLUDE_DEFAULTS))) { + if (Boolean.getBoolean(parentProperties.getProperty(Options.NATURAL_LANGUAGE_SUPPORT, Options.DEFAULT_NATURAL_LAMGUAGE_SUPPORT))) { myProps = defaultProperties(); } else { myProps = new Properties(); Modified: branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java =================================================================== --- branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2014-05-09 19:07:02 UTC (rev 8255) +++ branches/TEXT_ANALYZERS/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2014-05-09 19:07:09 UTC (rev 8256) @@ -29,7 +29,6 @@ import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -52,11 +51,24 @@ import com.bigdata.btree.keys.KeyBuilder; /** + * This is the default implementation but could be regarded as legacy since + * it fails to use the correct {@link Analyzer} for almost all languages (other than + * English). It uses the correct natural language analyzer for literals tagged with + * "por", "deu", "ger", "zho", "chi", "jpn", "kor", "ces", "cze", "dut", "nld", "gre", "ell", + * "fra", "fre", "rus" and "tha". + * This codes do not work if they are used with subtags, e.g. 
"ger-AT" is treated as English. + * No two letter code works correctly: note that the W3C and + * IETF recommend the use of the two letter forms instead of the three letter forms. + * <p> * Default implementation registers a bunch of {@link Analyzer}s for various * language codes and then serves the appropriate {@link Analyzer} based on * the specified language code. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @deprecated Using {@link ConfigurableAnalyzerFactory} with + * the {@link ConfigurableAnalyzerFactory.Options#NATURAL_LANGUAGE_SUPPORT} + * uses the appropriate natural language analyzers for the two letter codes + * and for tags which include sub-tags. * @version $Id$ */ public class DefaultAnalyzerFactory implements IAnalyzerFactory { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |