From: <mrp...@us...> - 2012-04-02 09:15:59
Revision: 6245 http://bigdata.svn.sourceforge.net/bigdata/?rev=6245&view=rev Author: mrpersonick Date: 2012-04-02 09:15:46 +0000 (Mon, 02 Apr 2012) Log Message: ----------- integrated the subject-centric full text index Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestAll.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -1,3 +1,4 @@ + /** Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. @@ -77,6 +78,7 @@ import com.bigdata.journal.IIndexManager; import com.bigdata.journal.IResourceLock; import com.bigdata.journal.ITx; +import com.bigdata.journal.NoSuchIndexException; import com.bigdata.journal.TimestampUtility; import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.internal.IDatatypeURIResolver; @@ -99,6 +101,7 @@ import com.bigdata.rdf.model.BigdataValueFactoryImpl; import com.bigdata.rdf.model.BigdataValueSerializer; import com.bigdata.rdf.rio.StatementBuffer; +import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.vocab.NoVocabulary; import com.bigdata.rdf.vocab.Vocabulary; @@ -141,6 +144,11 @@ private final AtomicReference<IValueCentricTextIndexer<?>> viewRef = new AtomicReference<IValueCentricTextIndexer<?>>(); /** + * A new one for the subject-centric full text index. + */ + private final AtomicReference<ISubjectCentricTextIndexer<?>> viewRef2 = new AtomicReference<ISubjectCentricTextIndexer<?>>(); + + /** * Note: This is a stateless class. 
*/ private final BlobsIndexHelper h = new BlobsIndexHelper(); @@ -196,6 +204,32 @@ } + @SuppressWarnings({ "unchecked", "rawtypes" }) + protected Class<ISubjectCentricTextIndexer> determineSubjectCentricTextIndexerClass() { + + final String className = getProperty( + AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS, + AbstractTripleStore.Options.DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS); + + final Class<?> cls; + try { + cls = Class.forName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Bad option: " + + AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS, e); + } + + if (!ISubjectCentricTextIndexer.class.isAssignableFrom(cls)) { + throw new RuntimeException( + AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS + + ": Must implement: " + + ISubjectCentricTextIndexer.class.getName()); + } + + return (Class<ISubjectCentricTextIndexer>) cls; + + } + @SuppressWarnings("unchecked") protected Class<IExtensionFactory> determineExtensionFactoryClass() { @@ -283,6 +317,13 @@ } + // just for now while I am testing, don't feel like rebuilding + // the entire journal + this.subjectCentricTextIndex = textIndex; +// this.subjectCentricTextIndex = Boolean.parseBoolean(getProperty( +// AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEX, +// AbstractTripleStore.Options.DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS)); + } this.storeBlankNodes = Boolean.parseBoolean(getProperty( @@ -705,6 +746,13 @@ private final boolean textIndex; /** + * When <code>true</code> a secondary subject-centric full text index is + * maintained. + * + * @see AbstractTripleStore.Options#SUBJECT_CENTRIC_TEXT_INDEX + */ + private final boolean subjectCentricTextIndex; + /** * When <code>true</code> the kb is using told blank nodes semantics. * * @see AbstractTripleStore.Options#STORE_BLANK_NODES @@ -851,6 +899,17 @@ } + /** + * <code>true</code> iff the subject-centric full text index is enabled. + * + * @see AbstractTripleStore.Options#SUBJECT_CENTRIC_TEXT_INDEX + */ + final public boolean isSubjectCentricTextIndex() { + + return subjectCentricTextIndex; + + } + /** * Overridden to use local cache of the index reference. */ @@ -1068,6 +1127,61 @@ } /** + * A factory returning the softly held singleton for the + * {@link FullTextIndex} representing the subject-centric full text index. + * + * @see AbstractTripleStore.Options#TEXT_INDEX + * + * @todo replace with the use of the {@link IResourceLocator} since it + * already imposes a canonicalizing mapping within for the index name + * and timestamp inside of a JVM. + */ + public ISubjectCentricTextIndexer<?> getSubjectCentricSearchEngine() { + + if (!subjectCentricTextIndex) + return null; + + /* + * Note: Double-checked locking pattern requires [volatile] variable or + * AtomicReference. This uses the AtomicReference since that gives us a + * lock object which is specific to this request. + */ + if (viewRef2.get() == null) { + + synchronized (viewRef2) {// NB: Ignore find bugs complaint per above. 
+ + if (viewRef2.get() == null) { + + final ISubjectCentricTextIndexer<?> tmp; + try { + final Class<?> vfc = determineSubjectCentricTextIndexerClass(); + final Method gi = vfc.getMethod("getInstance", + IIndexManager.class, String.class, Long.class, + Properties.class); + tmp = (ISubjectCentricTextIndexer<?>) gi.invoke(null/* object */, + getIndexManager(), getNamespace(), + getTimestamp(), getProperties()); + if(tmp instanceof ILocatableResource<?>) { + ((ILocatableResource<?>)tmp).init(); + } + viewRef2.set(tmp); + } catch (Throwable e) { + throw new IllegalArgumentException( + AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS, + e); + } + + } + + } + + } + + return viewRef2.get(); + + } + + /** * Return the {@link IndexMetadata} for the TERM2ID index. * * @param name @@ -2002,6 +2116,299 @@ } + /** + * Utility method to (re-)build the subject-based full text index. This is a + * high latency operation for a database of any significant size. You must + * be using the unisolated view of the {@link AbstractTripleStore} for this + * operation. {@link AbstractTripleStore.Options#TEXT_INDEX} must be + * enabled. This operation is only supported when the {@link ITextIndexer} + * uses the {@link FullTextIndex} class. + * <p> + * The subject-based full text index is one that rolls up normal + * object-based full text index into a similarly structured index that + * captures relevancy across subjects. Instead of + * + * (t,s) => s.len, termWeight + * + * Where s is the subject's IV. The term weight has the same + * interpretation, but it is across all literals which are linked to that + * subject and which contain the given token. This index basically + * pre-computes the (?s ?p ?o) join that sometimes follows the (?o + * bd:search "xyz") request. + * <p> + * Truth Maintenance + * <p> + * We will need to perform truth maintenance on the subject-centric text + * index, that is - the index will need to be updated as statements are + * added and removed (to the extent that those statements involving a + * literal in the object position). Adding a statement is the easier + * case because we will never need to remove entries from the index, we + * can simply write over them with new relevance values. All that is + * involved with truth maintenance for adding a statement is taking a post- + * commit snapshot of the subject in the statement and running it through + * the indexer (a "subject-refresh"). + * <p> + * The same "subject-refresh" will be necessary for truth maintenance for + * removal, but an additional step will be necessary beforehand - the index + * entries associated with the deleted subject/object (tokens+subject) will + * need to be removed in case the token appears only in the removed literal. + * After this pruning step the subject can be refreshed in the index exactly + * the same as for truth maintenance on add. + * <p> + * It looks like the right place to hook in truth maintenance for add is + * {@link AbstractTripleStore#addStatements(AbstractTripleStore, boolean, IChunkedOrderedIterator, com.bigdata.relation.accesspath.IElementFilter)} + * after the ISPOs are added to the SPORelation. + * Likewise, the place to hook in truth maintenance for delete is + * {@link AbstractTripleStore#removeStatements(IChunkedOrderedIterator, boolean)} + * after the ISPOs are removed from the SPORelation. 
+ */ + @SuppressWarnings("unchecked") + public void buildSubjectCentricTextIndex() { + + if (getTimestamp() != ITx.UNISOLATED) + throw new UnsupportedOperationException(); + + if (!subjectCentricTextIndex) + throw new UnsupportedOperationException(); + + final ISubjectCentricTextIndexer<?> textIndexer = getSubjectCentricSearchEngine(); + + try { + + // destroy the existing text index. + textIndexer.destroy(); + + } catch (NoSuchIndexException ex) { + + if (log.isInfoEnabled()) + log.info("could not destroy subject-centric full text index, does not currently exist"); + + } + + // create a new index. + textIndexer.create(); + + // TermIVs + { + // The index to scan for the individual subjects and their literal + // values. + final IIndex spoNdx = getContainer().getSPORelation().getPrimaryIndex(); + + /* + * For each S in SPO, collect up O values and pass this information + * to the subject-centric text indexer for indexing. + */ + + // used to decode the + @SuppressWarnings("rawtypes") + final ITupleSerializer tupSer = spoNdx.getIndexMetadata() + .getTupleSerializer(); + + /* + * Visit all plain, language code, and datatype literals in the + * object position of the primary statement index. + * + * Note: This uses a filter on the ITupleIterator in order to filter + * out non-literal terms before they are shipped from a remote index + * shard. + */ + final Iterator<ISPO> itr = new Striterator( + spoNdx.rangeIterator(null/* fromKey */, null/* toKey */, + 0/* capacity */, IRangeQuery.DEFAULT, + new TupleFilter<ISPO>() { + private static final long serialVersionUID = 1L; + + protected boolean isValid( + final ITuple<ISPO> obj) { + final ISPO spo = (ISPO) tupSer + .deserializeKey(obj); + if (spo.o().isLiteral()) { + return true; + } + return false; + } + })).addFilter(new Resolver() { + private static final long serialVersionUID = 1L; + + protected Object resolve(final Object obj) { + final ISPO spo = (ISPO) tupSer + .deserializeKey((ITuple<?>) obj); + return spo; + } + }); + + /* + * Keep track of the current subject being indexed. + */ + IV<?,?> s = null; + + /* + * Keep a collection of literals to be indexed for that subject. 
+ */ + final Collection<IV<?,?>> literals = new LinkedList<IV<?,?>>(); + + long subjectCount = 0; + long statementCount = 0; + + final boolean l = log.isInfoEnabled(); + + while (itr.hasNext()) { + + final ISPO spo = itr.next(); + + if (!spo.s().equals(s)) { + + // flush the old s to the text index if != null + + if (s != null) { + + textIndexer.index(s, getTerms(literals).values().iterator()); + + subjectCount++; + statementCount += literals.size(); + + if (l && subjectCount % 1000 == 0) { + log.info("indexed " + subjectCount + " subjects, " + statementCount + " statements"); + } + + } + + // set the current s and clear the literals + + s = spo.s(); + + literals.clear(); + + } + + literals.add(spo.o()); + + } + + if (s != null) { + + // flush the last subject + textIndexer.index(s, getTerms(literals).values().iterator()); + + subjectCount++; + statementCount += literals.size(); + + if (log.isInfoEnabled()) { + log.info("indexed " + subjectCount + " subjects, " + statementCount + " statements"); + } + + } + + } + + } + +// @SuppressWarnings("unchecked") +// public void refreshSubjectCentricTextIndex(final Set<IV<?,?>> subjects) { +// +// if (getTimestamp() != ITx.UNISOLATED) +// throw new UnsupportedOperationException(); +// +// if (!subjectCentricTextIndex) +// throw new UnsupportedOperationException(); +// +// final ISubjectCentricTextIndexer<?> textIndexer = getSubjectCentricSearchEngine(); +// +// final AbstractTripleStore db = getContainer(); +// +// /* +// * Keep a collection of literals to be indexed for each subject. +// */ +// final Collection<IV<?,?>> literals = new LinkedList<IV<?,?>>(); +// +// for (IV<?,?> s : subjects) { +// +// literals.clear(); +// +// /* +// * Visit all plain, language code, and datatype literals in the +// * object position of the primary statement index. +// * +// * Note: This uses a filter on the ITupleIterator in order to filter +// * out non-literal terms before they are shipped from a remote index +// * shard. +// */ +// final Iterator<ISPO> itr = db.getAccessPath(s, null, null, new SPOFilter<ISPO>() { +// private static final long serialVersionUID = 1L; +// @Override +// public boolean isValid(Object e) { +// return ((ISPO)e).o().isLiteral(); +// } +// }).iterator(); +// +// while (itr.hasNext()) { +// +// final ISPO spo = itr.next(); +// +// literals.add(spo.o()); +// +// } +// +// // flush the last subject +// textIndexer.index(s, getTerms(literals).values().iterator()); +// +// } +// +// } +// +// @SuppressWarnings("unchecked") +// public void refreshSubjectCentricTextIndex(final Set<ISPO> removed) { +// +// if (getTimestamp() != ITx.UNISOLATED) +// throw new UnsupportedOperationException(); +// +// if (!subjectCentricTextIndex) +// throw new UnsupportedOperationException(); +// +// final ISubjectCentricTextIndexer<?> textIndexer = getSubjectCentricSearchEngine(); +// +// final AbstractTripleStore db = getContainer(); +// +// /* +// * Keep a collection of literals to be indexed for each subject. +// */ +// final Collection<IV<?,?>> literals = new LinkedList<IV<?,?>>(); +// +// for (ISPO spo : removed) { +// +// literals.clear(); +// +// /* +// * Visit all plain, language code, and datatype literals in the +// * object position of the primary statement index. +// * +// * Note: This uses a filter on the ITupleIterator in order to filter +// * out non-literal terms before they are shipped from a remote index +// * shard. 
+// */ +// final Iterator<ISPO> itr = db.getAccessPath(s, null, null, new SPOFilter<ISPO>() { +// private static final long serialVersionUID = 1L; +// @Override +// public boolean isValid(Object e) { +// return ((ISPO)e).o().isLiteral(); +// } +// }).iterator(); +// +// while (itr.hasNext()) { +// +// final ISPO spo = itr.next(); +// +// literals.add(spo.o()); +// +// } +// +// // flush the last subject +//// textIndexer.index(s, getTerms(literals).values().iterator()); +// +// } +// +// } + /** * Batch resolution of internal values to {@link BigdataValue}s. * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -104,6 +104,8 @@ set.add(BD.MAX_RELEVANCE); set.add(BD.MIN_RELEVANCE); set.add(BD.MATCH_ALL_TERMS); + set.add(BD.SUBJECT_SEARCH); + set.add(BD.SEARCH_TIMEOUT); searchUris = Collections.unmodifiableSet(set); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -47,6 +47,7 @@ import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; +import com.bigdata.rdf.lexicon.ITextIndexer; import com.bigdata.rdf.lexicon.IValueCentricTextIndexer; import com.bigdata.rdf.sparql.ast.ConstantNode; import com.bigdata.rdf.sparql.ast.GroupNodeBase; @@ -282,6 +283,14 @@ assertObjectIsLiteral(sp); + } else if(uri.equals(BD.SUBJECT_SEARCH)) { + + assertObjectIsLiteral(sp); + + } else if(uri.equals(BD.SEARCH_TIMEOUT)) { + + assertObjectIsLiteral(sp); + } else { throw new AssertionError("Unverified search predicate: " + sp); @@ -339,6 +348,8 @@ private final Literal minRelevance; private final Literal maxRelevance; private final boolean matchAllTerms; + private final boolean subjectSearch; + private final Literal searchTimeout; public SearchCall( final AbstractTripleStore store, @@ -382,6 +393,8 @@ Literal minRelevance = null; Literal maxRelevance = null; boolean matchAllTerms = false; + boolean subjectSearch = false; + Literal searchTimeout = null; for (StatementPatternNode meta : statementPatterns.values()) { @@ -407,6 +420,10 @@ maxRelevance = (Literal) oVal; } else if (BD.MATCH_ALL_TERMS.equals(p)) { matchAllTerms = ((Literal) oVal).booleanValue(); + } else if (BD.SUBJECT_SEARCH.equals(p)) { + subjectSearch = ((Literal) oVal).booleanValue(); + } else if (BD.SEARCH_TIMEOUT.equals(p)) { + searchTimeout = (Literal) oVal; } } @@ -421,15 +438,22 @@ this.minRelevance = minRelevance; this.maxRelevance = maxRelevance; this.matchAllTerms = matchAllTerms; + this.subjectSearch = subjectSearch; + this.searchTimeout = searchTimeout; } @SuppressWarnings({ "rawtypes", "unchecked" }) private Hiterator<IHit<?>> getHiterator() { - final IValueCentricTextIndexer<IHit> 
textIndex = (IValueCentricTextIndexer) store - .getLexiconRelation().getSearchEngine(); +// final IValueCentricTextIndexer<IHit> textIndex = (IValueCentricTextIndexer) store +// .getLexiconRelation().getSearchEngine(); + final ITextIndexer<IHit> textIndex = (ITextIndexer) + (this.subjectSearch ? + store.getLexiconRelation().getSubjectCentricSearchEngine() : + store.getLexiconRelation().getSearchEngine()); + if (textIndex == null) throw new UnsupportedOperationException("No free text index?"); @@ -450,7 +474,7 @@ minRank == null ? BD.DEFAULT_MIN_RANK/*1*/ : minRank.intValue()/* minRank */, maxRank == null ? BD.DEFAULT_MAX_RANK/*Integer.MAX_VALUE*/ : maxRank.intValue()/* maxRank */, matchAllTerms, - BD.DEFAULT_TIMEOUT/*0L*//* timeout */, + searchTimeout == null ? BD.DEFAULT_TIMEOUT/*0L*/ : searchTimeout.longValue()/* timeout */, TimeUnit.MILLISECONDS); } @@ -481,11 +505,11 @@ if (bindingsClause.length == 1 && !bindingsClause[0].isEmpty()) { /* - * FIXME This case is not supported. We need to propagate - * the bindings into the search engine. + * Fixed this by putting the ASTBindingAssigner before the + * ASTSearchOptimizer in the DefaultOptimizerList. */ - throw new UnsupportedOperationException(); +// throw new UnsupportedOperationException(); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -200,20 +200,6 @@ add(new ASTEmptyGroupOptimizer()); /** - * Translate {@link BD#SEARCH} and associated magic predicates into a a - * {@link ServiceNode}. If there are multiple searches in the query, - * then each is translated into its own {@link ServiceNode}. The magic - * predicates identify the bindings to be projected out of the named - * subquery (rank, cosine, etc). - * <p> - * Note: Search is most efficiently expressed within named subqueries. - * This let's you perform additional joins against the solutions from - * the search service before the results are materialized on a hash - * index to be joined into the main query. - */ - add(new ASTSearchOptimizer()); - - /** * Rewrites any {@link ProjectionNode} with a wild card into the set of * variables visible to the {@link QueryBase} having that projection. * This is done first for the {@link NamedSubqueriesNode} and then @@ -249,6 +235,20 @@ add(new ASTBindingAssigner()); /** + * Translate {@link BD#SEARCH} and associated magic predicates into a a + * {@link ServiceNode}. If there are multiple searches in the query, + * then each is translated into its own {@link ServiceNode}. The magic + * predicates identify the bindings to be projected out of the named + * subquery (rank, cosine, etc). + * <p> + * Note: Search is most efficiently expressed within named subqueries. + * This let's you perform additional joins against the solutions from + * the search service before the results are materialized on a hash + * index to be joined into the main query. + */ + add(new ASTSearchOptimizer()); + + /** * Imposes a LIMIT of ONE for a non-aggregation ASK query. 
*/ add(new AskOptimizer()); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -99,8 +99,10 @@ import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.impl.BlobIV; import com.bigdata.rdf.internal.impl.extensions.XSDStringExtension; +import com.bigdata.rdf.lexicon.BigdataSubjectCentricFullTextIndex; import com.bigdata.rdf.lexicon.BigdataValueCentricFullTextIndex; import com.bigdata.rdf.lexicon.ITermIndexCodes; +import com.bigdata.rdf.lexicon.ITextIndexer; import com.bigdata.rdf.lexicon.IValueCentricTextIndexer; import com.bigdata.rdf.lexicon.LexiconKeyOrder; import com.bigdata.rdf.lexicon.LexiconRelation; @@ -862,6 +864,20 @@ String DEFAULT_TEXT_INDEX = "true"; + /** + * Boolean option (default <code>true</code>) enables support for a + * full text index that may be used to lookup literals by tokens found + * in the text of those literals. + * + * @see #TEXT_INDEXER_CLASS + * @see #TEXT_INDEX_DATATYPE_LITERALS + * @see #INLINE_TEXT_LITERALS + * @see #MAX_INLINE_TEXT_LENGTH + */ + String SUBJECT_CENTRIC_TEXT_INDEX = AbstractTripleStore.class.getName() + ".subjectCentricTextIndex"; + + String DEFAULT_SUBJECT_CENTRIC_TEXT_INDEX = "false"; + /** * Boolean option enables support for a full text index that may be used * to lookup datatype literals by tokens found in the text of those @@ -894,6 +910,25 @@ String DEFAULT_TEXT_INDEXER_CLASS = BigdataValueCentricFullTextIndex.class .getName(); + /** + * The name of the {@link ITextIndexer} class. The implementation MUST + * declare a method with the following signature which will be used to + * locate instances of that class. + * + * <pre> + * static public ITextIndexer getInstance(final IIndexManager indexManager, + * final String namespace, final Long timestamp, + * final Properties properties) + * </pre> + * + * @see #DEFAULT_TEXT_INDEXER_CLASS + */ + String SUBJECT_CENTRIC_TEXT_INDEXER_CLASS = AbstractTripleStore.class.getName() + + ".subjectCentricTextIndexerClass"; + + String DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS = BigdataSubjectCentricFullTextIndex.class + .getName(); + /* * Inlining options. */ Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -284,6 +284,45 @@ final boolean DEFAULT_PREFIX_MATCH = false; /** + * Magic predicate used to query for free text search metadata. Use + * in conjunction with {@link #SEARCH} as follows: + * <p> + * <pre> + * + * select ?s + * where { + * ?s bd:search "scale-out RDF triplestore" . + * ?s bd:subjectSearch "true" . + * } + * + * </pre> + * <p> + * The subject-centric search index must be enabled via + * {@link AbstractTripleStore.Options#SUBJECT_CENTRIC_TEXT_INDEX}. 
+ */ + final URI SUBJECT_SEARCH = new URIImpl(SEARCH_NAMESPACE+"subjectSearch"); + + final boolean DEFAULT_SUBJECT_SEARCH = false; + + /** + * Magic predicate used to query for free text search metadata. Use + * in conjunction with {@link #SEARCH} as follows: + * <p> + * <pre> + * + * select ?s + * where { + * ?s bd:search "scale-out RDF triplestore" . + * ?s bd:searchTimeout "5000" . + * } + * + * </pre> + * <p> + * Timeout specified in milliseconds. + */ + final URI SEARCH_TIMEOUT = new URIImpl(SEARCH_NAMESPACE+"searchTimeout"); + + /** * The default timeout for a free text search (milliseconds). */ final long DEFAULT_TIMEOUT = Long.MAX_VALUE; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestAll.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestAll.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -108,6 +108,9 @@ // test suite for access paths reading on the TERMS index. suite.addTestSuite(TestAccessPaths.class); + // test suite for subject-centric text index + suite.addTestSuite(TestSubjectCentricFullTextIndex.class); + return suite; } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -0,0 +1,541 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 19, 2007 + */ + +package com.bigdata.rdf.lexicon; + +import java.util.Arrays; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import junit.framework.AssertionFailedError; + +import org.apache.log4j.Logger; +import org.openrdf.model.impl.LiteralImpl; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.model.vocabulary.XMLSchema; + +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.spo.TestSPOKeyOrder; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.store.AbstractTripleStoreTestCase; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.store.BigdataValueIteratorImpl; +import com.bigdata.search.Hit; +import com.bigdata.search.Hiterator; +import com.bigdata.striterator.ChunkedWrappedIterator; +import com.bigdata.striterator.ICloseableIterator; +import com.bigdata.striterator.Resolver; +import com.bigdata.striterator.Striterator; + +/** + * Test of adding terms with the full text index enabled and of lookup of terms + * by tokens which appear within those terms. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestSubjectCentricFullTextIndex extends AbstractTripleStoreTestCase { + + private static final transient Logger log = Logger.getLogger(TestSubjectCentricFullTextIndex.class); + + /** + * + */ + public TestSubjectCentricFullTextIndex() { + } + + /** + * @param name + */ + public TestSubjectCentricFullTextIndex(String name) { + super(name); + } + + public Properties getProperties() { + + Properties properties = new Properties(super.getProperties()); + + // enable the full text index. + properties.setProperty(AbstractTripleStore.Options.TEXT_INDEX,"true"); + properties.setProperty(AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEX,"true"); + + return properties; + + } + +// /** +// * Test helper verifies that the term is not in the lexicon, adds the term +// * to the lexicon, verifies that the term can be looked up by its assigned +// * term identifier, verifies that the term is now in the lexicon, and +// * verifies that adding the term again returns the same term identifier. +// * +// * @param term +// * The term. 
+// */ +// protected void doAddTermTest(final AbstractTripleStore store, +// final BigdataValue term) { +// +// assertEquals(NULL, store.getIV(term)); +// +// final IV<?,?> id = store.addTerm(term); +// +// assertNotSame(NULL, id); +// +// assertEquals(id, store.getIV(term)); +// +// assertEquals(term, store.getTerm(id)); +// +// assertEquals(id, store.addTerm(term)); +// +// } + + private void assertExpectedHits(final AbstractTripleStore store, + final String query, final String languageCode, + final BigdataValue[] expected) { + + assertExpectedHits(store, query, languageCode, 0f/* minCosine */, + expected); + + } + + @SuppressWarnings("unchecked") + private void assertExpectedHits(final AbstractTripleStore store, + final String query, final String languageCode, + final float minCosine, final BigdataValue[] expected) { + + final Hiterator hitr = store.getLexiconRelation().getSubjectCentricSearchEngine() + .search(query, languageCode, false/* prefixMatch */, + minCosine, 1.0d/* maxCosine */, + 1/* minRank */, Integer.MAX_VALUE/* maxRank */, + false/* matchAllTerms */, + Long.MAX_VALUE,//2L/* timeout */, + TimeUnit.MILLISECONDS// TimeUnit.SECONDS + ); + + // assertEquals("#hits", (long) expected.length, itr.size()); + + final ICloseableIterator<BigdataValue> itr2 = new BigdataValueIteratorImpl( + store, new ChunkedWrappedIterator<IV>(new Striterator(hitr) + .addFilter(new Resolver() { + private static final long serialVersionUID = 1L; + + @Override + protected Object resolve(Object e) { + final Hit hit = (Hit) e; + if (log.isDebugEnabled()) { + log.debug(hit); + } + return hit.getDocId(); + } + }))); + + try { + + TestSPOKeyOrder.assertSameIteratorAnyOrder(expected, itr2); + + } catch (AssertionFailedError ex) { + + fail("minCosine=" + minCosine + ", expected=" + + Arrays.toString(expected) + ", actual=" + hitr, ex); + + } finally { + + itr2.close(); + + } + + } + + private LiteralImpl getLargeLiteral(final AbstractTripleStore store) { + + final int len = store.getLexiconRelation().getBlobsThreshold(); + + final StringBuilder sb = new StringBuilder(len); + + final String[] tokens = new String[] { + "apple", + "mary", + "john", + "barley", + "mellow", + "pudding", + "fries", + "peal", + "gadzooks" + }; + + for (int i = 0; sb.length() < len; i++) { + + sb.append(tokens[(i % tokens.length)]); + + sb.append(" "); + + } + + final String s = sb.toString(); + + if (log.isInfoEnabled()) + log.info("length(s)=" + s.length()); + + return new LiteralImpl(s); + + } + + public void test_SingleSubject() { + + AbstractTripleStore store = getStore(); + + try { + + assertNotNull(store.getLexiconRelation().getSearchEngine()); + + final BigdataValueFactory f = store.getValueFactory(); + + final BigdataURI s = f.createURI(BD.NAMESPACE+"s"); + + final BigdataURI p = f.createURI(BD.NAMESPACE+"p"); + + final LiteralImpl largeLiteral = getLargeLiteral(store); + + final BigdataValue[] terms = new BigdataValue[] {// + f.createLiteral("abc"),// + f.createLiteral("abc", "en"),// + f.createLiteral("good day", "en"),// + f.createLiteral("gutten tag", "de"),// + f.createLiteral("tag team", "en"),// + f.createLiteral("the first day", "en"),// // 'the' is a stopword. 
+ + f.createURI("http://www.bigdata.com"),// + f.asValue(RDF.TYPE),// + f.asValue(RDFS.SUBCLASSOF),// + f.asValue(XMLSchema.DECIMAL),// + + f.createBNode(UUID.randomUUID().toString()),// + f.createBNode("a12"),// + + f.asValue(largeLiteral),// + + }; + + for (BigdataValue o : terms) { + + store.addStatement(s, p, o); + + } + + store.commit(); + + // build the subject-centric full text index. + store.getLexiconRelation().buildSubjectCentricTextIndex(); + + if (log.isInfoEnabled()) { + log.info("\n"+store.dumpStore(true, false, false)); + } + + /* + * Note: the language code is only used when tokenizing literals. It + * IS NOT applied as a filter to the recovered literals. + */ + + assertExpectedHits(store, "abc", null/* languageCode */, + new BigdataValue[] { + s + }); + + assertExpectedHits(store, "tag", "en", new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "tag", "de", new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "GOOD DAY", "en", // + .0f, // minCosine + new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "GOOD DAY", "en", // + .0f, // minCosine + new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "day", "en", // + .0f, // minCosine + new BigdataValue[] { + s + }); + + // 'the' is a stopword, so there are no hits. + assertExpectedHits(store, "the", "en", new BigdataValue[] {}); + + // BLOB + assertExpectedHits(store, largeLiteral.getLabel(), null/*lang*/, // + .0f, // minCosine + new BigdataValue[] { + s + }); + + /* + * re-open the store before search to verify that the data were made + * restart safe. + */ + if (store.isStable()) { + + store.commit(); + + store = reopenStore(store); + + } + + // re-verify the full text index. + { + + assertNotNull(store.getLexiconRelation().getSubjectCentricSearchEngine()); + + assertExpectedHits(store, "abc", null/* languageCode */, + new BigdataValue[] { // + s + }); + + assertExpectedHits(store, "tag", "en", new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "tag", "de", new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "GOOD DAY", "en", // + .0f, // minCosine + new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "GOOD DAY", "en", // + .0f, // minCosine + new BigdataValue[] {// + s + }); + + assertExpectedHits(store, "day", "en", // + .0f, // minCosine + new BigdataValue[] { + s + }); + + // BLOB + assertExpectedHits(store, largeLiteral.getLabel(), null/*lang*/, // + .0f, // minCosine + new BigdataValue[] { + s + }); + + } + + } finally { + + store.__tearDownUnitTest(); + + } + + } + + public void test_MultiSubject() { + + AbstractTripleStore store = getStore(); + + try { + + assertNotNull(store.getLexiconRelation().getSearchEngine()); + + final BigdataValueFactory f = store.getValueFactory(); + + final BigdataURI s1 = f.createURI(BD.NAMESPACE+"s1"); + + final BigdataURI s2 = f.createURI(BD.NAMESPACE+"s2"); + + final BigdataURI s3 = f.createURI(BD.NAMESPACE+"s3"); + + final BigdataURI p = f.createURI(BD.NAMESPACE+"p"); + + final LiteralImpl largeLiteral = getLargeLiteral(store); + + final BigdataValue[] terms = new BigdataValue[] {// + f.createLiteral("abc"),// + f.createLiteral("abc", "en"),// + f.createLiteral("good day", "en"),// + f.createLiteral("gutten tag", "de"),// + f.createLiteral("tag team", "en"),// + f.createLiteral("the first day", "en"),// // 'the' is a stopword. 
+ + f.createURI("http://www.bigdata.com"),// + f.asValue(RDF.TYPE),// + f.asValue(RDFS.SUBCLASSOF),// + f.asValue(XMLSchema.DECIMAL),// + + f.createBNode(UUID.randomUUID().toString()),// + f.createBNode("a12"),// + + f.asValue(largeLiteral),// + + }; + + for (BigdataValue o : terms) { + + store.addStatement(s1, p, o); + + } + + for (int i = 0; i < 3; i++) { + + store.addStatement(s2, p, terms[i]); + + } + + for (int i = 3; i < 6; i++) { + + store.addStatement(s3, p, terms[i]); + + } + + store.commit(); + + // build the subject-centric full text index. + store.getLexiconRelation().buildSubjectCentricTextIndex(); + + if (log.isInfoEnabled()) { + log.info("\n"+store.dumpStore(true, false, false)); + } + + /* + * Note: the language code is only used when tokenizing literals. It + * IS NOT applied as a filter to the recovered literals. + */ + + assertExpectedHits(store, "abc", null/* languageCode */, + new BigdataValue[] { + s1, s2 + }); + + assertExpectedHits(store, "tag", "en", new BigdataValue[] {// + s1, s3 + }); + + assertExpectedHits(store, "tag", "de", new BigdataValue[] {// + s1, s3 + }); + + assertExpectedHits(store, "GOOD DAY", "en", // + .0f, // minCosine + new BigdataValue[] {// + s1, s2, s3 + }); + + assertExpectedHits(store, "day", "en", // + .0f, // minCosine + new BigdataValue[] { + s1, s2, s3 + }); + + // 'the' is a stopword, so there are no hits. + assertExpectedHits(store, "the", "en", new BigdataValue[] {}); + + // BLOB + assertExpectedHits(store, largeLiteral.getLabel(), null/*lang*/, // + .0f, // minCosine + new BigdataValue[] { + s1 + }); + + /* + * re-open the store before search to verify that the data were made + * restart safe. + */ + if (store.isStable()) { + + store.commit(); + + store = reopenStore(store); + + } + + // re-verify the full text index. + { + + assertNotNull(store.getLexiconRelation().getSubjectCentricSearchEngine()); + + assertExpectedHits(store, "abc", null/* languageCode */, + new BigdataValue[] { // + s1, s2 + }); + + assertExpectedHits(store, "tag", "en", new BigdataValue[] {// + s1, s3 + }); + + assertExpectedHits(store, "tag", "de", new BigdataValue[] {// + s1, s3 + }); + + assertExpectedHits(store, "GOOD DAY", "en", // + .0f, // minCosine + new BigdataValue[] {// + s1, s2, s3 + }); + + assertExpectedHits(store, "day", "en", // + .0f, // minCosine + new BigdataValue[] { + s1, s2, s3 + }); + + // BLOB + assertExpectedHits(store, largeLiteral.getLabel(), null/*lang*/, // + .0f, // minCosine + new BigdataValue[] { + s1 + }); + + } + + } finally { + + store.__tearDownUnitTest(); + + } + + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2012-04-01 17:36:23 UTC (rev 6244) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2012-04-02 09:15:46 UTC (rev 6245) @@ -60,7 +60,9 @@ import com.bigdata.journal.BufferMode; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.lexicon.ITextIndexer; import com.bigdata.rdf.lexicon.IValueCentricTextIndexer; +import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail.Options; import com.bigdata.rdf.sparql.ast.eval.service.TestSearch; @@ -87,21 +89,18 @@ super(name); } -// /** -// * Overriden to use a persistent backing store. 
-// */ -// public Properties getProperties() { -// -// Properties properties = super.getProperties(); -// -// // use a disk-based mode since we will re-open the store to test restart safety. -// properties.setProperty(Options.BUFFER_MODE,BufferMode.Disk.toString()); -// -// properties.setProperty(Options.FILE,file.toString()); -// -// return properties; -// -// } + /** + * Overriden to allow the subject-centric full text index. + */ + public Properties getProperties() { + + Properties properties = super.getProperties(); + + properties.setProperty(Options.SUBJECT_CENTRIC_TEXT_INDEX, "true"); + + return properties; + + } // /** // * Overriden to cause the backing store to be deleted. @@ -789,7 +788,7 @@ Collection<BindingSet> answer = new LinkedList<BindingSet>(); - final IValueCentricTextIndexer search = + final ITextIndexer search = sail.getDatabase().getLexiconRelation().getSearchEngine(); final Hiterator<IHit> hits = search.search(searchQuery, @@ -1360,6 +1359,848 @@ } + public void testSubjectSearch() throws Exception { + + final BigdataSail sail = getSail(); + try { + + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + + try { + + cxn.setAutoCommit(false); + + final ValueFactory vf = sail.getValueFactory(); + + final URI s1 = vf.createURI(BD.NAMESPACE+"s1"); + final URI s2 = vf.createURI(BD.NAMESPACE+"s2"); + final URI s3 = vf.createURI(BD.NAMESPACE+"s3"); + final URI s4 = vf.createURI(BD.NAMESPACE+"s4"); + final URI s5 = vf.createURI(BD.NAMESPACE+"s5"); + final URI s6 = vf.createURI(BD.NAMESPACE+"s6"); + final URI s7 = vf.createURI(BD.NAMESPACE+"s7"); + final URI s8 = vf.createURI(BD.NAMESPACE+"s8"); + final Literal l1 = vf.createLiteral("how"); + final Literal l2 = vf.createLiteral("now"); + final Literal l3 = vf.createLiteral("brown"); + final Literal l4 = vf.createLiteral("cow"); + final Literal l5 = vf.createLiteral("how now"); + final Literal l6 = vf.createLiteral("brown cow"); + final Literal l7 = vf.createLiteral("how now brown cow"); + final Literal l8 = vf.createLiteral("toilet"); + + cxn.add(s1, RDFS.LABEL, l1); + cxn.add(s2, RDFS.LABEL, l2); + cxn.add(s3, RDFS.LABEL, l3); + cxn.add(s4, RDFS.LABEL, l4); + cxn.add(s5, RDFS.LABEL, l5); + cxn.add(s6, RDFS.LABEL, l6); + cxn.add(s7, RDFS.LABEL, l7); + cxn.add(s8, RDFS.LABEL, l8); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. 
+ */ + cxn.commit(); + + sail.getDatabase().getLexiconRelation().buildSubjectCentricTextIndex(); + + final Map<IV, Literal> literals = new LinkedHashMap<IV, Literal>(); + literals.put(((BigdataURI)s1).getIV(), l1); + literals.put(((BigdataURI)s2).getIV(), l2); + literals.put(((BigdataURI)s3).getIV(), l3); + literals.put(((BigdataURI)s4).getIV(), l4); + literals.put(((BigdataURI)s5).getIV(), l5); + literals.put(((BigdataURI)s6).getIV(), l6); + literals.put(((BigdataURI)s7).getIV(), l7); + literals.put(((BigdataURI)s8).getIV(), l8); + + final Map<IV, URI> uris = new LinkedHashMap<IV, URI>(); + uris.put(((BigdataURI)s1).getIV(), s1); + uris.put(((BigdataURI)s2).getIV(), s2); + uris.put(((BigdataURI)s3).getIV(), s3); + uris.put(((BigdataURI)s4).getIV(), s4); + uris.put(((BigdataURI)s5).getIV(), s5); + uris.put(((BigdataURI)s6).getIV(), s6); + uris.put(((BigdataURI)s7).getIV(), s7); + uris.put(((BigdataURI)s8).getIV(), s8); + +/**/ + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + final String searchQuery = "how now brown cow"; + + final String query = + "select ?s ?score " + + "where " + + "{ " + + " ?s <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?s <"+BD.RELEVANCE+"> ?score . " + + " ?s <"+BD.SUBJECT_SEARCH+"> true . " + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + final BindingSet tmp = result.next(); + if (log.isInfoEnabled()) + log.info(i + ": " + tmp.toString()); + i++; + } + assertEquals("wrong # of results", 7, i); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSubjectCentricSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + BD.DEFAULT_PREFIX_MATCH,//false, // prefixMatch + BD.DEFAULT_MIN_RELEVANCE,//0d, // minCosine + BD.DEFAULT_MAX_RELEVANCE,//1.0d, // maxCosine + BD.DEFAULT_MIN_RANK,//1 + BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) + BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + BD.DEFAULT_TIMEOUT,//1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = (IV)hit.getDocId(); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("score", score)); + if(log.isInfoEnabled()) + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + + { + final String searchQuery = "how now brown cow"; + final int maxHits = 5; + + final String query = + "select ?s ?score " + + "where " + + "{ " + + " ?s <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?s <"+BD.RELEVANCE+"> ?score . " + + " ?s <"+BD.MAX_RANK+"> \""+maxHits+"\" . " + + " ?s <"+BD.SUBJECT_SEARCH+"> true . 
" + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + final BindingSet tmp = result.next(); + if (log.isInfoEnabled()) + log.info(i + ": " + tmp.toString()); + i++; + } + assertEquals("wrong # of results", 5, i); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSubjectCentricSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + BD.DEFAULT_PREFIX_MATCH,//false, // prefixMatch + ... [truncated message content] |
From: <tho...@us...> - 2012-04-02 13:32:32
Revision: 6247 http://bigdata.svn.sourceforge.net/bigdata/?rev=6247&view=rev Author: thompsonbry Date: 2012-04-02 13:32:21 +0000 (Mon, 02 Apr 2012) Log Message: ----------- Added the CustomServiceFactory which extends ServiceFactory and provides a hook for services to notice mutable BigdataSailConnection starts. This makes it possible for a custom service to register an IChangeLog listener and thereby notice database updates (statements added or removed). Added a test suite for the ServiceRegistry. @see https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=FederatedQuery Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestAll.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/CustomServiceFactory.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ReadOnlyIterator.java Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/CustomServiceFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/CustomServiceFactory.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/CustomServiceFactory.java 2012-04-02 13:32:21 UTC (rev 6247) @@ -0,0 +1,56 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 2, 2012 + */ + +package com.bigdata.rdf.sparql.ast.service; + +import com.bigdata.rdf.changesets.IChangeLog; +import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; + +/** + * Interface for custom services. + * + * @see <a + * href="https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=FederatedQuery"> + * Federated Query and Custom Services</a> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface CustomServiceFactory extends ServiceFactory { + + /** + * Callback is used to notify the {@link ServiceFactory} of connection + * starts. If the service wishes to be notified of updates, then it must + * return a {@link IChangeLog} implementation here. The listener will be + * obtained when the {@link ServiceFactory} is registered. + * + * @return The {@link IChangeLog} listener -or- <code>null</code> if the + * service does not want to observe changes. 
+ */ + void startConnection(BigdataSailConnection conn); + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/CustomServiceFactory.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2012-04-02 09:19:51 UTC (rev 6246) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2012-04-02 13:32:21 UTC (rev 6247) @@ -1,7 +1,9 @@ package com.bigdata.rdf.sparql.ast.service; +import java.util.Iterator; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicReference; import org.apache.http.conn.ClientConnectionManager; @@ -11,8 +13,14 @@ import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; +import cutthecrap.utils.striterators.ReadOnlyIterator; + /** * Registry for service calls. + * + * @see <a + * href="https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=FederatedQuery"> + * Federated Query and Custom Services</a> */ public class ServiceRegistry { @@ -27,17 +35,37 @@ } - private ConcurrentMap<URI, ServiceFactory> services; - private ConcurrentMap<URI/*from*/, URI/*to*/> aliases; + /** + * Primary {@link ServiceFactory} registration. + */ + private final ConcurrentMap<URI, ServiceFactory> services; + /** + * Aliases for registered {@link ServiceFactory}s. + */ + private final ConcurrentMap<URI/* from */, URI/* to */> aliases; + + /** + * The set of registered {@link ServiceFactory}s is also maintained here for + * fast, safe iteration by {@link #services()}. + */ + private final CopyOnWriteArrayList<CustomServiceFactory> customServices; + + /** + * The default {@link ServiceFactory} used for REMOTE SPARQL SERVICE end + * points which are not otherwise registered. + */ private AtomicReference<ServiceFactory> defaultServiceFactoryRef; protected ServiceRegistry() { services = new ConcurrentHashMap<URI, ServiceFactory>(); + customServices = new CopyOnWriteArrayList<CustomServiceFactory>(); + aliases = new ConcurrentHashMap<URI, URI>(); + // Add the Bigdata search service. 
add(BD.SEARCH, new SearchServiceFactory()); defaultServiceFactoryRef = new AtomicReference<ServiceFactory>( @@ -74,18 +102,28 @@ public final void add(final URI serviceURI, final ServiceFactory factory) { - if (aliases.containsKey(serviceURI)) { + synchronized (this) { - throw new UnsupportedOperationException("Already declared."); + if (aliases.containsKey(serviceURI)) { - } + throw new UnsupportedOperationException("Already declared."); - if (services.putIfAbsent(serviceURI, factory) != null) { + } - throw new UnsupportedOperationException("Already declared."); + if (services.putIfAbsent(serviceURI, factory) != null) { + throw new UnsupportedOperationException("Already declared."); + + } + + if (factory instanceof CustomServiceFactory) { + + customServices.add((CustomServiceFactory) factory); + + } + } - + } /** @@ -100,16 +138,30 @@ boolean modified = false; - if (aliases.remove(serviceURI) != null) { + synchronized (this) { - modified = true; + if (aliases.remove(serviceURI) != null) { - } + // removed an alias. + modified = true; - if (services.remove(serviceURI) != null) { + } - modified = true; + // Remove the factory. + final ServiceFactory factory = services.remove(serviceURI); + if (factory != null) { + + modified = true; + + if(factory instanceof CustomServiceFactory) { + + customServices.remove(factory); + + } + + } + } return modified; @@ -117,11 +169,22 @@ } /** + * Register one URI as an alias for another. * * @param serviceURI - * The URI of a service which is already declared. + * The URI of a service. It is expressly permitted to register an + * alias for a URI which does not have a registered + * {@link ServiceFactory}. This may be used to alias a remote URI + * which you want to intercept locally. * @param aliasURI * The URI of an alias under which that service may be accessed. + * @throws IllegalStateException + * if the <i>serviceURI</i> has already been registered as a + * alias (you must {@link #remove(URI)} the old alias before you + * can map it against a different <i>serviceURI</i>). + * @throws IllegalStateException + * if the <i>aliasURI</i> has already been registered as a + * service (you can not mask an existing service registration). */ public final void addAlias(final URI serviceURI, final URI aliasURI) { @@ -131,18 +194,61 @@ if (aliasURI == null) throw new IllegalArgumentException(); - if (services.containsKey(serviceURI)) { + synchronized (this) { - throw new UnsupportedOperationException("ServiceURI:" + serviceURI - + " already registered."); + /* + * Note: it is expressly permitted to register an alias for a URI + * which does not have a registered ServiceFactory. This may be used + * to alias a remote URI which you want to intercept locally. + */ + +// // Lookup the service. +// final ServiceFactory service = services.get(serviceURI); +// +// if (service == null) { +// +// throw new IllegalStateException("No such service: uri=" +// + serviceURI); +// +// } + + if (services.containsKey(aliasURI)) { + throw new IllegalStateException( + "Alias already registered as service: uri=" + aliasURI); + + } + + if (aliases.containsKey(aliasURI)) { + + throw new IllegalStateException( + "Alias already registered: uri=" + aliasURI); + + } + + aliases.put(aliasURI, serviceURI); + } - aliases.put(aliasURI, serviceURI); - } /** + * Return an {@link Iterator} providing a read-only view of the registered + * {@link CustomServiceFactory}s. 
+ */ + public Iterator<CustomServiceFactory> customServices() { + + /* + * Note: This relies on the copy-on-write array list for fast and + * efficient traversal with snapshot isolation. + */ + + return new ReadOnlyIterator<CustomServiceFactory>( + customServices.iterator()); + + } + + /** * Return the {@link ServiceFactory} for that URI. If the {@link URI} is a * known alias, then it is resolved before looking up the * {@link ServiceFactory}. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestAll.java 2012-04-02 09:19:51 UTC (rev 6246) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestAll.java 2012-04-02 13:32:21 UTC (rev 6247) @@ -108,7 +108,12 @@ * Note: See the NanoSparqlServer test suite for REMOTE SPARQL SERVICE * evaluation against embedded HTTP end points. */ - + + /* + * Test suite for registering and managing services. + */ + suite.addTestSuite(TestServiceRegistry.class); + return suite; } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java 2012-04-02 13:32:21 UTC (rev 6247) @@ -0,0 +1,439 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.rdf.sparql.ast.eval.service; + +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.http.conn.ClientConnectionManager; +import org.apache.log4j.Logger; +import org.openrdf.model.URI; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.sail.SailException; + +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.fed.QueryEngineFactory; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; +import com.bigdata.rdf.sail.sparql.AbstractBigdataExprBuilderTestCase; +import com.bigdata.rdf.sparql.ast.ConstantNode; +import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.service.CustomServiceFactory; +import com.bigdata.rdf.sparql.ast.service.IServiceOptions; +import com.bigdata.rdf.sparql.ast.service.OpenrdfNativeServiceOptions; +import com.bigdata.rdf.sparql.ast.service.RemoteServiceFactoryImpl; +import com.bigdata.rdf.sparql.ast.service.RemoteServiceOptions; +import com.bigdata.rdf.sparql.ast.service.ServiceCall; +import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams; +import com.bigdata.rdf.sparql.ast.service.ServiceFactory; +import com.bigdata.rdf.sparql.ast.service.ServiceNode; +import com.bigdata.rdf.sparql.ast.service.ServiceRegistry; +import com.bigdata.rdf.store.AbstractTripleStore; + +/** + * Test suite for the {@link ServiceRegistry}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestServiceRegistry extends AbstractBigdataExprBuilderTestCase { + + private static final Logger log = Logger + .getLogger(TestServiceRegistry.class); + + public TestServiceRegistry() { + + } + + public TestServiceRegistry(final String name) { + super(name); + } + + /** + * Unit test for adding, resolving, and removing a {@link ServiceFactory}. + */ + public void test_addGetRemove() { + + final URI serviceURI1 = new URIImpl("http://www.bigdata.com/myService"); + + final URI serviceURI2 = new URIImpl("http://www.bigdata.com/myService2"); + + final RemoteServiceOptions options = new RemoteServiceOptions(); + + final RemoteServiceFactoryImpl serviceFactory = new RemoteServiceFactoryImpl( + options); + + try { + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + // Register. + ServiceRegistry.getInstance().add(serviceURI1, serviceFactory); + + // Verify discovery. + assertNotNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + // Verify same object. + assertTrue(serviceFactory == ServiceRegistry.getInstance().get( + serviceURI1)); + + } finally { + + // De-register. + ServiceRegistry.getInstance().remove(serviceURI1); + + } + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + } + + /** + * Unit test service alias. 
+ */ + public void test_serviceAlias() { + + final URI serviceURI1 = new URIImpl("http://www.bigdata.com/myService"); + + final URI serviceURI2 = new URIImpl("http://www.bigdata.com/myService2"); + + final RemoteServiceOptions options = new RemoteServiceOptions(); + + final RemoteServiceFactoryImpl serviceFactory = new RemoteServiceFactoryImpl( + options); + + try { + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + // Register. + ServiceRegistry.getInstance().add(serviceURI1, serviceFactory); + + // Verify discovery. + assertNotNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + // Verify same object. + assertTrue(serviceFactory == ServiceRegistry.getInstance().get( + serviceURI1)); + + /* + * Can not register the serviceURI as an alias (for any registered + * service URI). + */ + try { + ServiceRegistry.getInstance() + .addAlias(serviceURI1, serviceURI1); + fail("Expecting: " + IllegalStateException.class); + } catch (IllegalStateException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + /* + * Now alias the service to the 2nd URI. + */ + + // Register alias. + ServiceRegistry.getInstance().addAlias(serviceURI1, serviceURI2); + + // Verify discovery. + assertNotNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNotNull(ServiceRegistry.getInstance().get(serviceURI2)); + + // Verify same object. + assertTrue(serviceFactory == ServiceRegistry.getInstance().get( + serviceURI1)); + assertTrue(serviceFactory == ServiceRegistry.getInstance().get( + serviceURI2)); + + /* + * Can not re-register the same alias. + */ + try { + ServiceRegistry.getInstance() + .addAlias(serviceURI1, serviceURI2); + fail("Expecting: " + IllegalStateException.class); + } catch (IllegalStateException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + } finally { + + // De-register. + ServiceRegistry.getInstance().remove(serviceURI1); + ServiceRegistry.getInstance().remove(serviceURI2); + + } + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + } + + /** + * Unit test verifies that you MAY register an alias for a URI which is NOT + * associated with an explicitly registered service. + */ + public void test_serviceAlias2() { + + final AbstractTripleStore store = getStore(getProperties()); + + try { + + /* + * Declare terms that we will need. + */ + + final BigdataValueFactory f = store.getValueFactory(); + + final BigdataURI serviceURI1 = f + .createURI("http://www.bigdata.com/myService"); + + final BigdataURI serviceURI2 = f + .createURI("http://www.bigdata.com/myService2"); + + final BigdataValue[] values = new BigdataValue[] { // + serviceURI1,// + serviceURI2,// + }; + + store.getLexiconRelation() + .addTerms(values, values.length, false/* readOnly */); + + try { + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + // Register alias. + ServiceRegistry.getInstance() + .addAlias(serviceURI1, serviceURI2); + + // Verify discovery (both null since neither paired with Service). + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + + /* + * Verify create for both end points. 
One is an alias. Neither + * is explicitly paired with a ServiceFactory. + */ + final QueryEngine queryEngine = QueryEngineFactory + .getQueryController(store.getIndexManager()); + + final ClientConnectionManager cm = queryEngine + .getClientConnectionManager(); + + final JoinGroupNode groupNode = new JoinGroupNode(); + + final ServiceNode serviceNode = new ServiceNode( + new ConstantNode(serviceURI1.getIV()), groupNode); + + // the end point which was aliased. + { + + final ServiceCall<?> serviceCall = ServiceRegistry + .getInstance().toServiceCall(store, cm, + serviceURI1, serviceNode); + + assertNotNull(serviceCall); + + } + + // the end point which we did not alias. + { + + final ServiceCall<?> serviceCall = ServiceRegistry + .getInstance().toServiceCall(store, cm, + serviceURI2, serviceNode); + + assertNotNull(serviceCall); + + } + + } finally { + + // De-register alias + ServiceRegistry.getInstance().remove(serviceURI1); + + } + + } finally { + + store.destroy(); + + } + + } + + + /** + * Unit test a {@link CustomServiceFactory} which hooks the connection start + * for the {@link BigdataSail}. + * + * @throws SailException + */ + public void test_customService() throws SailException { + + final URI serviceURI1 = new URIImpl("http://www.bigdata.com/myService"); + + final MyCustomServiceFactory serviceFactory = new MyCustomServiceFactory( + new OpenrdfNativeServiceOptions()); + + try { + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + + // Register. + ServiceRegistry.getInstance().add(serviceURI1, serviceFactory); + + // Verify discovery. + assertNotNull(ServiceRegistry.getInstance().get(serviceURI1)); + + // Verify same object. + assertTrue(serviceFactory == ServiceRegistry.getInstance().get( + serviceURI1)); + + /* + * Verify custom service visitation. + */ + { + + final Iterator<CustomServiceFactory> itr = ServiceRegistry + .getInstance().customServices(); + + boolean found = false; + + while (itr.hasNext()) { + + final CustomServiceFactory t = itr.next(); + + if (t == serviceFactory) { + + found = true; + + } + + } + + assertTrue(found); + + } + + /* + * Verify hooked on connection start. + */ + { + final AbstractTripleStore store = getStore(getProperties()); + try { + + final BigdataSail sail = new BigdataSail(store); + try { + sail.initialize(); + // Nothing started yet. + assertEquals("nstarted", 0, + serviceFactory.nstarted.get()); + final BigdataSailConnection conn = sail.getConnection(); + try { + // Verify the service was notified. + assertEquals("nstarted", 1, + serviceFactory.nstarted.get()); + } finally { + conn.close(); + } + + } finally { + sail.shutDown(); + } + + } finally { + store.destroy(); + } + } + + // Verify the service was notified just once. + assertEquals("nstarted", 1, serviceFactory.nstarted.get()); + + } finally { + + // De-register. + ServiceRegistry.getInstance().remove(serviceURI1); + + } + + // Verify not registered. + assertNull(ServiceRegistry.getInstance().get(serviceURI1)); + + } + + /** + * Private helper class used to verify that new mutable connections are + * hooked. 
+ */ + private static class MyCustomServiceFactory implements CustomServiceFactory { + + public final AtomicInteger nstarted = new AtomicInteger(); + + private final IServiceOptions serviceOptions; + + public MyCustomServiceFactory(final IServiceOptions serviceOptions) { + + this.serviceOptions = serviceOptions; + + } + + @Override + public IServiceOptions getServiceOptions() { + return serviceOptions; + } + + @Override + public ServiceCall<?> create(ServiceCallCreateParams params) { + throw new UnsupportedOperationException(); + } + + @Override + public void startConnection(BigdataSailConnection conn) { + + nstarted.incrementAndGet(); + + } + + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2012-04-02 09:19:51 UTC (rev 6246) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2012-04-02 13:32:21 UTC (rev 6247) @@ -120,6 +120,8 @@ import com.bigdata.rdf.sparql.ast.ASTContainer; import com.bigdata.rdf.sparql.ast.QueryRoot; import com.bigdata.rdf.sparql.ast.eval.ASTEvalHelper; +import com.bigdata.rdf.sparql.ast.service.CustomServiceFactory; +import com.bigdata.rdf.sparql.ast.service.ServiceRegistry; import com.bigdata.rdf.spo.ExplicitSPOFilter; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.InferredSPOFilter; @@ -1141,20 +1143,53 @@ protected NotifyingSailConnection getConnectionInternal() throws SailException { + final BigdataSailConnection conn; try { // if we have isolatable indices then use a read/write transaction // @todo finish testing so we can enable this if (isolatable) { - return getReadWriteConnection(); + conn = getReadWriteConnection(); } else { - return getUnisolatedConnection(); + conn = getUnisolatedConnection(); + + } + + if (!conn.isReadOnly()) { + + /* + * Give each registered ServiceFactory instance an opportunity + * to intercept the start of this connection. + */ + final Iterator<CustomServiceFactory> itr = ServiceRegistry + .getInstance().customServices(); + + while (itr.hasNext()) { + + final CustomServiceFactory f = itr.next(); + + try { + + f.startConnection(conn); + + } catch (Throwable t) { + + log.error(t, t); + + continue; + + } + + } + } + return conn; + } catch (Exception ex) { throw new SailException(ex); Added: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ReadOnlyIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ReadOnlyIterator.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ReadOnlyIterator.java 2012-04-02 13:32:21 UTC (rev 6247) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 2, 2012 + */ + +package cutthecrap.utils.striterators; + +import java.util.Iterator; + +/** + * An iterator wrapper which does not support {@link #remove()}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class ReadOnlyIterator<E> implements Iterator<E> { + + private final Iterator<E> src; + + public ReadOnlyIterator(final Iterator<E> src) { + + if (src == null) + throw new IllegalArgumentException(); + + this.src = src; + + } + + @Override + public boolean hasNext() { + return src.hasNext(); + } + + @Override + public E next() { + return src.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/ReadOnlyIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
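Taken together, the registry changes above and the new connection hook in BigdataSail.getConnectionInternal() let a factory observe every mutable connection. The following is a minimal sketch modeled on MyCustomServiceFactory from the test suite; the class name, service URI, and counter are illustrative, not part of the commit:

import java.util.concurrent.atomic.AtomicInteger;

import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;

import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection;
import com.bigdata.rdf.sparql.ast.service.CustomServiceFactory;
import com.bigdata.rdf.sparql.ast.service.IServiceOptions;
import com.bigdata.rdf.sparql.ast.service.OpenrdfNativeServiceOptions;
import com.bigdata.rdf.sparql.ast.service.ServiceCall;
import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams;
import com.bigdata.rdf.sparql.ast.service.ServiceRegistry;

public class ConnectionCounterService implements CustomServiceFactory {

    /** Illustrative URI; any URI not already registered will do. */
    public static final URI SERVICE_URI = new URIImpl(
            "http://example.org/connectionCounter");

    private final IServiceOptions serviceOptions = new OpenrdfNativeServiceOptions();

    /** The number of mutable connections observed so far. */
    public final AtomicInteger nstarted = new AtomicInteger();

    @Override
    public IServiceOptions getServiceOptions() {
        return serviceOptions;
    }

    @Override
    public ServiceCall<?> create(final ServiceCallCreateParams params) {
        // This factory only observes connections; it is not intended to
        // be invoked from a SERVICE clause.
        throw new UnsupportedOperationException();
    }

    @Override
    public void startConnection(final BigdataSailConnection conn) {
        // Invoked by BigdataSail.getConnectionInternal() for each new
        // mutable connection (read-only connections are not hooked).
        nstarted.incrementAndGet();
    }

    public static ConnectionCounterService register() {
        final ConnectionCounterService f = new ConnectionCounterService();
        // add() throws UnsupportedOperationException if already declared.
        ServiceRegistry.getInstance().add(SERVICE_URI, f);
        return f;
    }

    public static void unregister() {
        ServiceRegistry.getInstance().remove(SERVICE_URI);
    }

}

Because customServices() wraps a copy-on-write list in a ReadOnlyIterator, the sail can traverse the registered factories with snapshot isolation while other threads add or remove services.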
From: <tho...@us...> - 2012-04-02 15:46:44
Revision: 6248 http://bigdata.svn.sourceforge.net/bigdata/?rev=6248&view=rev Author: thompsonbry Date: 2012-04-02 15:46:33 +0000 (Mon, 02 Apr 2012) Log Message: ----------- Modified the SPARQL UPDATE code paths to pass back the commitTime. Modified the NSS to report the commit time. Added a FORM to the NSS index.html page for SPARQL UPDATE in addition to the SPARQL Query FORM. Fixed various validation errors in the index.html page. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdateContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailUpdate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java branches/BIGDATA_RELEASE_1_2_0/bigdata-war/src/html/index.html Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-02 15:46:33 UTC (rev 6248) @@ -270,6 +270,8 @@ final long commitTime = context.conn.commit2(); + context.setCommitTime(commitTime); + if (log.isDebugEnabled()) log.debug("COMMIT: commitTime=" + commitTime); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdateContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdateContext.java 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdateContext.java 2012-04-02 15:46:33 UTC (rev 6248) @@ -27,6 +27,8 @@ package com.bigdata.rdf.sparql.ast.eval; +import java.util.concurrent.atomic.AtomicLong; + import org.openrdf.sail.SailException; import com.bigdata.journal.ITx; @@ -53,7 +55,13 @@ public final BigdataSailRepositoryConnection conn; private boolean includeInferred; - + + /** + * The timestamp associated with the commit point for the update and + * <code>-1</code> if until there is a commit. + */ + private AtomicLong commitTime = new AtomicLong(-1); + public final boolean isIncludeInferred() { return includeInferred; @@ -67,6 +75,22 @@ } /** + * The timestamp associated with the commit point for the update and + * <code>-1</code> if until there is a commit. 
+ */ + public long getCommitTime() { + + return commitTime.get(); + + } + + public void setCommitTime(final long commitTime) { + + this.commitTime.set(commitTime); + + } + + /** * @param astContainer * @param db * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java 2012-04-02 15:46:33 UTC (rev 6248) @@ -663,12 +663,14 @@ * sequence (optional). * @param includeInferred * if inferences should be included in various operations. + * + * @return The timestamp of the commit point. * * @throws SailException * * TODO timeout for update? */ - static public void executeUpdate(// + static public long executeUpdate(// final BigdataSailRepositoryConnection conn,// final ASTContainer astContainer,// final Dataset dataset, @@ -711,6 +713,8 @@ */ AST2BOpUpdate.convertUpdate(ctx); + return ctx.getCommitTime(); + } catch (Exception ex) { throw new UpdateExecutionException(ex); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailUpdate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailUpdate.java 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailUpdate.java 2012-04-02 15:46:33 UTC (rev 6248) @@ -119,7 +119,20 @@ @Override public void execute() throws UpdateExecutionException { + + execute2(); + + } + /** + * Execute a SPARQL UPDATE request. + * + * @return The timestamp of the commit point for that UPDATE. + * + * @throws UpdateExecutionException + */ + public long execute2() throws UpdateExecutionException { + // final QueryRoot originalQuery = astContainer.getOriginalAST(); // //// if (getMaxQueryTime() > 0) @@ -128,7 +141,7 @@ // // originalQuery.setIncludeInferred(getIncludeInferred()); - ASTEvalHelper.executeUpdate( + return ASTEvalHelper.executeUpdate( ((BigdataSailRepositoryConnection) getConnection()), astContainer,// dataset,// Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFContext.java 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/BigdataRDFContext.java 2012-04-02 15:46:33 UTC (rev 6248) @@ -23,6 +23,8 @@ package com.bigdata.rdf.sail.webapp; import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; import java.nio.charset.Charset; import java.util.Iterator; import java.util.LinkedList; @@ -554,7 +556,7 @@ this.update = true; this.queryType = null; this.mimeType = null; - this.charset = null; + this.charset = Charset.forName("UTF-8"); this.fileExt = null; this.req = req; this.explain = req.getParameter(EXPLAIN) != null; @@ -982,8 +984,14 @@ /** * Executes a SPARQL UPDATE. 
*/ - private class UpdateTask extends AbstractQueryTask { + class UpdateTask extends AbstractQueryTask { + /** + * The timestamp for the commit point associated with the update and + * <code>-1</code> if the commit point has not yet been assigned. + */ + public AtomicLong commitTime = new AtomicLong(-1); + public UpdateTask(final String namespace, final long timestamp, final String baseURI, final ASTContainer astContainer, final HttpServletRequest req, final OutputStream os) { @@ -1004,8 +1012,63 @@ final BigdataSailUpdate update = setupUpdate(cxn); - update.execute(); + this.commitTime.set(update.execute2()); + /* + * Setup the response headers. + */ + +// // No caching for UPDATE. +// resp.addHeader("Cache-Control", "no-cache"); + +// final Charset charset = updateTask.charset; + + final Writer w = new OutputStreamWriter(os, charset); + + try { + + final HTMLBuilder doc = new HTMLBuilder(charset.name(), w); + + { + + XMLBuilder.Node current = doc.root("html"); + { + current = current.node("head"); + current.node("meta") + .attr("http-equiv", "Content-Type") + .attr("content", + "text/html;charset=" + charset.name()) + .close(); + current.node("title").textNoEncode("bigdata®") + .close(); + current = current.close();// close the head. + } + + // open the body + current = current.node("body"); + + current.node("p")// + .text("commitTime=" + commitTime.get())// + .close(); + + // current.node("p")// + // .text("commitTime=" + updateTask.commitTime.get())// + // .close(); + + doc.closeAll(current); + + } + + w.flush(); + + } finally { + + w.close(); + // os.flush(); + // os.close(); + + } + } } @@ -1056,7 +1119,7 @@ */ final String baseURI = req.getRequestURL().toString(); - if(update) { + if (update) { /* * Parse the query so we can figure out how it will need to be executed. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/QueryServlet.java 2012-04-02 15:46:33 UTC (rev 6248) @@ -28,6 +28,7 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; +import java.nio.charset.Charset; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; @@ -70,6 +71,7 @@ import com.bigdata.rdf.sail.sparql.ast.SimpleNode; import com.bigdata.rdf.sail.webapp.BigdataRDFContext.AbstractQueryTask; import com.bigdata.rdf.sail.webapp.BigdataRDFContext.RunningQuery; +import com.bigdata.rdf.sail.webapp.BigdataRDFContext.UpdateTask; import com.bigdata.rdf.sparql.ast.ASTContainer; import com.bigdata.rdf.sparql.ast.QueryRoot; import com.bigdata.rdf.store.AbstractTripleStore; @@ -318,15 +320,15 @@ // final boolean explain = // req.getParameter(BigdataRDFContext.EXPLAIN) != null; - final AbstractQueryTask queryTask; + final UpdateTask updateTask; try { /* * Attempt to construct a task which we can use to evaluate the * query. 
*/ - queryTask = context - .getQueryTask(namespace, timestamp, updateStr, - null/* acceptOverride */, req, os, true/* update */); + updateTask = (UpdateTask) context.getQueryTask(namespace, + timestamp, updateStr, null/* acceptOverride */, req, + os, true/* update */); } catch (MalformedQueryException ex) { /* * Send back a BAD REQUEST (400) along with the text of the @@ -337,21 +339,11 @@ return; } - final FutureTask<Void> ft = new FutureTask<Void>(queryTask); + final FutureTask<Void> ft = new FutureTask<Void>(updateTask); if (log.isTraceEnabled()) log.trace("Will run update: " + updateStr); - /* - * Setup the response headers. - */ - - resp.setStatus(HTTP_OK); - resp.setContentType(BigdataServlet.MIME_TEXT_PLAIN); - - // No caching for UPDATE. - resp.addHeader("Cache-Control", "no-cache"); - // if (queryTask.explain) { // resp.setContentType(BigdataServlet.MIME_TEXT_HTML); // final Writer w = new OutputStreamWriter(os, queryTask.charset); @@ -427,9 +419,7 @@ // Wait for the Future. ft.get(); -// -// } - + } catch (Throwable e) { try { throw BigdataRDFServlet.launderThrowable(e, resp, updateStr); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-war/src/html/index.html =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-war/src/html/index.html 2012-04-02 13:32:21 UTC (rev 6247) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-war/src/html/index.html 2012-04-02 15:46:33 UTC (rev 6248) @@ -37,10 +37,10 @@ URL. For that reason ONLY this operation defaults to a POST. You SHOULD use GET for database queries since they are, by and large, idempotent. --> -<h2>Query Data:</h2> +<h2><a href="http://www.w3.org/TR/sparql11-query/"> SPARQL Query </a></h2> <FORM action="sparql" method="post"> <P> - <TEXTAREA name="query" rows="30" cols="100" title="Enter SPARQL Query." + <TEXTAREA name="query" rows="10" cols="80" title="Enter SPARQL Query." >SELECT * { ?s ?p ?o } LIMIT 1</TEXTAREA> </P><P> <INPUT type="submit" value="Send" title="Submit query."> @@ -52,17 +52,42 @@ > Analytic </P> </FORM> +<h2><a href="http://www.w3.org/TR/sparql11-update/">SPARQL Update</a></h2> +<FORM action="sparql" method="post"> + <P> + <TEXTAREA name="update" rows="10" cols="80" title="Enter SPARQL Update." + > +PREFIX dc: <http://purl.org/dc/elements/1.1/> +INSERT DATA +{ + <http://example/book1> dc:title "A new book" ; + dc:creator "A.N.Other" . +}</TEXTAREA> + </P><P> + <INPUT type="submit" value="Send" title="Submit Update."> + <!--INPUT type="checkbox" name="explain" value="true" + title="Explain query plan rather than returning the query results." + > Explain--> + <INPUT type="checkbox" name="analytic" value="true" + title="Enable the analytic query package." + > Analytic + </P> +</FORM> <p> -Here are some useful namespaces for queries: +<!-- Note: Some common characters need to be escaped here and also in the SPARQL + examples above. 
+ --> +Here are some useful namespaces: </p> -<dl> -<dt>prefix dc: <http://purl.org/dc/elements/1.1/></dt> -<dt>prefix xsd: <http://www.w3.org/2001/XMLSchema#></dt> -<dt>prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#></dt> -<dt>prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#></dt> -<dt>prefix owl: <http://www.w3.org/2002/07/owl#></dt> -</dl> - +<pre> +prefix dc: <http://purl.org/dc/elements/1.1/> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +prefix owl: <http://www.w3.org/2002/07/owl#> +prefix hint: <http://www.bigdata.com/queryHints#> +</pre> +<!-- Note: Use SPARQL Update "LOAD" instead. <h2>Upload Data (URL):</h2> <form action="sparql" method="post"> <p> @@ -71,6 +96,6 @@ <input type="submit" value="Upload"> </p> </form> - +--> </body> </html> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2012-04-02 23:08:52
Revision: 6251 http://bigdata.svn.sourceforge.net/bigdata/?rev=6251&view=rev Author: thompsonbry Date: 2012-04-02 23:08:45 +0000 (Mon, 02 Apr 2012) Log Message: ----------- Fixed TestFederatedQuery which was failing to remove the alias. Fixed TestServiceRegistry which was not verifying that the alias, once removed, could be re-aliased (it could and can). Javadoc on ServiceRegistry. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestFederatedQuery.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2012-04-02 18:57:46 UTC (rev 6250) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/service/ServiceRegistry.java 2012-04-02 23:08:45 UTC (rev 6251) @@ -130,7 +130,8 @@ * Remove a service from the registry and/or set of known aliases. * * @param serviceURI - * The service URI. + * The URI of the service -or- the URI of an alias registered + * using {@link #addAlias(URI, URI)}. * * @return <code>true</code> iff a service for that URI was removed. */ Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java 2012-04-02 18:57:46 UTC (rev 6250) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/service/TestServiceRegistry.java 2012-04-02 23:08:45 UTC (rev 6251) @@ -202,6 +202,17 @@ assertNull(ServiceRegistry.getInstance().get(serviceURI1)); assertNull(ServiceRegistry.getInstance().get(serviceURI2)); + /* + * Verify that we can re-register the alias. + */ + try { + // Register alias. + ServiceRegistry.getInstance().addAlias(serviceURI1, serviceURI2); + } finally { + // Remove alias. + ServiceRegistry.getInstance().remove(serviceURI2); + } + } /** Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestFederatedQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestFederatedQuery.java 2012-04-02 18:57:46 UTC (rev 6250) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/webapp/TestFederatedQuery.java 2012-04-02 23:08:45 UTC (rev 6251) @@ -485,9 +485,9 @@ } finally { - // De-register those service URIs. - ServiceRegistry.getInstance().remove(serviceURI1); - ServiceRegistry.getInstance().remove(serviceURI2); + // De-register those service aliases. + ServiceRegistry.getInstance().remove(serviceURI1_alias); + ServiceRegistry.getInstance().remove(serviceURI2_alias); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
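The clarified remove() contract is easy to misread, so here is the lifecycle spelled out. A minimal sketch using only the registry API shown above; the URIs are illustrative:

import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;

import com.bigdata.rdf.sparql.ast.service.ServiceRegistry;

public class AliasLifecycle {

    public static void main(final String[] args) {

        final URI serviceURI = new URIImpl("http://example.org/myService");
        final URI aliasURI = new URIImpl("http://example.org/myAlias");

        // Permitted even though serviceURI has no registered ServiceFactory.
        ServiceRegistry.getInstance().addAlias(serviceURI, aliasURI);

        // Pass the alias URI itself (not the service URI) to retire it.
        ServiceRegistry.getInstance().remove(aliasURI);

        // Once removed, the same alias may be registered again, which is
        // what the new assertion in TestServiceRegistry verifies.
        ServiceRegistry.getInstance().addAlias(serviceURI, aliasURI);
        ServiceRegistry.getInstance().remove(aliasURI);

    }

}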
From: <mrp...@us...> - 2012-04-03 16:38:49
Revision: 6253 http://bigdata.svn.sourceforge.net/bigdata/?rev=6253&view=rev Author: mrpersonick Date: 2012-04-03 16:38:42 +0000 (Tue, 03 Apr 2012) Log Message: ----------- fixed ticket 539 NotMaterializedException for REGEX on Vocab item Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestRegexBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -304,7 +304,7 @@ } - if (!iv.isInline() || iv.isExtension() || materializeAll) { + if (iv.needsMaterialization() || materializeAll) { ids.add(iv); @@ -331,7 +331,7 @@ } - if (!iv.isInline() || iv.isExtension() || materializeAll) { + if (iv.needsMaterialization() || materializeAll) { ids.add(iv); @@ -414,7 +414,7 @@ final BigdataValue value = terms.get(iv); - if (value == null && (iv.isExtension() || !iv.isInline())) { + if (value == null && iv.needsMaterialization()) { throw new RuntimeException("Could not resolve: iv=" + iv); @@ -457,7 +457,7 @@ final BigdataValue value = terms.get(iv); - if (value == null && (iv.isExtension() || !iv.isInline())) { + if (value == null && iv.needsMaterialization()) { throw new RuntimeException("Could not resolve: iv=" + iv); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -372,7 +372,7 @@ // final BigdataValueFactory vf = getValueFactory(); - if (iv.isInline() && !iv.isExtension()) { + if (iv.isInline() && !iv.needsMaterialization()) { // if (iv instanceof Literal) { @@ -405,7 +405,7 @@ * @param iv * The {@link IV}. * - * @return The {@link Literal}. + * @return The {@link Value}. * * @throws SparqlTypeErrorException * if the argument is <code>null</code>. 
@@ -419,7 +419,7 @@ if (iv == null) throw new SparqlTypeErrorException(); - if (iv.isInline() && !iv.isExtension()) { + if (iv.isInline() && !iv.needsMaterialization()) { return (Value) iv; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -143,10 +143,10 @@ public boolean accept(final IBindingSet bs) { @SuppressWarnings("rawtypes") - final IV var = get(0).get(bs); + final Value var = asValue(getAndCheckBound(0, bs)); @SuppressWarnings("rawtypes") - final IV pattern = get(1).get(bs); + final IV pattern = getAndCheckBound(1, bs); @SuppressWarnings("rawtypes") final IV flags = arity() > 2 ? get(2).get(bs) : null; @@ -157,11 +157,7 @@ log.debug("regex flags: " + flags); } - // not yet bound - if (var == null || pattern == null) - throw new SparqlTypeErrorException(); - - return accept(var.getValue(), pattern.getValue(), + return accept(var, pattern.getValue(), flags != null ? flags.getValue() : null); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestRegexBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestRegexBOp.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestRegexBOp.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -128,4 +128,5 @@ } } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -77,20 +77,8 @@ @Override public TupleQueryResult evaluate() throws QueryEvaluationException { - final QueryRoot originalQuery = astContainer.getOriginalAST(); + return evaluate((BindingsClause) null); - if (getMaxQueryTime() > 0) - originalQuery.setTimeout(TimeUnit.SECONDS - .toMillis(getMaxQueryTime())); - - originalQuery.setIncludeInferred(getIncludeInferred()); - - final TupleQueryResult queryResult = ASTEvalHelper.evaluateTupleQuery( - getTripleStore(), astContainer, new QueryBindingSet( - getBindings())); - - return queryResult; - } public TupleQueryResult evaluate(final BindingsClause bc) @@ -98,7 +86,8 @@ final QueryRoot originalQuery = astContainer.getOriginalAST(); - originalQuery.setBindingsClause(bc); + if (bc != null) + originalQuery.setBindingsClause(bc); if (getMaxQueryTime() > 0) originalQuery.setTimeout(TimeUnit.SECONDS Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java 2012-04-03 16:33:46 UTC (rev 6252) +++ 
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -113,7 +113,8 @@ suite.addTestSuite(com.bigdata.rdf.sail.DavidsTestBOps.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); - + suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); + // The Sesame TCK, including the SPARQL test suite. { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -124,6 +124,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.DavidsTestBOps.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); // The Sesame TCK, including the SPARQL test suite. { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -100,6 +100,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.TestTicket422.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); return suite; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java 2012-04-03 16:33:46 UTC (rev 6252) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -95,6 +95,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.TestTicket473.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); return suite; Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java 2012-04-03 16:38:42 UTC (rev 6253) @@ -0,0 +1,169 @@ +/** +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.sail; + +import java.util.Properties; + +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.sail.SailTupleQuery; + +import com.bigdata.rdf.axioms.NoAxioms; +import com.bigdata.rdf.internal.XSD; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.vocab.NoVocabulary; + +public class TestRegex extends ProxyBigdataSailTestCase { + + protected static final Logger log = Logger.getLogger(TestRegex.class); + + protected static final boolean INFO = log.isInfoEnabled(); + + /** + * + */ + public TestRegex() { + } + + /** + * @param arg0 + */ + public TestRegex(String arg0) { + super(arg0); + } + + @Override + public Properties getProperties() { + + Properties props = super.getProperties(); + + props.setProperty(BigdataSail.Options.TEXT_INDEX, "true"); + + return props; + + } + + public void testRegex() throws Exception { + + final BigdataSail sail = getSail(); + try { + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + + final RepositoryConnection cxn = repo.getConnection(); + + try { + cxn.setAutoCommit(false); + + final ValueFactory vf = sail.getValueFactory(); + + /* + * Create some terms. + */ + final URI X = vf.createURI(BD.NAMESPACE + "X"); + final Literal label = vf.createLiteral("John"); + + /* + * Create some statements. + */ + cxn.add(X, RDF.TYPE, RDFS.RESOURCE); + cxn.add(X, RDFS.LABEL, label); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + + if (log.isInfoEnabled()) { + log.info(sail.getDatabase().dumpStore()); + } + + { + + String query = + "select * where { ?s ?p ?o . 
FILTER (regex(?o,\"John\",\"i\")) }"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + + if (log.isInfoEnabled()) { + + log.info(query); + +// final BigdataSailTupleQuery bdTupleQuery = +// (BigdataSailTupleQuery) tupleQuery; +// final QueryRoot root = (QueryRoot) bdTupleQuery.getTupleExpr(); +// final Projection p = (Projection) root.getArg(); +// final TupleExpr tupleExpr = p.getArg(); +// final SOpTreeBuilder stb = new SOpTreeBuilder(); +// final SOpTree tree = stb.collectSOps(tupleExpr); + +// log.info(tree); +// log.info(query); + + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + + } + +// final Collection<BindingSet> answer = new LinkedList<BindingSet>(); +// answer.add(createBindingSet( +// new BindingImpl("a", paul), +// new BindingImpl("b", mary) +// )); +// answer.add(createBindingSet( +// new BindingImpl("a", brad), +// new BindingImpl("b", john) +// )); +// +// final TupleQueryResult result = tupleQuery.evaluate(); +// compare(result, answer); + + } + + } finally { + cxn.close(); + } + } finally { + if (sail instanceof BigdataSail) + ((BigdataSail)sail).__tearDownUnitTest();//shutDown(); + } + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
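The common thread in these edits is the materialization guard. A vocabulary URI is fully inline and is not an extension type, so the old test in ChunkedMaterializationOp excluded it from batch resolution even though FILTER operators such as REGEX still need the cached Value, which is how the NotMaterializedException of ticket 539 arose. A minimal side-by-side sketch of the two predicates (illustrative helpers, not code from the commit):

import com.bigdata.rdf.internal.IV;

public class MaterializationGuards {

    // Old guard: assumed every inline, non-extension IV could produce its
    // own Value, which fails for inline Vocab URIs consumed by FILTERs.
    static boolean needsLookupOld(final IV<?, ?> iv) {
        return !iv.isInline() || iv.isExtension();
    }

    // New guard: the IV itself declares whether a lexicon join is required.
    static boolean needsLookupNew(final IV<?, ?> iv) {
        return iv.needsMaterialization();
    }

}

On the operator side, RegexBOp now resolves its first operand through asValue(getAndCheckBound(0, bs)), which applies the same rule, instead of reaching directly for iv.getValue().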
From: <mrp...@us...> - 2012-04-05 08:39:51
Revision: 6256 http://bigdata.svn.sourceforge.net/bigdata/?rev=6256&view=rev Author: mrpersonick Date: 2012-04-05 08:39:41 +0000 (Thu, 05 Apr 2012) Log Message: ----------- fixed more materialization problems Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/StrBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/XsdStrBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMaterialization.java Removed Paths: ------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RegexBOp.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -64,25 +64,35 @@ final IValueExpression<? extends IV> pattern, final IValueExpression<? extends IV> flags) { - if (pattern instanceof IConstant && - (flags == null || flags instanceof IConstant)) { + try { - final IV parg = ((IConstant<IV>) pattern).get(); - - final IV farg = flags != null ? - ((IConstant<IV>) flags).get() : null; - - if (parg.hasValue() && (farg == null || farg.hasValue())) { - - final Value pargVal = parg.getValue(); - - final Value fargVal = farg != null ? farg.getValue() : null; - - return NV.asMap( - new NV(Annotations.PATTERN, - getPattern(pargVal, fargVal))); + if (pattern instanceof IConstant && + (flags == null || flags instanceof IConstant)) { - } + final IV parg = ((IConstant<IV>) pattern).get(); + + final IV farg = flags != null ? + ((IConstant<IV>) flags).get() : null; + + if (parg.hasValue() && (farg == null || farg.hasValue())) { + + final Value pargVal = parg.getValue(); + + final Value fargVal = farg != null ? 
farg.getValue() : null; + + return NV.asMap( + new NV(Annotations.PATTERN, + getPattern(pargVal, fargVal))); + + } + + } + + } catch (Exception ex) { + + if (log.isInfoEnabled()) { + log.info("could not create pattern for: " + pattern + ", " + flags); + } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/StrBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/StrBOp.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/StrBOp.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -27,18 +27,18 @@ import java.util.Map; import org.apache.log4j.Logger; +import org.openrdf.model.Literal; import org.openrdf.model.URI; +import org.openrdf.model.Value; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.NV; import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.impl.literal.AbstractLiteralIV; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataValueFactory; -import com.bigdata.rdf.model.BigdataValueFactoryImpl; import com.bigdata.rdf.sparql.ast.GlobalAnnotations; /** @@ -86,27 +86,22 @@ public IV get(final IBindingSet bs) { final IV iv = getAndCheckBound(0, bs); + + if (log.isDebugEnabled()) { + log.debug(iv); + } + + final Value val = asValue(iv); + if (log.isDebugEnabled()) { + log.debug(val); + } + // use to create my simple literals final BigdataValueFactory vf = getValueFactory(); - if (iv.isInline() && !iv.isExtension()) { - if(iv.isLiteral()){ - return super.asIV(vf.createLiteral( - ((AbstractLiteralIV)iv).getLabel()), bs); - }else{ - return super.asIV(vf.createLiteral(iv - .getInlineValue().toString()), bs); - } - } - - if (iv.isURI()) { - // return new simple literal using URI label - final URI uri = (URI) iv.getValue(); - final BigdataLiteral str = vf.createLiteral(uri.toString()); - return super.asIV(str, bs); - } else if (iv.isLiteral()) { - final BigdataLiteral lit = (BigdataLiteral) iv.getValue(); + if (val instanceof Literal) { + final Literal lit = (Literal) val; if (lit.getDatatype() == null && lit.getLanguage() == null) { // if simple literal return it return iv; @@ -116,9 +111,43 @@ final BigdataLiteral str = vf.createLiteral(lit.getLabel()); return super.asIV(str, bs); } + } else if (val instanceof URI) { + // return new simple literal using URI label + final BigdataLiteral str = vf.createLiteral(val.stringValue()); + return super.asIV(str, bs); } else { throw new SparqlTypeErrorException(); } + +// if (iv.isInline() && !iv.isExtension()) { +// if(iv.isLiteral()){ +// return super.asIV(vf.createLiteral( +// ((AbstractLiteralIV)iv).getLabel()), bs); +// }else{ +// return super.asIV(vf.createLiteral(iv +// .getInlineValue().toString()), bs); +// } +// } +// +// if (iv.isURI()) { +// // return new simple literal using URI label +// final URI uri = (URI) iv.getValue(); +// final BigdataLiteral str = vf.createLiteral(uri.toString()); +// return super.asIV(str, bs); +// } else if (iv.isLiteral()) { +// final BigdataLiteral lit = (BigdataLiteral) iv.getValue(); +// if (lit.getDatatype() == null && lit.getLanguage() == null) { +// // if simple literal return it +// return iv; +// } +// else { +// // else return new simple literal using Literal.getLabel +// final BigdataLiteral str = 
vf.createLiteral(lit.getLabel()); +// return super.asIV(str, bs); +// } +// } else { +// throw new SparqlTypeErrorException(); +// } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/XsdStrBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/XsdStrBOp.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/XsdStrBOp.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -27,7 +27,9 @@ import java.util.Map; import org.apache.log4j.Logger; +import org.openrdf.model.Literal; import org.openrdf.model.URI; +import org.openrdf.model.Value; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; @@ -86,27 +88,22 @@ public IV get(final IBindingSet bs) { final IV iv = getAndCheckBound(0, bs); + + if (log.isDebugEnabled()) { + log.debug(iv); + } + + final Value val = asValue(iv); + if (log.isDebugEnabled()) { + log.debug(val); + } + // use to create my simple literals final BigdataValueFactory vf = getValueFactory(); - if (iv.isInline() && !iv.isExtension()) { - if(iv.isLiteral()){ - return super.asIV(vf.createLiteral( - ((AbstractLiteralIV)iv).getLabel(), XSD.STRING), bs); - }else{ - return super.asIV(vf.createLiteral(iv - .getInlineValue().toString(), XSD.STRING), bs); - } - } - - if (iv.isURI()) { - // return new xsd:string literal using URI label - final URI uri = (URI) iv.getValue(); - final BigdataLiteral str = vf.createLiteral(uri.toString(), XSD.STRING); - return super.asIV(str, bs); - } else if (iv.isLiteral()) { - final BigdataLiteral lit = (BigdataLiteral) iv.getValue(); + if (val instanceof Literal) { + final Literal lit = (Literal) val; if (lit.getDatatype() != null && lit.getDatatype().equals(XSD.STRING)) { // if xsd:string literal return it return iv; @@ -116,9 +113,48 @@ final BigdataLiteral str = vf.createLiteral(lit.getLabel(), XSD.STRING); return super.asIV(str, bs); } + } else if (val instanceof URI) { + // return new simple literal using URI label + final BigdataLiteral str = vf.createLiteral(val.stringValue(), XSD.STRING); + return super.asIV(str, bs); } else { throw new SparqlTypeErrorException(); } + +// final IV iv = getAndCheckBound(0, bs); +// +// // use to create my simple literals +// final BigdataValueFactory vf = getValueFactory(); +// +// if (iv.isInline() && !iv.isExtension()) { +// if(iv.isLiteral()){ +// return super.asIV(vf.createLiteral( +// ((AbstractLiteralIV)iv).getLabel(), XSD.STRING), bs); +// }else{ +// return super.asIV(vf.createLiteral(iv +// .getInlineValue().toString(), XSD.STRING), bs); +// } +// } +// +// if (iv.isURI()) { +// // return new xsd:string literal using URI label +// final URI uri = (URI) iv.getValue(); +// final BigdataLiteral str = vf.createLiteral(uri.toString(), XSD.STRING); +// return super.asIV(str, bs); +// } else if (iv.isLiteral()) { +// final BigdataLiteral lit = (BigdataLiteral) iv.getValue(); +// if (lit.getDatatype() != null && lit.getDatatype().equals(XSD.STRING)) { +// // if xsd:string literal return it +// return iv; +// } +// else { +// // else return new xsd:string literal using Literal.getLabel +// final BigdataLiteral str = vf.createLiteral(lit.getLabel(), XSD.STRING); +// return super.asIV(str, bs); +// } +// } else { +// throw new SparqlTypeErrorException(); +// } } Modified: 
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailEmbeddedFederationWithQuads.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -113,7 +113,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.DavidsTestBOps.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); - suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestMaterialization.class); // The Sesame TCK, including the SPARQL test suite. { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithQuads.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -124,7 +124,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.DavidsTestBOps.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); - suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestMaterialization.class); // The Sesame TCK, including the SPARQL test suite. { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithSids.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -100,7 +100,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.TestTicket422.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); - suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestMaterialization.class); return suite; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBigdataSailWithoutSids.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -95,7 +95,7 @@ suite.addTestSuite(com.bigdata.rdf.sail.TestTicket473.class); suite.addTestSuite(com.bigdata.rdf.sail.TestLexJoinOps.class); - suite.addTestSuite(com.bigdata.rdf.sail.TestRegex.class); + suite.addTestSuite(com.bigdata.rdf.sail.TestMaterialization.class); return suite; Copied: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMaterialization.java (from rev 6253, branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java) =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMaterialization.java 
(rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestMaterialization.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -0,0 +1,328 @@ +/** +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.sail; + +import java.util.Properties; + +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.sail.SailTupleQuery; + +import com.bigdata.rdf.axioms.NoAxioms; +import com.bigdata.rdf.internal.XSD; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.vocab.NoVocabulary; + +public class TestMaterialization extends ProxyBigdataSailTestCase { + + protected static final Logger log = Logger.getLogger(TestMaterialization.class); + + protected static final boolean INFO = log.isInfoEnabled(); + + /** + * + */ + public TestMaterialization() { + } + + /** + * @param arg0 + */ + public TestMaterialization(String arg0) { + super(arg0); + } + + @Override + public Properties getProperties() { + + Properties props = super.getProperties(); + + props.setProperty(BigdataSail.Options.TEXT_INDEX, "true"); + + return props; + + } + + public void testRegex() throws Exception { + + final BigdataSail sail = getSail(); + try { + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + + final RepositoryConnection cxn = repo.getConnection(); + + try { + cxn.setAutoCommit(false); + + final ValueFactory vf = sail.getValueFactory(); + + /* + * Create some terms. + */ + final URI X = vf.createURI(BD.NAMESPACE + "X"); + final Literal label = vf.createLiteral("John"); + + /* + * Create some statements. + */ + cxn.add(X, RDF.TYPE, RDFS.RESOURCE); + cxn.add(X, RDFS.LABEL, label); + + /* + * Note: Either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + + if (log.isInfoEnabled()) { + log.info(sail.getDatabase().dumpStore()); + } + + { + + String query = + "select * where { ?s ?p ?o . 
FILTER (regex(?o,\"John\",\"i\")) }"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + + if (log.isInfoEnabled()) { + + log.info(query); + + final TupleQueryResult result = tupleQuery.evaluate(); + while (result.hasNext()) { + log.info(result.next()); + } + + } + +// final Collection<BindingSet> answer = new LinkedList<BindingSet>(); +// answer.add(createBindingSet( +// new BindingImpl("a", paul), +// new BindingImpl("b", mary) +// )); +// answer.add(createBindingSet( +// new BindingImpl("a", brad), +// new BindingImpl("b", john) +// )); +// +// final TupleQueryResult result = tupleQuery.evaluate(); +// compare(result, answer); + + } + + } finally { + cxn.close(); + } + } finally { + if (sail instanceof BigdataSail) + ((BigdataSail)sail).__tearDownUnitTest();//shutDown(); + } + + } + + public void testStr() throws Exception { + + final BigdataSail sail = getSail(); + try { + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + + final RepositoryConnection cxn = repo.getConnection(); + + try { + cxn.setAutoCommit(false); + + final ValueFactory vf = sail.getValueFactory(); + + /* + * Create some terms. + */ + final URI X = vf.createURI(BD.NAMESPACE + "X"); + final Literal label = vf.createLiteral("John"); + + /* + * Create some statements. + */ + cxn.add(X, RDF.TYPE, RDFS.RESOURCE); + cxn.add(X, RDFS.LABEL, label); + + /* + * Note: Either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + + if (log.isInfoEnabled()) { + log.info(sail.getDatabase().dumpStore()); + } + + { + + String query = + "select * where { ?s ?p ?o . FILTER (str(?o) = \""+RDF.PROPERTY+"\") }"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + + if (log.isInfoEnabled()) { + + log.info(query); + + final TupleQueryResult result = tupleQuery.evaluate(); + if (result.hasNext()) { + while (result.hasNext()) + log.info(result.next()); + } else { + fail("expecting result from the vocab"); + } + + } + +// final Collection<BindingSet> answer = new LinkedList<BindingSet>(); +// answer.add(createBindingSet( +// new BindingImpl("a", paul), +// new BindingImpl("b", mary) +// )); +// answer.add(createBindingSet( +// new BindingImpl("a", brad), +// new BindingImpl("b", john) +// )); + // +// final TupleQueryResult result = tupleQuery.evaluate(); +// compare(result, answer); + + } + + } finally { + cxn.close(); + } + } finally { + if (sail instanceof BigdataSail) + ((BigdataSail)sail).__tearDownUnitTest();//shutDown(); + } + + } + + public void testXsdStr() throws Exception { + + final BigdataSail sail = getSail(); + try { + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + + final RepositoryConnection cxn = repo.getConnection(); + + try { + cxn.setAutoCommit(false); + + final ValueFactory vf = sail.getValueFactory(); + + /* + * Create some terms. + */ + final URI X = vf.createURI(BD.NAMESPACE + "X"); + final Literal label = vf.createLiteral("John"); + + /* + * Create some statements. 
+ */ + cxn.add(X, RDF.TYPE, RDFS.RESOURCE); + cxn.add(X, RDFS.LABEL, label); + + /* + * Note: Either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + + if (log.isInfoEnabled()) { + log.info(sail.getDatabase().dumpStore()); + } + + { + + String query = + "select * where { ?s ?p ?o . FILTER (xsd:string(?o) = \""+RDF.PROPERTY+"\"^^xsd:string) }"; + + final SailTupleQuery tupleQuery = (SailTupleQuery) + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + + if (log.isInfoEnabled()) { + + log.info(query); + + final TupleQueryResult result = tupleQuery.evaluate(); + if (result.hasNext()) { + while (result.hasNext()) + log.info(result.next()); + } else { + fail("expecting result from the vocab"); + } + + } + +// final Collection<BindingSet> answer = new LinkedList<BindingSet>(); +// answer.add(createBindingSet( +// new BindingImpl("a", paul), +// new BindingImpl("b", mary) +// )); +// answer.add(createBindingSet( +// new BindingImpl("a", brad), +// new BindingImpl("b", john) +// )); + // +// final TupleQueryResult result = tupleQuery.evaluate(); +// compare(result, answer); + + } + + } finally { + cxn.close(); + } + } finally { + if (sail instanceof BigdataSail) + ((BigdataSail)sail).__tearDownUnitTest();//shutDown(); + } + + } + +} Deleted: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestRegex.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -1,169 +0,0 @@ -/** -Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rdf.sail; - -import java.util.Properties; - -import org.apache.log4j.Logger; -import org.openrdf.model.Literal; -import org.openrdf.model.URI; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.vocabulary.RDF; -import org.openrdf.model.vocabulary.RDFS; -import org.openrdf.query.QueryLanguage; -import org.openrdf.query.TupleQueryResult; -import org.openrdf.repository.RepositoryConnection; -import org.openrdf.repository.sail.SailTupleQuery; - -import com.bigdata.rdf.axioms.NoAxioms; -import com.bigdata.rdf.internal.XSD; -import com.bigdata.rdf.sparql.ast.QueryHints; -import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum; -import com.bigdata.rdf.store.BD; -import com.bigdata.rdf.vocab.NoVocabulary; - -public class TestRegex extends ProxyBigdataSailTestCase { - - protected static final Logger log = Logger.getLogger(TestRegex.class); - - protected static final boolean INFO = log.isInfoEnabled(); - - /** - * - */ - public TestRegex() { - } - - /** - * @param arg0 - */ - public TestRegex(String arg0) { - super(arg0); - } - - @Override - public Properties getProperties() { - - Properties props = super.getProperties(); - - props.setProperty(BigdataSail.Options.TEXT_INDEX, "true"); - - return props; - - } - - public void testRegex() throws Exception { - - final BigdataSail sail = getSail(); - try { - sail.initialize(); - final BigdataSailRepository repo = new BigdataSailRepository(sail); - - final RepositoryConnection cxn = repo.getConnection(); - - try { - cxn.setAutoCommit(false); - - final ValueFactory vf = sail.getValueFactory(); - - /* - * Create some terms. - */ - final URI X = vf.createURI(BD.NAMESPACE + "X"); - final Literal label = vf.createLiteral("John"); - - /* - * Create some statements. - */ - cxn.add(X, RDF.TYPE, RDFS.RESOURCE); - cxn.add(X, RDFS.LABEL, label); - - /* - * Note: The either flush() or commit() is required to flush the - * statement buffers to the database before executing any operations - * that go around the sail. - */ - cxn.commit(); - - if (log.isInfoEnabled()) { - log.info(sail.getDatabase().dumpStore()); - } - - { - - String query = - "select * where { ?s ?p ?o . 
FILTER (regex(?o,\"John\",\"i\")) }"; - - final SailTupleQuery tupleQuery = (SailTupleQuery) - cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); - tupleQuery.setIncludeInferred(true /* includeInferred */); - - if (log.isInfoEnabled()) { - - log.info(query); - -// final BigdataSailTupleQuery bdTupleQuery = -// (BigdataSailTupleQuery) tupleQuery; -// final QueryRoot root = (QueryRoot) bdTupleQuery.getTupleExpr(); -// final Projection p = (Projection) root.getArg(); -// final TupleExpr tupleExpr = p.getArg(); -// final SOpTreeBuilder stb = new SOpTreeBuilder(); -// final SOpTree tree = stb.collectSOps(tupleExpr); - -// log.info(tree); -// log.info(query); - - final TupleQueryResult result = tupleQuery.evaluate(); - while (result.hasNext()) { - log.info(result.next()); - } - - } - -// final Collection<BindingSet> answer = new LinkedList<BindingSet>(); -// answer.add(createBindingSet( -// new BindingImpl("a", paul), -// new BindingImpl("b", mary) -// )); -// answer.add(createBindingSet( -// new BindingImpl("a", brad), -// new BindingImpl("b", john) -// )); -// -// final TupleQueryResult result = tupleQuery.evaluate(); -// compare(result, answer); - - } - - } finally { - cxn.close(); - } - } finally { - if (sail instanceof BigdataSail) - ((BigdataSail)sail).__tearDownUnitTest();//shutDown(); - } - - } - -} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java 2012-04-05 08:37:22 UTC (rev 6255) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java 2012-04-05 08:39:41 UTC (rev 6256) @@ -229,6 +229,7 @@ */ static final Collection<String> testURIs = Arrays.asList(new String[] { +// "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/expr-builtin/manifest#dawg-datatype-2" // "http://www.w3.org/2001/sw/DataAccess/tests/data-r2/cast/manifest#cast-dT" // 8, 9, 14-19, 23-30 |
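The TestMaterialization suite added in r6256 exercises FILTER functions (regex, str, and an xsd:string cast) whose arguments must be materialized from internal values (IVs) into RDF values before the function can be evaluated. All three test methods follow the same shape; the condensed sketch below is not part of the commit: it assumes the imports already present in TestMaterialization, a test harness that supplies an initialized RepositoryConnection cxn and ValueFactory vf, a surrounding test method declared "throws Exception", and the comparison value "John" is illustrative.

    // Condensed sketch (hypothetical) of the TestMaterialization pattern.
    // Assumes cxn (RepositoryConnection) and vf (ValueFactory) come from the
    // surrounding test harness, as in the test methods above.
    final URI x = vf.createURI(BD.NAMESPACE + "X");
    cxn.add(x, RDF.TYPE, RDFS.RESOURCE);
    cxn.add(x, RDFS.LABEL, vf.createLiteral("John"));
    cxn.commit(); // flush the statement buffers before querying around the SAIL

    // Each FILTER below can only be applied once the ?o binding has been
    // materialized into an RDF value.
    final String[] queries = new String[] {
            "select * where { ?s ?p ?o . FILTER (regex(?o, \"John\", \"i\")) }",
            "select * where { ?s ?p ?o . FILTER (str(?o) = \"John\") }"
    };

    for (String q : queries) {
        final TupleQueryResult result =
                cxn.prepareTupleQuery(QueryLanguage.SPARQL, q).evaluate();
        try {
            assertTrue("expected a solution for: " + q, result.hasNext());
        } finally {
            result.close();
        }
    }

The same pattern covers the xsd:string cast in testXsdStr(); only the FILTER expression changes.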
From: <mrp...@us...> - 2012-04-05 11:17:04
|
Revision: 6257 http://bigdata.svn.sourceforge.net/bigdata/?rev=6257&view=rev Author: mrpersonick Date: 2012-04-05 11:16:55 +0000 (Thu, 05 Apr 2012) Log Message: ----------- fixing more materialization problems Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/AVERAGE.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DatatypeBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DateTimeUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NumericBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SameTermBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/StrdtBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/literal/XSDDecimalIV.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/literal/XSDIntegerIV.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/literal/XSDNumericIV.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionRegistry.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSparqlTest.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/literal/NumericIV.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/aggregate/AggregateBase.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -11,7 +11,7 @@ import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.NV; import com.bigdata.bop.Var; -import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.impl.literal.NumericIV; import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; import com.bigdata.rdf.model.BigdataLiteral; @@ -193,7 +193,7 @@ /** A ZERO. */ - protected static final transient IV ZERO = new XSDNumericIV<BigdataLiteral>(0); + protected static final transient NumericIV ZERO = new XSDNumericIV<BigdataLiteral>(0); /** An empty string. 
*/ protected static final transient Literal EMPTY_LITERAL = new LiteralImpl(""); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/AVERAGE.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/AVERAGE.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/AVERAGE.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -38,8 +38,11 @@ import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.NotMaterializedException; import com.bigdata.rdf.internal.constraints.INeedsMaterialization; +import com.bigdata.rdf.internal.constraints.IVValueExpression; import com.bigdata.rdf.internal.constraints.MathBOp; +import com.bigdata.rdf.internal.constraints.MathUtility; import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; +import com.bigdata.rdf.internal.impl.literal.NumericIV; import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataValue; @@ -81,7 +84,7 @@ * Note: This field is guarded by the monitor on the {@link AVERAGE} * instance. */ - private transient IV aggregated = ZERO; + private transient NumericIV aggregated = ZERO; /** * The #of observed values. @@ -114,7 +117,7 @@ if(n == 0) return ZERO; - return IVUtility.numericalMath(aggregated, + return MathUtility.literalMath(aggregated, new XSDNumericIV<BigdataLiteral>(n), MathBOp.MathOp.DIVIDE); } @@ -148,36 +151,51 @@ if (iv != null) { /* - * Aggregate non-null values. + * Aggregate non-null literal values. */ + + /* + * Aggregate non-null literal values. + */ - if (iv.isInline()) { + final Literal lit = IVValueExpression.asLiteral(iv); + + if (!MathUtility.checkNumericDatatype(lit)) + throw new SparqlTypeErrorException(); + + aggregated = + MathUtility.literalMath(aggregated, lit, MathOp.PLUS); - // Two IVs. - aggregated = IVUtility.numericalMath(iv, aggregated, - MathOp.PLUS); +// if (!iv.isLiteral()) +// throw new SparqlTypeErrorException(); +// +// if (iv.isInline()) { +// +// // Two IVs. +// aggregated = IVUtility.numericalMath(iv, aggregated, +// MathOp.PLUS); +// +// } else { +// +// // One IV and one Literal. +// final BigdataValue val1 = iv.getValue(); +// +// if (val1 == null) +// throw new NotMaterializedException(); +// +//// if (!(val1 instanceof Literal)) +//// throw new SparqlTypeErrorException(); +// +// // Only numeric value can be used in math expressions +// final URI dt1 = ((Literal)val1).getDatatype(); +// if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) +// throw new SparqlTypeErrorException(); +// +// aggregated = IVUtility.numericalMath((Literal) val1, +// aggregated, MathOp.PLUS); +// +// } - } else { - - // One IV and one Literal. 
- final BigdataValue val1 = iv.getValue(); - - if (val1 == null) - throw new NotMaterializedException(); - - if (!(val1 instanceof Literal)) - throw new SparqlTypeErrorException(); - - // Only numeric value can be used in math expressions - final URI dt1 = ((Literal)val1).getDatatype(); - if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) - throw new SparqlTypeErrorException(); - - aggregated = IVUtility.numericalMath((Literal) val1, - aggregated, MathOp.PLUS); - - } - n++; } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/GROUP_CONCAT.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -33,6 +33,7 @@ import com.bigdata.bop.solutions.PipelinedAggregationOp; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.INeedsMaterialization; +import com.bigdata.rdf.internal.constraints.IVValueExpression; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.model.BigdataValueFactoryImpl; import com.bigdata.rdf.sparql.ast.DummyConstantNode; @@ -279,12 +280,17 @@ if (iv != null && !done) { - final String str; - if (iv.isInline() && !iv.isExtension()) { - str = iv.getInlineValue().toString(); - } else { - str = iv.getValue().stringValue(); - } + final String str = IVValueExpression.asValue(iv).stringValue(); + /* + * This kind of logic needs to be hidden. It causes too many + * problems. Right now IVValueExpression.asValue(IV) knows how + * to handle this correctly. + */ +// if (iv.isInline() && !iv.isExtension()) { +// str = iv.getInlineValue().toString(); +// } else { +// str = iv.getValue().stringValue(); +// } if (aggregated == null) aggregated = new StringBuilder(str); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/SUM.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -38,7 +38,10 @@ import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.NotMaterializedException; import com.bigdata.rdf.internal.constraints.INeedsMaterialization; +import com.bigdata.rdf.internal.constraints.IVValueExpression; import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; +import com.bigdata.rdf.internal.constraints.MathUtility; +import com.bigdata.rdf.internal.impl.literal.NumericIV; import com.bigdata.rdf.model.BigdataValue; /** @@ -78,7 +81,7 @@ * Note: This field is guarded by the monitor on the {@link SUM} instance. */ @SuppressWarnings("rawtypes") - private transient IV aggregated = ZERO; + private transient NumericIV aggregated = ZERO; /** * The first error encountered since the last {@link #reset()}. @@ -141,36 +144,44 @@ if (iv != null) { /* - * Aggregate non-null values. + * Aggregate non-null literal values. 
*/ - if (iv.isInline()) { + final Literal lit = IVValueExpression.asLiteral(iv); + + if (!MathUtility.checkNumericDatatype(lit)) + throw new SparqlTypeErrorException(); + + aggregated = + MathUtility.literalMath(aggregated, lit, MathOp.PLUS); + +// if (iv.isInline()) { +// +// // Two IVs. +// aggregated = IVUtility.numericalMath(iv, aggregated, +// MathOp.PLUS); +// +// } else { +// +// // One IV and one Literal. +// final BigdataValue val1 = iv.getValue(); +// +// if (val1 == null) +// throw new NotMaterializedException(); +// +//// if (!(val1 instanceof Literal)) +//// throw new SparqlTypeErrorException(); +// +// // Only numeric value can be used in math expressions +// final URI dt1 = ((Literal) val1).getDatatype(); +// if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) +// throw new SparqlTypeErrorException(); +// +// aggregated = IVUtility.numericalMath((Literal) val1, +// aggregated, MathOp.PLUS); +// +// } - // Two IVs. - aggregated = IVUtility.numericalMath(iv, aggregated, - MathOp.PLUS); - - } else { - - // One IV and one Literal. - final BigdataValue val1 = iv.getValue(); - - if (val1 == null) - throw new NotMaterializedException(); - - if (!(val1 instanceof Literal)) - throw new SparqlTypeErrorException(); - - // Only numeric value can be used in math expressions - final URI dt1 = ((Literal) val1).getDatatype(); - if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) - throw new SparqlTypeErrorException(); - - aggregated = IVUtility.numericalMath((Literal) val1, - aggregated, MathOp.PLUS); - - } - } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -35,22 +35,14 @@ import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; -import java.math.RoundingMode; import java.util.LinkedList; import java.util.List; import java.util.UUID; -import org.openrdf.model.Literal; -import org.openrdf.model.URI; -import org.openrdf.model.datatypes.XMLDatatypeUtil; import org.openrdf.model.impl.URIImpl; -import org.openrdf.model.vocabulary.XMLSchema; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; -import com.bigdata.rdf.error.SparqlTypeErrorException; -import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; -import com.bigdata.rdf.internal.constraints.NumericBOp.NumericOp; import com.bigdata.rdf.internal.impl.AbstractIV; import com.bigdata.rdf.internal.impl.AbstractInlineIV; import com.bigdata.rdf.internal.impl.BlobIV; @@ -137,502 +129,7 @@ } - public static IV literalMath(final Literal l1, final Literal l2, - final MathOp op) - { - final URI dt1 = l1.getDatatype(); - final URI dt2 = l2.getDatatype(); - - // Only numeric value can be used in math expressions - if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) { - throw new IllegalArgumentException("Not a number: " + l1); - } - if (dt2 == null || !XMLDatatypeUtil.isNumericDatatype(dt2)) { - throw new IllegalArgumentException("Not a number: " + l2); - } - - // Determine most specific datatype that the arguments have in common, - // choosing from xsd:integer, xsd:decimal, xsd:float and xsd:double as - // per the SPARQL/XPATH spec - URI commonDatatype; - - if 
(dt1.equals(XMLSchema.DOUBLE) || dt2.equals(XMLSchema.DOUBLE)) { - commonDatatype = XMLSchema.DOUBLE; - } else if (dt1.equals(XMLSchema.FLOAT) || dt2.equals(XMLSchema.FLOAT)) { - commonDatatype = XMLSchema.FLOAT; - } else if (dt1.equals(XMLSchema.DECIMAL) || dt2.equals(XMLSchema.DECIMAL)) { - commonDatatype = XMLSchema.DECIMAL; - } else if (op == MathOp.DIVIDE) { - // Result of integer divide is decimal and requires the arguments to - // be handled as such, see for details: - // http://www.w3.org/TR/xpath-functions/#func-numeric-divide - commonDatatype = XMLSchema.DECIMAL; - } else { - commonDatatype = XMLSchema.INTEGER; - } - - // Note: Java already handles cases like divide-by-zero appropriately - // for floats and doubles, see: - // http://www.particle.kth.se/~lindsey/JavaCourse/Book/Part1/Tech/ - // Chapter02/floatingPt2.html - - try { - if (commonDatatype.equals(XMLSchema.DOUBLE)) { - double left = l1.doubleValue(); - double right = l2.doubleValue(); - return IVUtility.numericalMath(left, right, op); - } - else if (commonDatatype.equals(XMLSchema.FLOAT)) { - float left = l1.floatValue(); - float right = l2.floatValue(); - return IVUtility.numericalMath(left, right, op); - } - else if (commonDatatype.equals(XMLSchema.DECIMAL)) { - BigDecimal left = l1.decimalValue(); - BigDecimal right = l2.decimalValue(); - return IVUtility.numericalMath(left, right, op); - } - else { // XMLSchema.INTEGER - BigInteger left = l1.integerValue(); - BigInteger right = l2.integerValue(); - return IVUtility.numericalMath(left, right, op); - } - } catch (NumberFormatException e) { - throw new SparqlTypeErrorException(); - } catch (ArithmeticException e) { - throw new SparqlTypeErrorException(); - } - - } - - public static final IV numericalMath(final Literal l1, final IV iv2, - final MathOp op) { - - final URI dt1 = l1.getDatatype(); - - // Only numeric value can be used in math expressions - if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) { - throw new IllegalArgumentException("Not a number: " + l1); - } - - if (!iv2.isInline()) - throw new IllegalArgumentException( - "right term is not inline: left=" + l1 + ", right=" + iv2); - - if (!iv2.isLiteral()) - throw new IllegalArgumentException( - "right term is not literal: left=" + l1 + ", right=" + iv2); - - final DTE dte2 = iv2.getDTE(); - - if (!dte2.isNumeric()) - throw new IllegalArgumentException( - "right term is not numeric: left=" + l1 + ", right=" + iv2); - - final AbstractLiteralIV<BigdataLiteral, ?> num2 = (AbstractLiteralIV<BigdataLiteral, ?>) iv2; - - // Determine most specific datatype that the arguments have in common, - // choosing from xsd:integer, xsd:decimal, xsd:float and xsd:double as - // per the SPARQL/XPATH spec - - if (dte2 == DTE.XSDDouble || dt1.equals(XMLSchema.DOUBLE)) { - return numericalMath(l1.doubleValue(), num2.doubleValue(), op); - } else if (dte2 == DTE.XSDFloat || dt1.equals(XMLSchema.FLOAT)) { - return numericalMath(l1.floatValue(), num2.floatValue(), op); - } else if (dte2 == DTE.XSDDecimal || dt1.equals(XMLSchema.DECIMAL)) { - return numericalMath(l1.decimalValue(), num2.decimalValue(), op); - } else if (op == MathOp.DIVIDE) { - // Result of integer divide is decimal and requires the arguments to - // be handled as such, see for details: - // http://www.w3.org/TR/xpath-functions/#func-numeric-divide - return numericalMath(l1.decimalValue(), num2.decimalValue(), op); - } else { - return numericalMath(l1.integerValue(), num2.integerValue(), op); - } - - } - - public static final IV numericalMath(final Literal 
l1, final Literal l2, - final MathOp op) { - - final URI dt1 = l1.getDatatype(); - - // Only numeric value can be used in math expressions - if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) { - throw new IllegalArgumentException("Not a number: " + l1); - } - - final URI dt2 = l2.getDatatype(); - - // Only numeric value can be used in math expressions - if (dt2 == null || !XMLDatatypeUtil.isNumericDatatype(dt2)) { - throw new IllegalArgumentException("Not a number: " + l2); - } - - // Determine most specific datatype that the arguments have in common, - // choosing from xsd:integer, xsd:decimal, xsd:float and xsd:double as - // per the SPARQL/XPATH spec - - if (dt2.equals(XMLSchema.DOUBLE)|| dt1.equals(XMLSchema.DOUBLE)) { - return numericalMath(l1.doubleValue(), l2.doubleValue(), op); - } else if ( dt2.equals(XMLSchema.FLOAT)|| dt1.equals(XMLSchema.FLOAT)) { - return numericalMath(l1.floatValue(), l2.floatValue(), op); - } else if (dt2.equals(XMLSchema.DECIMAL)|| dt1.equals(XMLSchema.DECIMAL)) { - return numericalMath(l1.decimalValue(), l2.decimalValue(), op); - } else if (op == MathOp.DIVIDE) { - // Result of integer divide is decimal and requires the arguments to - // be handled as such, see for details: - // http://www.w3.org/TR/xpath-functions/#func-numeric-divide - return numericalMath(l1.decimalValue(), l2.decimalValue(), op); - } else { - return numericalMath(l1.integerValue(), l2.integerValue(), op); - } - - } - - public static final IV numericalMath(final IV iv1, final IV iv2, - final MathOp op) { - - if (!iv1.isInline()) - throw new IllegalArgumentException( - "left term is not inline: left=" + iv1 + ", right=" + iv2); - - if (!iv2.isInline()) - throw new IllegalArgumentException( - "right term is not inline: left=" + iv1 + ", right=" + iv2); - - if (!iv1.isLiteral()) - throw new IllegalArgumentException( - "left term is not literal: left=" + iv1 + ", right=" + iv2); - - if (!iv2.isLiteral()) - throw new IllegalArgumentException( - "right term is not literal: left=" + iv1 + ", right=" + iv2); - - final DTE dte1 = iv1.getDTE(); - final DTE dte2 = iv2.getDTE(); - - if (!dte1.isNumeric()) - throw new IllegalArgumentException( - "right term is not numeric: left=" + iv1 + ", right=" + iv2); - - if (!dte2.isNumeric()) - throw new IllegalArgumentException( - "left term is not numeric: left=" + iv1 + ", right=" + iv2); - - final Literal num1 = (Literal) iv1; - final Literal num2 = (Literal) iv2; - - // Determine most specific datatype that the arguments have in common, - // choosing from xsd:integer, xsd:decimal, xsd:float and xsd:double as - // per the SPARQL/XPATH spec - - if (dte1 == DTE.XSDDouble || dte2 == DTE.XSDDouble) { - return numericalMath(num1.doubleValue(), num2.doubleValue(), op); - } else if (dte1 == DTE.XSDFloat || dte2 == DTE.XSDFloat) { - return numericalMath(num1.floatValue(), num2.floatValue(), op); - } if (dte1 == DTE.XSDDecimal || dte2 == DTE.XSDDecimal) { - return numericalMath(num1.decimalValue(), num2.decimalValue(), op); - } if (op == MathOp.DIVIDE) { - // Result of integer divide is decimal and requires the arguments to - // be handled as such, see for details: - // http://www.w3.org/TR/xpath-functions/#func-numeric-divide - return numericalMath(num1.decimalValue(), num2.decimalValue(), op); - } else { - return numericalMath(num1.integerValue(), num2.integerValue(), op); - } - -// // if one's a BigDecimal we should use the BigDecimal comparator for both -// if (dte1 == DTE.XSDDecimal || dte2 == DTE.XSDDecimal) { -// return 
numericalMath(num1.decimalValue(), num2.decimalValue(), op); -// } -// -// // same for BigInteger -// if (dte1 == DTE.XSDInteger || dte2 == DTE.XSDInteger) { -// return numericalMath(num1.integerValue(), num2.integerValue(), op); -// } -// -// // fixed length numerics -// if (dte1.isFloatingPointNumeric() || dte2.isFloatingPointNumeric()) { -// // non-BigDecimal floating points -// if (dte1 == DTE.XSDFloat || dte2 == DTE.XSDFloat) -// return numericalMath(num1.floatValue(), num2.floatValue(), op); -// else -// return numericalMath(num1.doubleValue(), num2.doubleValue(), op); -// } else { -// // non-BigInteger integers -// if (dte1 == DTE.XSDInt && dte2 == DTE.XSDInt) -// return numericalMath(num1.intValue(), num2.intValue(), op); -// else -// return numericalMath(num1.longValue(), num2.longValue(), op); -// } - - } - /** - * The XPath numeric functions: abs, ceiling, floor, and round. - * - * @param iv1 - * The operand. - * @param op - * The operation. - - * @return The result. - * - * @see XPathMathFunctions - */ - public static final IV numericalFunc(final IV iv1, final NumericOp op) { - - if (!iv1.isInline()) - throw new IllegalArgumentException( - "left term is not inline: left=" + iv1 ); - - - if (!iv1.isLiteral()) - throw new IllegalArgumentException( - "left term is not literal: left=" + iv1 ); - - - final DTE dte1 = iv1.getDTE(); - - if (!dte1.isNumeric()) - throw new IllegalArgumentException( - "left term is not numeric: left=" + iv1 ); - - final AbstractLiteralIV num1 = (AbstractLiteralIV) iv1; - - /* - * FIXME These xpath functions have very custom semantics. They need to - * be lifted out of this class and put into their own static methods - * with their own test suites. - */ -// switch (op) { -// case ABS: -// return XPathMathFunctions.abs(iv1); -// case CEIL: -// return XPathMathFunctions.ceiling(iv1); -// case FLOOR: -// return XPathMathFunctions.floor(iv1); -// case ROUND: -// return XPathMathFunctions.round(iv1); -// default: -// throw new UnsupportedOperationException(op.toString()); -// } - - // if one's a BigDecimal we should use the BigDecimal comparator for - // both - if (dte1 == DTE.XSDDecimal) { - return numericalFunc(num1.decimalValue(), op); - } else if (dte1 == DTE.XSDInteger) { - return numericalFunc(num1.integerValue(), op); - } else if (dte1.isFloatingPointNumeric()) { - return numericalFunc(num1.floatValue(), op); - } else if (dte1.equals(DTE.XSDInt)) { - return numericalFunc(num1.intValue(), op); - } else if (dte1.equals(DTE.XSDDouble)) { - return numericalFunc(num1.doubleValue(), op); - } else { - return numericalFunc(num1.longValue(), op); - } - } - - @Deprecated - private static final IV numericalFunc(final BigDecimal left, final NumericOp op) { - switch(op) { - case ABS: - return new XSDDecimalIV(left.abs()); - case CEIL: - return new XSDDecimalIV(new BigDecimal(Math.round(Math.ceil(left.doubleValue())))); - case FLOOR: - return new XSDDecimalIV(new BigDecimal(Math.round(Math.floor(left.doubleValue())))); - case ROUND: - return new XSDDecimalIV(new BigDecimal(Math.round(left.doubleValue()))); - default: - throw new UnsupportedOperationException(); - } - } - - @Deprecated - private static final IV numericalFunc(final BigInteger left, final NumericOp op) { - switch(op) { - case ABS: - return new XSDIntegerIV(left.abs()); - case CEIL: - return new XSDNumericIV(Math.ceil(left.doubleValue())); - case FLOOR: - return new XSDNumericIV(Math.floor(left.doubleValue())); - case ROUND: - return new XSDNumericIV(Math.round(left.doubleValue())); - default: - throw 
new UnsupportedOperationException(); - } - } - - @Deprecated - private static final IV numericalFunc(final float left, final NumericOp op) { - switch(op) { - case ABS: - return new XSDNumericIV(Math.abs(left)); - case CEIL: - return new XSDNumericIV(Math.ceil(left)); - case FLOOR: - return new XSDNumericIV(Math.floor(left)); - case ROUND: - return new XSDNumericIV(Math.round(left)); - default: - throw new UnsupportedOperationException(); - } - } - - @Deprecated - private static final IV numericalFunc(final int left, final NumericOp op) { - switch(op) { - case ABS: - return new XSDNumericIV(Math.abs(left)); - case CEIL: - return new XSDNumericIV(Math.ceil(left)); - case FLOOR: - return new XSDNumericIV(Math.floor(left)); - case ROUND: - return new XSDNumericIV(Math.round(left)); - default: - throw new UnsupportedOperationException(); - } - } - - @Deprecated - private static final IV numericalFunc(final long left, final NumericOp op) { - switch(op) { - case ABS: - return new XSDNumericIV(Math.abs(left)); - case CEIL: - return new XSDNumericIV(Math.ceil(left)); - case FLOOR: - return new XSDNumericIV(Math.floor(left)); - case ROUND: - return new XSDNumericIV(Math.round(left)); - default: - throw new UnsupportedOperationException(); - } - } - - @Deprecated - private static final IV numericalFunc(final double left, final NumericOp op) { - switch(op) { - case ABS: - return new XSDNumericIV(Math.abs(left)); - case CEIL: - return new XSDNumericIV(Math.ceil(left)); - case FLOOR: - return new XSDNumericIV(Math.floor(left)); - case ROUND: - return new XSDNumericIV(Math.round(left)); - default: - throw new UnsupportedOperationException(); - } - } - - private static final IV numericalMath(final BigDecimal left, - final BigDecimal right, final MathOp op) { - - switch(op) { - case PLUS: - return new XSDDecimalIV(left.add(right)); - case MINUS: - return new XSDDecimalIV(left.subtract(right)); - case MULTIPLY: - return new XSDDecimalIV(left.multiply(right)); - case DIVIDE: - /* - * Note: Change per mroycsi. Reverts to a half-rounding mode iff - * an exact quotient can not be represented. - */ - try { - // try for exact quotient - return new XSDDecimalIV(left.divide(right)); - } catch (ArithmeticException ae) { - // half-rounding mode. - return new XSDDecimalIV(left.divide(right, 20, - RoundingMode.HALF_UP)); - } -// return new XSDDecimalIV(left.divide(right, RoundingMode.HALF_UP)); - case MIN: - return new XSDDecimalIV(left.compareTo(right) < 0 ? left : right); - case MAX: - return new XSDDecimalIV(left.compareTo(right) > 0 ? left : right); - default: - throw new UnsupportedOperationException(); - } - - } - - private static final IV numericalMath(final BigInteger left, - final BigInteger right, final MathOp op) { - - switch(op) { - case PLUS: - return new XSDIntegerIV(left.add(right)); - case MINUS: - return new XSDIntegerIV(left.subtract(right)); - case MULTIPLY: - return new XSDIntegerIV(left.multiply(right)); - case DIVIDE: - return new XSDIntegerIV(left.divide(right)); - case MIN: - return new XSDIntegerIV(left.compareTo(right) < 0 ? left : right); - case MAX: - return new XSDIntegerIV(left.compareTo(right) > 0 ? 
left : right); - default: - throw new UnsupportedOperationException(); - } - - } - - private static final IV numericalMath(final float left, - final float right, final MathOp op) { - - switch(op) { - case PLUS: - return new XSDNumericIV(left+right); - case MINUS: - return new XSDNumericIV(left-right); - case MULTIPLY: - return new XSDNumericIV(left*right); - case DIVIDE: - return new XSDNumericIV(left/right); - case MIN: - return new XSDNumericIV(Math.min(left,right)); - case MAX: - return new XSDNumericIV(Math.max(left,right)); - default: - throw new UnsupportedOperationException(); - } - - } - - private static final IV numericalMath(final double left, - final double right, final MathOp op) { - - switch(op) { - case PLUS: - return new XSDNumericIV(left+right); - case MINUS: - return new XSDNumericIV(left-right); - case MULTIPLY: - return new XSDNumericIV(left*right); - case DIVIDE: - return new XSDNumericIV(left/right); - case MIN: - return new XSDNumericIV(Math.min(left,right)); - case MAX: - return new XSDNumericIV(Math.max(left,right)); - default: - throw new UnsupportedOperationException(); - } - - } - - /** * Encode an RDF value into a key for one of the statement indices. Handles * null {@link IV} references gracefully. * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -157,22 +157,27 @@ } + private static boolean isRealTermId(final IV iv) { + + return !iv.isInline() && !iv.isNullIV(); + + } + private static boolean compareLiterals( final IV<BigdataValue, ?> left, final IV<BigdataValue, ?> right, final CompareOp op) { /* - * Handle the special case where we have exact termId equality. - * Probably would never hit this because of SameTermBOp + * Handle the special case where we have exact termId equality. This + * only works if both are "real" termIds (i.e. not mock IVs). */ - if (!left.isInline() && !right.isInline() && op == CompareOp.EQ) { + if (op == CompareOp.EQ && isRealTermId(left) && isRealTermId(right)) { - if (!left.isNullIV() && !right.isNullIV() && left.equals(right)) { - /* - * Neither may be a NullIV (or mock IV) and they are equals(). 
- */ + if (left.equals(right)) { + return true; + } } @@ -207,55 +212,15 @@ } } -// -// if (left.isInline() && left.isNumeric() && right.isInline() && right.isNumeric()) { -// -// final DTE dte1 = left.getDTE(); -// final DTE dte2 = right.getDTE(); -// -// // we can use the natural ordering if they have the same DTE -// // this will naturally take care of two booleans or two numerics of the -// // same datatype -// if (dte1 == dte2) -// return _accept(left.compareTo(right), op); -// -// // otherwise we need to try to convert them into comparable numbers -// final AbstractLiteralIV num1 = (AbstractLiteralIV) left; -// final AbstractLiteralIV num2 = (AbstractLiteralIV) right; -// -// // if one's a BigDecimal we should use the BigDecimal comparator for both -// if (dte1 == DTE.XSDDecimal || dte2 == DTE.XSDDecimal) { -// return _accept(num1.decimalValue().compareTo(num2.decimalValue()), op); -// } -// -// // same for BigInteger -// if (dte1 == DTE.XSDInteger || dte2 == DTE.XSDInteger) { -// return _accept(num1.integerValue().compareTo(num2.integerValue()), op); -// } -// -// // fixed length numerics -// if (dte1.isFloatingPointNumeric() || dte2.isFloatingPointNumeric()) { -// // non-BigDecimal floating points - use doubles -// return _accept(Double.compare(num1.doubleValue(), num2.doubleValue()), op); -// } else { -// // non-BigInteger integers - use longs -// final long a = num1.longValue(); -// final long b = num2.longValue(); -// return _accept(a == b ? 0 : a < b ? -1 : 1, op); -// } -// -// } /* * Now that the IVs implement the right openrdf interfaces, * we should be able to mix and match inline with non-inline, * using either the IV directly or its materialized value. */ -// final Literal l1 = left.isInline() ? (Literal) left : left.getValue(); - final Literal l1 = (Literal) left; + final Literal l1 = asLiteral(left); -// final Literal l2 = right.isInline() ? (Literal) right : right.getValue(); - final Literal l2 = (Literal) right; + final Literal l2 = asLiteral(right); if (log.isDebugEnabled()) { log.debug(l1); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DatatypeBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DatatypeBOp.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DatatypeBOp.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -87,7 +87,7 @@ final BigdataValueFactory vf = super.getValueFactory(); @SuppressWarnings("rawtypes") - final IV iv = get(0).get(bs); + final IV iv = getAndCheckLiteral(0, bs); // not yet bound if (iv == null) @@ -97,12 +97,17 @@ log.debug(iv); } - if (iv.isInline() && !iv.isExtension()) { + /* + * We don't need to do this anymore. asValue(IV) does the right thing, + * it will let us work with the IV directly in the right cases. The + * BOps should no longer be doing this kind of logic directly. 
+ */ +// if (iv.isInline() && !iv.isExtension()) { +// +// return asIV(iv.getDTE().getDatatypeURI(), bs); +// +// } - return asIV(iv.getDTE().getDatatypeURI(), bs); - - } - final Value val = asValue(iv); if (val instanceof Literal) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DateTimeUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DateTimeUtility.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/DateTimeUtility.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -10,12 +10,10 @@ import org.openrdf.model.vocabulary.XMLSchema; import com.bigdata.rdf.error.SparqlTypeErrorException; -import com.bigdata.rdf.internal.ILexiconConfiguration; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; import com.bigdata.rdf.model.BigdataLiteral; -import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.sparql.ast.DummyConstantNode; @@ -29,104 +27,102 @@ } } - static public IV dateTimeMath(Literal l1, IV iv1, Literal l2, IV iv2, MathOp op, BigdataValueFactory vf, ILexiconConfiguration lc) { - final URI dt1 = l1.getDatatype(); - final URI dt2 = l2.getDatatype(); - if(dt1==null||dt2==null){ + static public boolean checkDateTimeDatatype(final Literal... args) { + for (Literal lit : args) { + final URI dt = lit.getDatatype(); + if (dt == null) + return false; + if (!(XMLDatatypeUtil.isCalendarDatatype(dt) || dt.equals(XMLSchema.DURATION))) + return false; + } + return true; + } + + static public IV dateTimeMath( + final Literal l1, final IV iv1, + final Literal l2, final IV iv2, + final MathOp op, + final BigdataValueFactory vf) { + + if (!checkDateTimeDatatype(l1, l2)) throw new SparqlTypeErrorException(); - } - if ((XMLDatatypeUtil.isCalendarDatatype(dt1) || dt1.equals(XMLSchema.DURATION)) && (XMLDatatypeUtil.isCalendarDatatype(dt2) || dt2.equals(XMLSchema.DURATION))) { - XMLGregorianCalendar c1 = XMLDatatypeUtil.isCalendarDatatype(dt1) ? l1.calendarValue() : null; - XMLGregorianCalendar c2 = XMLDatatypeUtil.isCalendarDatatype(dt2) ? l2.calendarValue() : null; - Duration d1 = dt1.equals(XMLSchema.DURATION) ? datatypeFactory.newDuration(l1.getLabel()) : null; - Duration d2 = dt2.equals(XMLSchema.DURATION) ? datatypeFactory.newDuration(l2.getLabel()) : null; - if (op == MathOp.PLUS) { - if (c1 != null && c2 != null) { - throw new IllegalArgumentException("Cannot add 2 calendar literals:" + l1 + ":" + l2); - } else if (c1 != null && d2 != null) { - c1.add(d2); - final BigdataLiteral str = vf.createLiteral(c1); - return createIV(str, lc); - } else if (c2 != null && d1 != null) { - c2.add(d1); - final BigdataLiteral str = vf.createLiteral(c2); - return createIV(str, lc); - } else if (d1 != null && d2 != null) { - Duration result = d1.add(d2); - final BigdataLiteral str = vf.createLiteral(result.toString(), XMLSchema.DURATION); - return createIV(str, lc); + + final URI dt1 = l1.getDatatype(); + final URI dt2 = l2.getDatatype(); + XMLGregorianCalendar c1 = XMLDatatypeUtil.isCalendarDatatype(dt1) ? l1.calendarValue() : null; + XMLGregorianCalendar c2 = XMLDatatypeUtil.isCalendarDatatype(dt2) ? l2.calendarValue() : null; + Duration d1 = dt1.equals(XMLSchema.DURATION) ? 
datatypeFactory.newDuration(l1.getLabel()) : null; + Duration d2 = dt2.equals(XMLSchema.DURATION) ? datatypeFactory.newDuration(l2.getLabel()) : null; + if (op == MathOp.PLUS) { + if (c1 != null && c2 != null) { + throw new IllegalArgumentException("Cannot add 2 calendar literals:" + l1 + ":" + l2); + } else if (c1 != null && d2 != null) { + c1.add(d2); + final BigdataLiteral str = vf.createLiteral(c1); + return DummyConstantNode.toDummyIV(str); + } else if (c2 != null && d1 != null) { + c2.add(d1); + final BigdataLiteral str = vf.createLiteral(c2); + return DummyConstantNode.toDummyIV(str); + } else if (d1 != null && d2 != null) { + Duration result = d1.add(d2); + final BigdataLiteral str = vf.createLiteral(result.toString(), XMLSchema.DURATION); + return DummyConstantNode.toDummyIV(str); + } else { + throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); + } + } else if (op == MathOp.MINUS) { + if (c1 != null && c2 != null) { + long milliseconds = c1.toGregorianCalendar().getTimeInMillis() - c2.toGregorianCalendar().getTimeInMillis(); + double days = ((double) milliseconds) / ((double) (1000 * 60 * 60 * 24)); + return new XSDNumericIV(days); + } else if (d1 != null && d2 != null) { + Duration result = d1.subtract(d2); + final BigdataLiteral str = vf.createLiteral(result.toString(), XMLSchema.DURATION); + return DummyConstantNode.toDummyIV(str); + } else { + throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); + } + + } else if (op == MathOp.MIN) { + if (c1 != null && c2 != null) { + int comp = c1.compare(c2); + if (comp <= 0) { + return iv1; } else { - throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); + return iv2; } - } else if (op == MathOp.MINUS) { - if (c1 != null && c2 != null) { - long milliseconds = c1.toGregorianCalendar().getTimeInMillis() - c2.toGregorianCalendar().getTimeInMillis(); - double days = ((double) milliseconds) / ((double) (1000 * 60 * 60 * 24)); - return new XSDNumericIV(days); - } else if (d1 != null && d2 != null) { - Duration result = d1.subtract(d2); - final BigdataLiteral str = vf.createLiteral(result.toString(), XMLSchema.DURATION); - return createIV(str, lc); - } else { - throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); + } else if (d1 != null && d2 != null) { + int comp= d1.compare(d2); + if(comp<=0){ + return iv1; + }else{ + return iv2; } - - } else if (op == MathOp.MIN) { - if (c1 != null && c2 != null) { - int comp = c1.compare(c2); - if (comp <= 0) { - return iv1; - } else { - return iv2; - } - } else if (d1 != null && d2 != null) { - int comp= d1.compare(d2); - if(comp<=0){ - return iv1; - }else{ - return iv2; - } + } else { + throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); + } + } else if (op == MathOp.MAX) { + if (c1 != null && c2 != null) { + int comp = c1.compare(c2); + if (comp >= 0) { + return iv1; } else { - throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); + return iv2; } - } else if (op == MathOp.MAX) { - if (c1 != null && c2 != null) { - int comp = c1.compare(c2); - if (comp >= 0) { - return iv1; - } else { - return iv2; - } - } else if (d1 != null && d2 != null) { - int comp= d1.compare(d2); - if(comp>=0){ - return iv1; - }else{ - return iv2; - } - } else { - throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); - } + } else if (d1 != null && d2 != null) 
{ + int comp= d1.compare(d2); + if(comp>=0){ + return iv1; + }else{ + return iv2; + } } else { - throw new SparqlTypeErrorException(); + throw new IllegalArgumentException("Cannot add process datatype literals:" + l1 + ":" + l2); } + } else { + throw new SparqlTypeErrorException(); } - throw new SparqlTypeErrorException(); } - private static IV createIV(final BigdataValue value, final ILexiconConfiguration lc) { - - // see if we happen to have the value in the vocab or can otherwise - // create an inline IV for it - IV iv = lc != null ? lc.createInlineIV(value) : null; - if (iv != null) { - iv.setValue(value); - } else { - iv = DummyConstantNode.toDummyIV(value); - } - - return iv; - - } - } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -362,7 +362,7 @@ * be turned into a {@link Literal} without an index read. */ @SuppressWarnings("rawtypes") - final protected Literal asLiteral(final IV iv) { + final static public Literal asLiteral(final IV iv) { if (iv == null) throw new SparqlTypeErrorException(); @@ -370,23 +370,10 @@ if (!iv.isLiteral()) throw new SparqlTypeErrorException(); -// final BigdataValueFactory vf = getValueFactory(); - if (iv.isInline() && !iv.needsMaterialization()) { -// if (iv instanceof Literal) { + return (Literal) iv; - return (Literal) iv; - -// } else { -// -// final BigdataURI datatype = vf -// .asValue(iv.getDTE().getDatatypeURI()); -// -// return vf.createLiteral(((Value) iv).stringValue(), datatype); -// -// } - } else if (iv.hasValue()) { return (BigdataLiteral) iv.getValue(); @@ -414,7 +401,7 @@ * be turned into a {@link Literal} without an index read. */ @SuppressWarnings("rawtypes") - final static protected Value asValue(final IV iv) { + final static public Value asValue(final IV iv) { if (iv == null) throw new SparqlTypeErrorException(); @@ -490,7 +477,7 @@ if (!iv.isLiteral()) throw new SparqlTypeErrorException(); - if (!iv.isInline() && !iv.hasValue()) + if (iv.needsMaterialization() && !iv.hasValue()) throw new NotMaterializedException(); return iv; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2012-04-05 08:39:41 UTC (rev 6256) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathBOp.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -153,72 +153,46 @@ final public IV get(final IBindingSet bs) { - final IV left = left().get(bs); - - // not yet bound? - if (left == null) - throw new SparqlTypeErrorException.UnboundVarException(); - - final IV right = right().get(bs); + final IV iv1 = getAndCheckLiteral(0, bs); + final IV iv2 = getAndCheckLiteral(1, bs); - // not yet bound? 
- if (right == null) - throw new SparqlTypeErrorException.UnboundVarException(); - - try { - - if (log.isDebugEnabled()) { - log.debug(toString(left.toString(), right.toString())); - } - - if (left.isInline() && right.isInline()) { - - return IVUtility.numericalMath(left, right, op()); - - } else { - - final BigdataValue val1 = left.getValue(); - - final BigdataValue val2 = right.getValue(); - - if (val1 == null || val2 == null) - throw new NotMaterializedException(); - - if (!(val1 instanceof Literal) || !(val2 instanceof Literal)) { - throw new SparqlTypeErrorException(); - } - - try{ - - return IVUtility.literalMath( - (Literal) val1, (Literal) val2, op()); - - } catch(IllegalArgumentException iae){ - - ILexiconConfiguration lc = null; - try { - lc = getLexiconConfiguration(bs); - } catch (ContextNotAvailableException ex) { - // can't use the LC (e.g. test cases) - } - - return DateTimeUtility.dateTimeMath((Literal)val1, left, - (Literal)val2, right, op(), vf(), lc); - - } - - } - - } catch (IllegalArgumentException ex) { - - if (log.isDebugEnabled()) { - log.debug("illegal argument, filtering solution"); - } - - throw new SparqlTypeErrorException(); - - } + if (log.isDebugEnabled()) { + log.debug(toString(iv1.toString(), iv2.toString())); + } + + final Literal lit1 = asLiteral(iv1); + final Literal lit2 = asLiteral(iv2); + if (MathUtility.checkNumericDatatype(lit1, lit2)) { + + return MathUtility.literalMath(lit1, lit2, op()); + + } else if (DateTimeUtility.checkDateTimeDatatype(lit1, lit2)) { + + final IV iv = + DateTimeUtility.dateTimeMath(lit1, iv1, lit2, iv2, op(), vf()); + + // try to create a real IV if possible + if (iv.isNullIV()) { + + final BigdataValue val = iv.getValue(); + + return asIV(val, bs); + + } else { + + return iv; + + } + + } + + if (log.isDebugEnabled()) { + log.debug("illegal argument(s), filtering solution: " + iv1 + ", " + iv2); + } + + throw new SparqlTypeErrorException(); + } public IValueExpression<? extends IV> left() { Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathUtility.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/MathUtility.java 2012-04-05 11:16:55 UTC (rev 6257) @@ -0,0 +1,509 @@ +package com.bigdata.rdf.internal.constraints; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.RoundingMode; + +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.datatypes.XMLDatatypeUtil; +import org.openrdf.model.vocabulary.XMLSchema; + +import com.bigdata.rdf.error.SparqlTypeErrorException; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.XPathMathFunctions; +import com.bigdata.rdf.internal.constraints.MathBOp.MathOp; +import com.bigdata.rdf.internal.constraints.NumericBOp.NumericOp; +import com.bigdata.rdf.internal.impl.literal.NumericIV; +import com.bigdata.rdf.internal.impl.literal.XSDDecimalIV; +import com.bigdata.rdf.internal.impl.literal.XSDIntegerIV; +import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; + +public class MathUtility { + + public static boolean checkNumericDatatype(final Literal... 
args) { + for (Literal lit : args) { + final URI dt = lit.getDatatype(); + + if (dt == null || !XMLDatatypeUtil.isNumericDatatype(dt)) + return false; + } + return true; + } + + public static NumericIV literalMath(final Literal l1, final Literal l2, + final MathOp op) + { + if (!checkNumericDatatype(l1, l2)) + throw new IllegalArgumentException("Not numbers: " + l1 + ", " + l2); + + final URI dt1 = l1.getDatatype(); + final URI dt2 = l2.getDatatype(); + + // Determine most specific datatype that the arguments have in common, + // choosing from xsd:integer, xsd:decimal, xsd:float and xsd:double as + // per the SPARQL/XPATH spec + URI commonDatatype; + + if (dt1.equals(XMLSchema.DOUBLE) || dt2.equals(XMLSchema.DOUBLE)) { + commonDatatype = XMLSchema.DOUBLE; + } else if (dt1.equals(XMLSchema.FLOAT) || dt2.equals(XMLSchema.FLOAT)) { + commonDatatype = XMLSchema.FLOAT; + } else if (dt1.equals(XMLSchema.DECIMAL) || dt2.equals(XMLSchema.DECIMAL)) { + commonDatatype = XMLSchema.DECIMAL; + } else if (op == MathOp.DIVIDE) { + // Result of integer divide is decimal and requires the arguments to + // be handled as such, see for details: + // http://www.w3.org/TR/xpath-functions/#func-numeric-divide + commonDatatype = XMLSchema.DECIMAL; + } else { + commonDatatype = XMLSchema.INTEGER; + } + + // Note: Java already handles cases like divide-by-zero appropriately + // for floats and doubles, see: + // http://www.particle.kth.se/~lindsey/JavaCourse/Book/Part1/Tech/ + // Chapter02/floatingPt2.html + + try { + if (commonDatatype.equals(XMLSchema.DOUBLE)) { + double left = l1.doubleValue(); + double right = l2.doubleValue(); + return numericalMath(left, right, op); + } + else if (commonDatatype.equals(XMLSchema.FLOAT)) { + float left = l1.floatValue(); + float right = l2.floatValue(); + return numericalMath(left, right, op); + } + else if (commonDatatype.equals(XMLSchema.DECIMAL)) { + BigDecimal left = l1.decimalValue(); + BigDecimal right = l2.decimalValue(); + return numericalMath(left, right, op); + } + else { // XMLSchema.INTEGER + BigInteger left = l1.integerValue(); + BigInteger right = l2.integerValue(); + return numericalMath(left, right, op); + } + } catch (NumberFormatException e) { + throw new SparqlTypeErrorException(); + } catch (ArithmeticException e) { + throw new SparqlTypeErrorException(); + } + + } + +// public static final IV numericalMath(final Literal l1, final IV iv2, +// final MathOp op) { +// +// final URI dt1 = l1.getDatatype(); +// +// // Only numeric value can be used in math expressions +// if (dt1 == null || !XMLDatatypeUtil.isNumericDatatype(dt1)) { +// throw new IllegalArgumentException("Not a number: " + l1); +// } +// +// if (!iv2.isInline()) +// throw new IllegalArgumentException( +// "right term is not inline: left=" + l1 + ", right=" + iv2); +// +// if (!iv2.isLiteral()) +// throw new IllegalArgumentException( +// "right term is not literal: left=" + l1 + ", right=" + iv2); +// +// final DTE dte2 = iv2.getDTE(); +// +// if (!dte2.isNumeric()) +// throw new Il... [truncated message content] |
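The literalMath logic above encodes the SPARQL/XPath numeric promotion rule: the result datatype is the most general of the two operand datatypes in the order xsd:double > xsd:float > xsd:decimal > xsd:integer, except that dividing two integers promotes to xsd:decimal. A minimal, self-contained sketch of just that promotion rule (plain Java only; the enum and method names here are illustrative, not bigdata API):

    import java.math.BigDecimal;
    import java.math.MathContext;

    public class NumericPromotionSketch {

        // promotion lattice, least general to most general
        enum NumType { INTEGER, DECIMAL, FLOAT, DOUBLE }

        // Most general common type per http://www.w3.org/TR/xpath-functions/,
        // with op:numeric-divide on two integers yielding a decimal.
        static NumType promote(final NumType a, final NumType b, final boolean divide) {
            if (a == NumType.DOUBLE || b == NumType.DOUBLE) return NumType.DOUBLE;
            if (a == NumType.FLOAT || b == NumType.FLOAT) return NumType.FLOAT;
            if (a == NumType.DECIMAL || b == NumType.DECIMAL) return NumType.DECIMAL;
            return divide ? NumType.DECIMAL : NumType.INTEGER;
        }

        public static void main(final String[] args) {
            // "7"^^xsd:integer / "2"^^xsd:integer evaluates as xsd:decimal 3.5
            System.out.println(promote(NumType.INTEGER, NumType.INTEGER, true)); // DECIMAL
            System.out.println(new BigDecimal("7").divide(new BigDecimal("2"),
                    MathContext.DECIMAL128)); // 3.5
        }
    }

The same table decides which numericalMath overload runs (double, float, BigDecimal or BigInteger), so divide-by-zero falls through to Java's float/double semantics while the exact types raise ArithmeticException, which literalMath converts into a SparqlTypeErrorException.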
From: <tho...@us...> - 2012-04-09 14:03:13
Revision: 6263 http://bigdata.svn.sourceforge.net/bigdata/?rev=6263&view=rev Author: thompsonbry Date: 2012-04-09 14:03:06 +0000 (Mon, 09 Apr 2012) Log Message: ----------- Added a unit test to verify that the KEY_ORDER query hint is also applied to the pipeline join. Note: This test does NOT verify that the pipeline join actually obeys that query hint. You need to verify that with a unit test of the pipeline join evaluation and check to see that the right index is used for each join. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestQueryHints.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/query-hints-05a.rq Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2012-04-09 10:29:06 UTC (rev 6262) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2012-04-09 14:03:06 UTC (rev 6263) @@ -405,14 +405,29 @@ * @param it * The iterator. */ - @SuppressWarnings("rawtypes") - public static List<IVariable> toList(final Iterator<IVariable<?>> it) { +// @SuppressWarnings("rawtypes") +// public static List<IVariable> toList(final Iterator<IVariable<?>> it) { +// +// final List<IVariable> c = new LinkedList<IVariable>(); +// +// while (it.hasNext()) { +// +// final IVariable v = it.next(); +// +// c.add(v); +// +// } +// +// return c; +// +// } + public static <T> List<T> toList(final Iterator<T> it) { - final List<IVariable> c = new LinkedList<IVariable>(); + final List<T> c = new LinkedList<T>(); while (it.hasNext()) { - final IVariable v = it.next(); + final T v = it.next(); c.add(v); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestQueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestQueryHints.java 2012-04-09 10:29:06 UTC (rev 6262) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestQueryHints.java 2012-04-09 14:03:06 UTC (rev 6263) @@ -463,6 +463,119 @@ } /** + * Unit test for the {@link IPredicate.Annotations#KEY_ORDER} query hint on + * a pipeline join (versus a hash join). + * <p> + * Note: This test does NOT verify that the pipeline join actually obeys + * that query hint. You need to verify that with a unit test of the pipeline + * join evaluation and check to see that the right index is used for each + * join. + */ + public void test_query_hints_05a() throws Exception { + + final ASTContainer astContainer = new TestHelper( + "query-hints-05a", + "query-hints-05a.rq", + "query-hints-05.trig", + "query-hints-05.srx" + ) + .runTest(); + + /* + * Check the optimized AST. The magic predicates which correspond to + * query hints should have be removed and various annotations made to + * the AST which capture the semantics of those query hints. + */ + { + + final JoinGroupNode whereClause = (JoinGroupNode) astContainer + .getOptimizedAST().getWhereClause(); + + /* + * The hint to disable the join order optimizer should show up on + * the JoinGroupNodes. For the sample query, that means just the + * top-level WHERE clause. 
+ */ + assertEquals(QueryOptimizerEnum.None, + whereClause.getProperty(QueryHints.OPTIMIZER)); + + // There are two statement pattern nodes left (after removing the + // query hint SPs). + assertEquals(2, whereClause.arity()); + + { + + final StatementPatternNode sp = (StatementPatternNode) whereClause + .get(0); + + assertEquals(RDFS.LABEL, sp.p().getValue()); + + } + + { + + final StatementPatternNode sp = (StatementPatternNode) whereClause + .get(1); + + assertEquals(RDF.TYPE, sp.p().getValue()); + + assertEquals(SPOKeyOrder.PCSO.toString(), + sp.getQueryHint(IPredicate.Annotations.KEY_ORDER)); + +// assertEquals("true", +// sp.getQueryHint(QueryHints.HASH_JOIN)); + + } + + } // end check of the AST. + + /* + * Check the query plan. + */ + { + + /* + * Should be two pipeline joins. + * + * The first pipeline join is (?x rdfs:label ?o) + * + * The second pipeline join is (?x rdf:type foaf:Person) + */ + @SuppressWarnings("rawtypes") + final PipelineJoin[] joins = BOpUtility.toList( + BOpUtility.visitAll(astContainer.getQueryPlan(), + PipelineJoin.class)).toArray(new PipelineJoin[0]); + + assertEquals("#joins", 2, joins.length); + { + + @SuppressWarnings("rawtypes") + final PipelineJoin join = joins[1]; + + assertEquals(RDFS.LABEL, ((IV<?,?>)join.getPredicate().get(1/* p */) + .get()).getValue()); + + } + + { + + @SuppressWarnings("rawtypes") + final PipelineJoin join = joins[0]; + + assertEquals(RDF.TYPE, ((IV<?,?>) join.getPredicate().get(1/* p */) + .get()).getValue()); + + final IPredicate<?> pred = join.getPredicate(); + + assertEquals(SPOKeyOrder.PCSO, pred.getKeyOrder()); + + } + + } + + } + + /** * Unit test for {@link QueryHints#CHUNK_SIZE}. * * <pre> Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/query-hints-05a.rq =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/query-hints-05a.rq (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/query-hints-05a.rq 2012-04-09 14:03:06 UTC (rev 6263) @@ -0,0 +1,20 @@ +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX foaf: <http://xmlns.com/foaf/0.1/> + +SELECT ?x ?o +WHERE { + + # disable join order optimizer + hint:Group hint:optimizer "None" . + + # One-bound using P___ index. + ?x rdfs:label ?o . + + # Two-bound. Uses POCS index by default, which is optimal. + ?x rdf:type foaf:Person . + + # Override the statement index. + hint:Prior hint:com.bigdata.bop.IPredicate.keyOrder "POCS" . + +}
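The BOpUtility change above generalizes toList from IVariable to any element type, which is what lets test_query_hints_05a collect the PipelineJoin operators straight out of the query plan. A tiny sketch of that drain-an-iterator pattern (plain Java; the strings stand in for operators that a visitor such as BOpUtility.visitAll would produce):

    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.LinkedList;
    import java.util.List;

    public class ToListSketch {

        // same shape as the generified BOpUtility.toList(Iterator<T>)
        public static <T> List<T> toList(final Iterator<T> it) {
            final List<T> c = new LinkedList<T>();
            while (it.hasNext()) {
                c.add(it.next());
            }
            return c;
        }

        public static void main(final String[] args) {
            final List<String> joins = toList(Arrays.asList("join0", "join1").iterator());
            // indexable, like joins.toArray(new PipelineJoin[0]) in the test
            System.out.println(joins.toArray(new String[0]).length); // 2
        }
    }

Generifying the helper removes the IVariable-only restriction (and the rawtypes warning suppression) without changing any callers.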
From: <tho...@us...> - 2012-04-09 19:38:04
Revision: 6266 http://bigdata.svn.sourceforge.net/bigdata/?rev=6266&view=rev Author: thompsonbry Date: 2012-04-09 19:37:57 +0000 (Mon, 09 Apr 2012) Log Message: ----------- This commit incorporates the memory manager journal mode (MemStrategy, which is enabled by BufferMode.MemStore) [1]. It also incorporates some progress on the bigdata extension for SPARQL Update [2]. I am beginning to work through the integration to support this feature. I have written code for writing solution sets onto the memory manager and reading them back. That code has not yet been tested. This functionality is currently disabled in QueryHints and in the test suite. [1] https://sourceforge.net/apps/trac/bigdata/ticket/541 (MemoryManager Journal mode) [2] https://sourceforge.net/apps/trac/bigdata/ticket/531 (SPARQL UPDATE for Solution Sets). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/FileMetadata.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/RootBlockView.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/IMemoryManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestCommitHistory.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestTx.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/FileMetadata.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/FileMetadata.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/FileMetadata.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -815,6 +815,9 @@ case Temporary: buffer = null; break; + case MemStore: + buffer = null; + break; default: throw new AssertionError(); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/RootBlockView.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/RootBlockView.java 2012-04-09 19:31:58
UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/RootBlockView.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -184,7 +184,7 @@ * readable). Finally, new root block images MUST be formed using the * {@link #currentVersion}. */ - static final int currentVersion = VERSION2; + public static final int currentVersion = VERSION2; /** * The buffer holding the backing data. @@ -355,7 +355,7 @@ * superseded by another journal). * @param version */ - RootBlockView(// + public RootBlockView(// final boolean rootBlock0, final int offsetBits, final long nextOffset, final long firstCommitTime, final long lastCommitTime, final long commitCounter, Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/AllocationContext.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -353,6 +353,11 @@ return m_root.getSectorCount(); } + @Override + public void commit() { + m_root.commit(); + } + // private SectorAllocation m_head = null; // // /** Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/IMemoryManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/IMemoryManager.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/IMemoryManager.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -276,5 +276,7 @@ public int getMaxSectors(); public void close(); + + public void commit(); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStore.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStore.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -165,7 +165,7 @@ root.addCounter("UUID", new OneShotInstrument<String>(getUUID() .toString())); - root.attach(m_strategy.m_mmgr.getCounters()); + root.attach(m_strategy.getMemoryManager().getCounters()); return root; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemStrategy.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -12,20 +12,39 @@ import com.bigdata.journal.IBufferStrategy; import com.bigdata.journal.IJournal; import com.bigdata.journal.IRootBlockView; +import com.bigdata.journal.RootBlockView; +import com.bigdata.journal.StoreTypeEnum; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.rawstore.IAddressManager; +import com.bigdata.util.ChecksumUtility; +/** + * A buffer implementation backed by an {@link IMemoryManager}. 
+ * + * @author <a href="mailto:mat...@us...">Martyn Cutcher</a> + * @version $Id$ + */ public class MemStrategy implements IBufferStrategy { - final IMemoryManager m_mmgr; - final IAddressManager m_am; + final private IMemoryManager m_mmgr; + final private IAddressManager m_am; - boolean m_modifiable = true; - boolean m_open = true; + private volatile boolean m_modifiable = true; + private volatile boolean m_open = true; - public MemStrategy(IMemoryManager mmgr) { - m_mmgr = mmgr; - m_am = new IAddressManager() { + private volatile boolean m_dirty = true; + + private volatile IRootBlockView m_rb0 = null; + private volatile IRootBlockView m_rb1 = null; + + public MemStrategy(final IMemoryManager mmgr) { + + if (mmgr == null) + throw new IllegalArgumentException(); + + m_mmgr = mmgr; + + m_am = new IAddressManager() { @Override public int getByteCount(long addr) { @@ -55,11 +74,28 @@ @Override public String toString(final long addr) { - // TODO Auto-generated method stub return "PhysicalAddress: " + getPhysicalAddress(addr) + ", length: " + getByteCount(addr); } }; + + // initialise RootBlocks + final UUID uuid = UUID.randomUUID(); // Journal's UUID. + final long createTime = System.currentTimeMillis(); + final ChecksumUtility checker = new ChecksumUtility(); + m_rb0 = new RootBlockView(true, 0, + 0, 0, 0, 0, 0, 0, + uuid, + 0, 0, 0, + StoreTypeEnum.RW, + createTime, 0, RootBlockView.currentVersion, checker); + + m_rb1 = new RootBlockView(false, 0, + 0, 0, 0, 0, 0, 0, + uuid, + 0, 0, 0, + StoreTypeEnum.RW, + createTime, 0, RootBlockView.currentVersion, checker); } public IMemoryManager getMemoryManager() { @@ -68,7 +104,7 @@ @Override public void abort() { - throw new UnsupportedOperationException(); + // NOP } @Override @@ -78,7 +114,8 @@ @Override public void commit(IJournal journal) { - // NOP + m_mmgr.commit(); + m_dirty = false; } @Override @@ -92,11 +129,20 @@ } @Override - public CounterSet getCounters() { - // TODO Auto-generated method stub - return null; + synchronized public CounterSet getCounters() { + + if (root == null) { + + root = new CounterSet(); + + } + + return root; + } + private CounterSet root; + @Override public long getExtent() { // return the amount of currently reserved memory @@ -158,14 +204,12 @@ @Override public ByteBuffer readRootBlock(boolean rootBlock0) { - // NOP - return null; + return (rootBlock0 ? m_rb0 : m_rb1).asReadOnlyBuffer(); } @Override public boolean requiresCommit(IRootBlockView block) { - // NOP - return false; + return m_dirty; } @Override @@ -186,7 +230,11 @@ @Override public void writeRootBlock(IRootBlockView rootBlock, ForceEnum forceOnCommitEnum) { - throw new UnsupportedOperationException(); + if (rootBlock.isRootBlock0()) { + m_rb0 = rootBlock; + } else { + m_rb1 = rootBlock; + } } @Override @@ -200,6 +248,8 @@ if (!m_modifiable) { throw new IllegalStateException("The store is not modifiable"); } + m_mmgr.free(addr); + m_dirty = true; } @Override @@ -264,16 +314,20 @@ @Override public long write(ByteBuffer data) { + m_dirty = true; + return m_mmgr.allocate(data); } @Override public long write(ByteBuffer data, long oldAddr) { - // since there is no commit, we might as well - // immediately recycle! 
+ m_dirty = true; + + final long ret = write(data); + m_mmgr.free(oldAddr); - - return write(data); + + return ret; } // AddressManager delagates Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/MemoryManager.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -27,6 +27,7 @@ import java.io.File; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Iterator; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; @@ -1001,6 +1002,14 @@ return new PSInputStream(this, addr); } + @Override + public void commit() { + final Iterator<SectorAllocator> sectors = m_sectors.iterator(); + while (sectors.hasNext()) { + sectors.next().commit(); + } + } + /* * DONE: The constructor must be able to accept nsectors := * Integer.MAX_VALUE in order to indicate that the #of backing buffers is Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/rwstore/sector/SectorAllocator.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -682,4 +682,12 @@ } + /** + * Called from MemoryManager to commit bits + */ + public void commit() { + m_commitbits = m_bits.clone(); + m_transientbits = m_bits.clone(); + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestCommitHistory.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestCommitHistory.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestCommitHistory.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -33,6 +33,7 @@ import com.bigdata.btree.BTree; import com.bigdata.btree.BytesUtil; +import com.bigdata.rwstore.sector.MemStrategy; import com.bigdata.service.AbstractTransactionService; /** @@ -334,7 +335,8 @@ assertTrue(commitRecordIndexAddrs[i]!=0L); - if ((!(journal.getBufferStrategy() instanceof RWStrategy)) && i > 0) + final IBufferStrategy strat = journal.getBufferStrategy(); + if ((!(strat instanceof RWStrategy || strat instanceof MemStrategy)) && i > 0) assertTrue(commitRecordIndexAddrs[i] > commitRecordIndexAddrs[i - 1]); // get the current commit record. 
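SectorAllocator.commit() above just snapshots the live allocation bits into the commit-time and transient bitmaps, which is why MemoryManager.commit() can be a plain loop over its sectors. A minimal sketch of that bitmap-snapshot idea (plain Java; the m_* field names echo the diff, but the class itself is illustrative, not bigdata API):

    public class BitSnapshotSketch {

        // live allocation bits, bits as of the last commit, and the transient view
        private int[] m_bits = new int[4];
        private int[] m_commitbits = new int[4];
        private int[] m_transientbits = new int[4];

        void allocate(final int bit) {
            m_bits[bit / 32] |= (1 << (bit % 32)); // visible immediately in the live view
        }

        // Snapshot the live bits; mirrors SectorAllocator.commit() in the diff.
        void commit() {
            m_commitbits = m_bits.clone();
            m_transientbits = m_bits.clone();
        }

        boolean committed(final int bit) {
            return (m_commitbits[bit / 32] & (1 << (bit % 32))) != 0;
        }

        public static void main(final String[] args) {
            final BitSnapshotSketch s = new BitSnapshotSketch();
            s.allocate(5);
            System.out.println(s.committed(5)); // false: allocated but not yet committed
            s.commit();
            System.out.println(s.committed(5)); // true
        }
    }

Until commit() runs, a fresh allocation is visible in the live bits but not in the committed bits, the usual arrangement that keeps the last committed allocation state recoverable.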
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestTx.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestTx.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestTx.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -41,6 +41,7 @@ import com.bigdata.btree.IIndex; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.Tuple; import com.bigdata.btree.isolation.IsolatedFusedView; @@ -266,6 +267,9 @@ final long tx1 = journal.newTx(ITx.UNISOLATED); + if(log.isDebugEnabled()) + log.debug("State A, tx1: " + tx1 + "\n" + showCRI(journal)); + { /* @@ -291,8 +295,12 @@ IIndex index = journal.getIndex(name); assertNull(index.insert(k2, v2)); + assertTrue(index.contains(k2)); - assertNotSame(0L, journal.commit()); + final long c2 = journal.commit(); + if(log.isDebugEnabled()) + log.debug("State B, c2: " + c2 + "\n" + showCRI(journal)); + assertNotSame(0L, c2); } @@ -310,10 +318,26 @@ } + { + + final IIndex index = journal.getIndex(name); + + assertTrue(index.contains(k1)); + assertTrue(index.contains(k2)); + + } + + /* + * start another transaction and verify that the 2nd committed + * write is now visible to that transaction. + */ + final long tx2 = journal.newTx(ITx.UNISOLATED); + + if(log.isDebugEnabled()) + log.debug("tx1: " + tx1 + ", tx2: " + tx2 + "\n" + showCRI(journal)); { - /* * start another transaction and verify that the 2nd committed * write is now visible to that transaction. @@ -337,7 +361,46 @@ } } + + String showCRI(final Journal journal) { + final ITupleIterator<CommitRecordIndex.Entry> commitRecords; + /* + * Commit can be called prior to Journal initialisation, in which case + * the commitRecordIndex will not be set. + */ + final IIndex commitRecordIndex = journal.getReadOnlyCommitRecordIndex(); + if (commitRecordIndex == null) { // TODO Why is this here? 
+ return "EMPTY"; + } + final IndexMetadata metadata = commitRecordIndex.getIndexMetadata(); + + commitRecords = commitRecordIndex.rangeIterator(); + + StringBuilder out = new StringBuilder(); + while (commitRecords.hasNext()) { + + final ITuple<CommitRecordIndex.Entry> tuple = commitRecords.next(); + + final CommitRecordIndex.Entry entry = tuple.getObject(); + + try { + + final ICommitRecord record = CommitRecordSerializer.INSTANCE + .deserialize(journal.read(entry.addr)); + + out.append(record.toString() + "\n"); + } catch (RuntimeException re) { + + throw new RuntimeException("Problem with entry at " + + entry.addr, re); + + } + } + return out.toString(); + + } + /** * Test verifies that an isolated write is visible inside of a transaction * (tx1) but not in a concurrent transaction (tx2) and not in the unisolated Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/rwstore/sector/TestMemStore.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -5,20 +5,17 @@ import junit.extensions.proxy.ProxyTestSuite; import junit.framework.Test; -import junit.framework.TestCase2; import com.bigdata.io.DirectBufferPool; +import com.bigdata.journal.AbstractJournalTestCase; import com.bigdata.journal.AbstractMRMWTestCase; import com.bigdata.journal.AbstractMROWTestCase; import com.bigdata.journal.BufferMode; import com.bigdata.journal.Journal; +import com.bigdata.journal.Journal.Options; +import com.bigdata.journal.TestJournalBasics; import com.bigdata.rawstore.AbstractRawStoreTestCase; -import com.bigdata.journal.AbstractJournalTestCase; - import com.bigdata.rawstore.IRawStore; -import com.bigdata.rwstore.RWStore; -import com.bigdata.journal.TestJournalBasics; -import com.bigdata.journal.Journal.Options; /** * Test suite for {@link MemStore}. @@ -101,7 +98,7 @@ * all the tests will run with the configuration specified in this test * class and its optional .properties file. */ - // suite.addTest(TestJournalBasics.suite()); + suite.addTest(TestJournalBasics.suite()); return suite; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -86,7 +86,7 @@ /** * Return <code>true</code> if the target is a SOLUTION SET. 
*/ - final boolean isTargetSolutionSet() { + public final boolean isTargetSolutionSet() { return getProperty(Annotations.TARGET) instanceof VarNode; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -386,4 +386,16 @@ boolean DEFAULT_HASH_JOIN = false; + /** + * Option controls whether or not the bigdata extension to SPARQL Update for + * named solution sets is enabled. + * + * @see <a + * href="https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=SPARQL_Update"> + * SPARQL Update </a> + */ + String SOLUTION_SET_CACHE = "solutionSetCache"; + + boolean DEFAULT_SOLUTION_SET_CACHE = false; + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -28,7 +28,6 @@ package com.bigdata.rdf.sparql.ast.cache; import com.bigdata.bop.IBindingSet; -import com.bigdata.rdf.sparql.ast.QueryBase; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.striterator.ICloseableIterator; @@ -51,6 +50,48 @@ */ void close(); + /** + * Clear the specified named solution set. + * + * @param solutionSet + * The name of the solution set. + * + * @return <code>true</code> iff a solution set by that name existed and was + * cleared. + */ + boolean clear(AST2BOpContext ctx,String solutionSet); + + /** + * Clear all named solution sets. + */ + void clearAll(AST2BOpContext ctx); + + /** + * Save the solutions a named solution set. + * + * @param ctx + * @param solutionSet + * The name of the solution set. + * @param src + * The solutions. + */ + void put(AST2BOpContext ctx, String solutionSet, + ICloseableIterator<IBindingSet[]> src); + + /** + * Read the solutions from a named solution set. + * + * @param ctx + * @param solutionSet + * The name of the solution set. + * + * @return An iterator from which the solutions may be drained. + * + * @throws IllegalStateException + * if no solution set with that name exists. + */ + ICloseableIterator<IBindingSet[]> get(AST2BOpContext ctx, String solutionSet); + // /** // * Return the result from the cache -or- <code>null</code> if there is a // * cache miss. Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -0,0 +1,333 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 9, 2012 + */ + +package com.bigdata.rdf.sparql.ast.cache; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.NoSuchElementException; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.btree.Checkpoint; +import com.bigdata.io.DataInputBuffer; +import com.bigdata.io.DataOutputBuffer; +import com.bigdata.rawstore.Bytes; +import com.bigdata.rawstore.IRawStore; +import com.bigdata.rdf.internal.encoder.IVSolutionSetDecoder; +import com.bigdata.rdf.internal.encoder.IVSolutionSetEncoder; +import com.bigdata.rwstore.PSOutputStream; +import com.bigdata.rwstore.sector.IMemoryManager; +import com.bigdata.striterator.ICloseableIterator; + +/** + * Metadata for a solution set declaration (sort of like a checkpoint record). + * + * TODO Unit tests (this can be tested in isolation). + * + * TODO Work out the relationship to {@link Checkpoint} records. + */ +final class SolutionSetMetadata { + + private static final Logger log = Logger + .getLogger(SolutionSetMetadata.class); + + public final String name; + + private final IMemoryManager allocationContext; + + private long addr; + + public SolutionSetMetadata(final String name, + final IMemoryManager allocationContext) { + + if (name == null) + throw new IllegalArgumentException(); + + if (allocationContext == null) + throw new IllegalArgumentException(); + + this.name = name; + + this.allocationContext = allocationContext; + + } + + public void clear() { + + allocationContext.clear(); + addr = IRawStore.NULL; + + } + + public ICloseableIterator<IBindingSet[]> get() { + + final long addr = this.addr; + + if (addr == IRawStore.NULL) + throw new IllegalStateException(); + + try { + return new SolutionSetStreamDecoder(addr); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + + /** + * Stream decoder for solution sets. + */ + private class SolutionSetStreamDecoder implements + ICloseableIterator<IBindingSet[]> { + + private final DataInputStream in; + private final IVSolutionSetDecoder decoder; + + private boolean open = false; + + /** The next chunk of solutions to be visited. */ + private IBindingSet[] bsets = null; + + public SolutionSetStreamDecoder(final long addr) + throws IOException { + + this.in = new DataInputStream( + wrapInputStream(allocationContext.getInputStream(addr))); + + this.open = true; + + this.decoder = new IVSolutionSetDecoder(); + + } + + @Override + public void close() { + if(open) { + open = false; + try { + in.close(); + } catch (IOException e) { + // Unexpected exception. + log.error(e, e); + } + } + } + + @Override + public boolean hasNext() { + + if (open && bsets == null) { + + /* + * Read ahead and extract a chunk of solutions. 
+ */ + + try { + + bsets = decodeNextChunk(); + + } catch (IOException e) { + + throw new RuntimeException(e); + + } + + } + + return open && bsets != null; + + } + + /** + * Read ahead and decode the next chunk of solutions. + * + * @return The decoded solutions. + */ + private IBindingSet[] decodeNextChunk() throws IOException { + + // #of solutions in this chunk. + final int chunkSize = in.readInt(); + + // #of bytes in this chunk. + final int byteLength = in.readInt(); + + // Read in all the bytes in the next chunk. + final byte[] a = new byte[byteLength]; + + // read data + in.readFully(a); + + // Wrap byte[] as stream. + final DataInputBuffer buf = new DataInputBuffer(a); + + // Allocate array for the decoded solutions. + final IBindingSet[] t = new IBindingSet[chunkSize]; + + // Decode the solutions into the array. + for (int i = 0; i < chunkSize; i++) { + + t[i] = decoder + .decodeSolution(buf, true/* resolveCachedValues */); + + } + + // Return the decoded solutions. + return t; + + } + + @Override + public IBindingSet[] next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final IBindingSet[] t = bsets; + + bsets = null; + + return t; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + public void put(final ICloseableIterator<IBindingSet[]> src) { + + if (src == null) + throw new IllegalArgumentException(); + + final IVSolutionSetEncoder encoder = new IVSolutionSetEncoder(); + + // Stream writing onto the backing store. + final PSOutputStream out = allocationContext.getOutputStream(); + // Address from which the solutions may be read. + final long newAddr; + // #of solutions written. + long nsolutions = 0; + // #of bytes for the encoded solutions (before compression). + long nbytes = 0; + try { + + final DataOutputBuffer buf = new DataOutputBuffer(); + + final OutputStream os = wrapOutputStream(out); + + while (src.hasNext()) { + + // Discard the data in the buffer. + buf.reset(); + + // Chunk of solutions to be written. + final IBindingSet[] chunk = src.next(); + + // Write solutions. + for (int i = 0; i < chunk.length; i++) { + + encoder.encodeSolution(buf, chunk[i]); + + } + + // #of bytes written onto the buffer. + final int bytesBuffered = buf.limit(); + + // Write header (#of solutions in this chunk). + os.write(chunk.length); + + // Write header (#of bytes buffered). + os.write(bytesBuffered); + + // transfer buffer data to output stream. + os.write(buf.array(), 0/* off */, bytesBuffered); + + // += headerSize (chunkSize,bytesBuffered) + bytesBuffered. + nbytes += (Bytes.SIZEOF_INT + Bytes.SIZEOF_INT) + bytesBuffered; + + nsolutions += chunk.length; + + } + + os.flush(); + + out.flush(); + + newAddr = out.getAddr(); + + if (log.isDebugEnabled()) + log.debug("Wrote " + nsolutions + "; encodedBytes=" + nbytes + + " bytes, bytesWritten=" + out.getBytesWritten()); + + } catch (IOException e) { + + throw new RuntimeException(e); + + } finally { + + try { + out.close(); + } catch (IOException e) { + // Unexpected exception. 
+ log.error(e, e); + } + + } + + if (addr != IRawStore.NULL) { + + allocationContext.free(addr); + + } + + addr = newAddr; + + } + + private OutputStream wrapOutputStream(final OutputStream out) + throws IOException { + + return new GZIPOutputStream(out); + + } + + private InputStream wrapInputStream(final InputStream in) + throws IOException { + + return new GZIPInputStream(in); + + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -27,7 +27,10 @@ package com.bigdata.rdf.sparql.ast.cache; +import java.util.Iterator; +import java.util.Map; import java.util.Properties; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; @@ -35,6 +38,7 @@ import org.apache.log4j.Logger; import com.bigdata.bfs.BigdataFileSystem; +import com.bigdata.bop.IBindingSet; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.btree.view.FusedView; import com.bigdata.journal.AbstractJournal; @@ -50,13 +54,16 @@ import com.bigdata.rdf.changesets.IChangeLog; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.webapp.ConfigParams; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.relation.locator.DefaultResourceLocator; import com.bigdata.resources.IndexManager; import com.bigdata.rwstore.RWStore; import com.bigdata.rwstore.sector.IMemoryManager; +import com.bigdata.rwstore.sector.MemStrategy; import com.bigdata.rwstore.sector.MemoryManager; import com.bigdata.service.IDataService; import com.bigdata.sparse.SparseRowStore; +import com.bigdata.striterator.ICloseableIterator; /** * A SPARQL cache. @@ -149,7 +156,29 @@ * stores, etc. */ private final CacheJournal cache; - + + /** + * TODO There are several problems here. + * <p> + * 1. We need a common semantics for visibility for the named solution sets + * and the query and update operations. This cache can not provide that + * without being somehow integrated with the MVCC architecture. + * <p> + * 2. We need to expire (at least some) cache objects. That expiration + * should have a default but should also be configurable for each cache + * object. The visibility issue also exists for expiration (we can not + * expire a result set while it is being used). + * <p> + * 3. If we allow updates against named solution sets, then the visibility + * of those updates must again be consistent with the MVCC architecture for + * the query and update operations. + * <p> + * 4. We need to have metadata about solution sets on hand for explicit + * CREATEs (e.g., supporting declared join variables). 
+ */ + private final ConcurrentHashMap<String/*name*/,SolutionSetMetadata> cacheMap; +// private final ConcurrentWeakValueCacheWithTimeout<String/* name */, IMemoryManager /* allocationContext */> cacheMap; + // /** // * The performance counters for the {@link IBufferStrategy} backing the // * {@link #cache}. @@ -215,21 +244,48 @@ this.queryEngine = queryEngine; /* - * FIXME Setup properties from Journal or Federation (mainly the maximum + * TODO Setup properties from Journal or Federation (mainly the maximum * amount of RAM to use, but we can not limit that if we are using this * for to store named solution sets rather than as a cache). + * + * TODO Setup an expire thread or a priority heap for expiring named + * solution sets from the cache. */ final Properties properties = new Properties(); - - // FIXME Use MemStore to back this (new BufferMode). + + /* + * Note: The cache will be backed by ByteBuffer objects allocated on the + * native process heap (Zero GC). + */ properties.setProperty(com.bigdata.journal.Options.BUFFER_MODE, - BufferMode.Transient.name()); + BufferMode.MemStore.name()); properties.setProperty(com.bigdata.journal.Options.INITIAL_EXTENT, "" + (10 * Bytes.megabyte)); + + properties.setProperty(com.bigdata.journal.Options.CREATE_TEMP_FILE, + "true"); + +// properties.setProperty(Journal.Options.COLLECT_PLATFORM_STATISTICS, +// "false"); +// +// properties.setProperty(Journal.Options.COLLECT_QUEUE_STATISTICS, +// "false"); +// +// properties.setProperty(Journal.Options.HTTPD_PORT, "-1"/* none */); + + this.cache = new CacheJournal(properties); - this.cache = new CacheJournal( properties ); - +// /* +// * TODO The expire should be per cached object, not global. We would +// * need a different cache map class for that. +// */ +// final long timeoutNanos = TimeUnit.SECONDS.toNanos(20); + +// this.cacheMap = new ConcurrentWeakValueCacheWithTimeout<String, IMemoryManager>( +// 0/* queueCapacity */, timeoutNanos); + this.cacheMap = new ConcurrentHashMap<String, SolutionSetMetadata>(); + } @Override @@ -255,10 +311,101 @@ @Override public void close() { + cacheMap.clear(); + cache.destroy(); } + @Override + public void clearAll(final AST2BOpContext ctx) { + + final Iterator<Map.Entry<String, SolutionSetMetadata>> itr = cacheMap + .entrySet().iterator(); + + while (itr.hasNext()) { + + final Map.Entry<String, SolutionSetMetadata> e = itr.next(); + + final String solutionSet = e.getKey(); + + final SolutionSetMetadata sset = e.getValue(); + + if (log.isInfoEnabled()) + log.info("solutionSet: " + solutionSet); + + sset.clear(); + + itr.remove(); + + } + + } + + @Override + public boolean clear(final AST2BOpContext ctx, final String solutionSet) { + + if (log.isInfoEnabled()) + log.info("solutionSet: " + solutionSet); + + final SolutionSetMetadata sset = cacheMap.remove(solutionSet); + + if (sset != null) { + sset.clear(); + + return true; + + } + + return false; + + } + + public void put(final AST2BOpContext ctx, final String solutionSet, + final ICloseableIterator<IBindingSet[]> src) { + + if (solutionSet == null) + throw new IllegalArgumentException(); + + if (src == null) + throw new IllegalArgumentException(); + + /* + * TODO Deal with visibility issues on update (when the modified + * solution set state becomes visible). 
+ */ + + SolutionSetMetadata sset = cacheMap.get(solutionSet); + + if (sset == null) { + + final IMemoryManager mmrgr = ((MemStrategy) cache + .getBufferStrategy()).getMemoryManager(); + + sset = new SolutionSetMetadata(solutionSet, + mmrgr.createAllocationContext()); + + } + + sset.put(src); + + } + + public ICloseableIterator<IBindingSet[]> get(final AST2BOpContext ctx, + final String solutionSet) { + + if (solutionSet == null) + throw new IllegalArgumentException(); + + final SolutionSetMetadata sset = cacheMap.get(solutionSet); + + if (sset == null) + throw new IllegalStateException("Not found: " + solutionSet); + + return sset.get(); + + } + // @Override // public ICacheHit get(final AST2BOpContext ctx, // final QueryBase queryOrSubquery) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -295,18 +295,6 @@ .getIndexManager()); /* - * Cache for SPARQL solution sets. - * - * TODO Define a query hint for enabling or disabling the SPARQL cache - * for an operation. However, the cache reference must still be set - * since we must still issue invalidation notes to the cache even if it - * is disabled during update operations (and it might not make sense to - * use the cache during SPARQL UPDATEs at all). - */ - this.sparqlCache = null; -// this.sparqlCache = SparqlCacheFactory.getSparqlCache(queryEngine); - - /* * Figure out the query UUID that will be used. This will be bound onto * the query plan when it is generated. We figure out what it will be up * front so we can refer to its UUID in parts of the query plan. For @@ -331,6 +319,29 @@ this.queryId = queryId; + /* + * Cache for SPARQL named solution sets. + * + * TODO Define a query hint for enabling or disabling the SPARQL cache + * for an operation. + */ + { + final boolean enable = queryHints == null ? QueryHints.DEFAULT_SOLUTION_SET_CACHE + : Boolean + .valueOf(queryHints + .getProperty( + QueryHints.SOLUTION_SET_CACHE, + QueryHints.DEFAULT_SOLUTION_SET_CACHE ? 
Boolean.TRUE + .toString() : Boolean.FALSE + .toString())); + if (enable) { + this.sparqlCache = SparqlCacheFactory + .getSparqlCache(queryEngine); + } else { + this.sparqlCache = null; + } + } + this.context = new BOpContextBase(queryEngine); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -713,11 +713,11 @@ if (!sourceGraph.equals(targetGraph)) { - clearGraph(targetGraph, null/* scope */, context); + clearOneGraph(targetGraph, context); copyStatements(context, op.isSilent(), sourceGraph, targetGraph); - clearGraph(sourceGraph, null/* scope */, context); + clearOneGraph(sourceGraph, context); } @@ -754,7 +754,7 @@ if (!sourceGraph.equals(targetGraph)) { - clearGraph(targetGraph, null/* scope */, context); + clearOneGraph(targetGraph, context); copyStatements(context, op.isSilent(), sourceGraph, targetGraph); @@ -1054,36 +1054,82 @@ final DropGraph op, final AST2BOpUpdateContext context) throws RepositoryException, SailException { + if (runOnQueryEngine) + throw new UnsupportedOperationException(); + final TermNode targetGraphNode = op.getTargetGraph(); final BigdataURI targetGraph = targetGraphNode == null ? null : (BigdataURI) targetGraphNode.getValue(); - final Scope scope = op.getScope(); + clearGraph(op.isSilent(), op.getTargetSolutionSet(), targetGraph, + op.getScope(), op.isAllGraphs(), op.isAllSolutionSets(), + context); - if (runOnQueryEngine) - throw new UnsupportedOperationException(); - - // FIXME This assumes that the target is a GRAPH (vs SOLUTION SET). - clearGraph(targetGraph, scope, context); - return left; } /** - * Remove all statements from the target graph or the specified - * {@link Scope}. + * Clear one graph (SILENT). * * @param targetGraph + * The graph to be cleared -or- <code>null</code> if no target + * graph was named. + * @param context + * The {@link AST2BOpUpdateContext} used to perform the + * operation. + * + * @throws RepositoryException + * @throws SailException + */ + private static final void clearOneGraph(final URI targetGraph, // + final AST2BOpUpdateContext context// + ) throws RepositoryException, SailException { + + clearGraph(true/* silent */, null/* targetSolutionSet */, targetGraph, + null/* scope */, false/* allGraphs */, + false/* allSolutionSets */, context); + + } + + /** + * Clear one or more graphs and/or solution sets. + * + * @param silent + * When <code>true</code>, some kinds of problems will not be + * reported to the caller. + * @param targetSolutionSet + * The target solution set to be cleared -or- <code>null</code> + * if no target solution set was named. + * @param targetGraph + * The graph to be cleared -or- <code>null</code> if no target + * graph was named. * @param scope + * The scope iff just the graphs in either the + * {@link Scope#DEFAULT_CONTEXTS} or {@link Scope#NAMED_CONTEXTS} + * should be cleared and otherwise <code>null</code>. + * @param allGraphs + * iff all graphs should be cleared. + * @param allSolutionSets + * iff all solution sets should be cleared. * @param context + * The {@link AST2BOpUpdateContext} used to perform the + * operation. 
+ * + * @throws RepositoryException + * @throws SailException + */ - private static void clearGraph(final URI targetGraph, final Scope scope, - final AST2BOpUpdateContext context) throws RepositoryException, - SailException { + // CLEAR/DROP ( SILENT )? (GRAPH IRIref | DEFAULT | NAMED | ALL | GRAPHS | SOLUTIONS | SOLUTIONS %VARNAME) + private static void clearGraph(// + final boolean silent,// + final String solutionSet,// + final URI targetGraph, // + final Scope scope,// + final boolean allGraphs,// + final boolean allSolutionSets,// + final AST2BOpUpdateContext context// + ) throws RepositoryException, SailException { if (log.isDebugEnabled()) log.debug("targetGraph=" + targetGraph + ", scope=" + scope); @@ -1093,6 +1139,17 @@ */ final BigdataSailConnection sailConn = context.conn.getSailConnection(); + if (solutionSet != null) { + + // Clear the named solution set. + if (!context.sparqlCache.clear(context, solutionSet) && !silent) { + + throw new SailException("solutionSet=" + solutionSet); + + } + + } + if (targetGraph != null) { /* @@ -1102,7 +1159,9 @@ sailConn.removeStatements(null/* s */, null/* p */, null/* o */, targetGraph); - } else if (scope != null) { + } + + if (scope != null) { if (scope == Scope.DEFAULT_CONTEXTS) { @@ -1141,7 +1200,9 @@ } - } else { + } + + if(allGraphs) { /* * Addressing ALL graphs. @@ -1158,7 +1219,20 @@ } + /* + * Note: We need to verify that the backing data structure is enabled + * since the default semantics of CLEAR ALL and DROP ALL also imply all + * named solution sets. + */ + if (allSolutionSets && context.sparqlCache != null) { + + // Delete all solution sets. + context.sparqlCache.clearAll(context); + + } + } + /** * If the graph already exists (context has at least one statement), then * this is an error (unless SILENT). Otherwise it is a NOP. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java 2012-04-09 19:31:58 UTC (rev 6265) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java 2012-04-09 19:37:57 UTC (rev 6266) @@ -47,7 +47,6 @@ import junit.framework.TestCase2; import org.apache.log4j.Logger; -import org.openrdf.model.ValueFactory; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryLanguage; import org.openrdf.query.QueryResultUtil; @@ -420,8 +419,8 @@ sb.append("}\n"); // sb.append("ORDER BY ?name"); - con.prepareUpdate(QueryLanguage.SPARQL,sb.toString()).execute(); - + con.prepareUpdate(QueryLanguage.SPARQL, sb.toString()).execute(); + } // Query it.
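SolutionSetMetadata above frames each chunk of encoded solutions with a (chunkSize, byteLength) header and GZIPs the whole stream; the decoder reads those headers back with DataInputStream.readInt(), which consumes four bytes each, so the writer needs a four-byte header encoding as well (OutputStream.write(int) alone emits only the low-order byte). A minimal round-trip sketch of that framing (plain Java; DataOutputStream.writeInt() is used so the header widths match readInt(), and the payload stands in for IVSolutionSetEncoder output):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.zip.GZIPInputStream;
    import java.util.zip.GZIPOutputStream;

    public class ChunkFramingSketch {

        public static void main(final String[] args) throws IOException {
            final byte[] payload = "encoded-solutions".getBytes(StandardCharsets.UTF_8);

            // Write side: header (chunkSize, byteLength), then the chunk payload.
            final ByteArrayOutputStream store = new ByteArrayOutputStream();
            final DataOutputStream out = new DataOutputStream(new GZIPOutputStream(store));
            out.writeInt(3);              // #of solutions in this chunk
            out.writeInt(payload.length); // #of bytes in this chunk
            out.write(payload);
            out.close(); // finishes the GZIP trailer

            // Read side: mirrors SolutionSetStreamDecoder.decodeNextChunk().
            final DataInputStream in = new DataInputStream(
                    new GZIPInputStream(new ByteArrayInputStream(store.toByteArray())));
            final int chunkSize = in.readInt();
            final int byteLength = in.readInt();
            final byte[] a = new byte[byteLength];
            in.readFully(a);
            in.close();

            System.out.println(chunkSize + " solutions, "
                    + new String(a, StandardCharsets.UTF_8));
        }
    }

Because the GZIP wrapper sits outside the framing, as wrapOutputStream()/wrapInputStream() arrange in the diff, headers and payload compress together in a single stream.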
From: <tho...@us...> - 2012-04-10 15:03:01
Revision: 6269 http://bigdata.svn.sourceforge.net/bigdata/?rev=6269&view=rev Author: thompsonbry Date: 2012-04-10 15:02:48 +0000 (Tue, 10 Apr 2012) Log Message: ----------- Working through the integration of the bigdata extensions to SPARQL UPDATE to support named solution sets [1,2]. You can now do INSERT INTO but not yet DELETE FROM or DELETE/INSERT. CLEAR/DROP and CREATE now work as well. Both DELETE FROM and DELETE/INSERT will require the ability to resolve the named solution set when it appears outside of the QueryRoot. We probably need to pass the ASTContext or the SPARQLCache object into the StaticAnalysis constructor in order to support those lookups. I have added a method to ASTEvalHelper which will run a SELECT query without converting the results into openrdf binding sets. You can specify whether or not the IVs will be materialized. This is progress toward (a) not materializing IVs in SPARQL UPDATE [3]; and (b) not de-chunking and re-chunking solutions for SPARQL query [4]. However, there is still a bunch more to do on both issues. [1] https://sourceforge.net/apps/trac/bigdata/ticket/524 (SPARQL Cache) [2] https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=SPARQL_Update [3] http://sourceforge.net/apps/trac/bigdata/ticket/522 (SPARQL UPDATE should not materialize RDF Values) [4] http://sourceforge.net/apps/trac/bigdata/ticket/483 (Eliminate unnecessary dechunking and rechunking) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QuadsDataOrNamedSolutionSet.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/Update.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/update/TestAll.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestUpdateExprBuilder2.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationIterator.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/cache/TestAll.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/cache/TestSolutionSetCache.java Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationIterator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationIterator.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationIterator.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -0,0 +1,146 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2012. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 10, 2012 + */ + +package com.bigdata.bop.rdf.join; + +import java.util.NoSuchElementException; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.striterator.ICloseableIterator; + +/** + * Iterator pattern for chunked materialization. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @see ChunkedMaterializationOp + */ +public class ChunkedMaterializationIterator implements + ICloseableIterator<IBindingSet[]> { + + private final IVariable<?>[] required; + + private final LexiconRelation lex; + + private final boolean materializeInlineIVs; + + private final ICloseableIterator<IBindingSet[]> src; + + private boolean open = false; + + /** + * + * @param vars + * The variables to be materialized (required; must not be an + * empty array). + * @param lex + * The {@link LexiconRelation}. + * @param materializeInlineIVs + * When <code>true</code>, inline IVs will also be materialized. + * @param src + * The source iterator. 
+ */ + public ChunkedMaterializationIterator(// + final IVariable<?>[] vars,// + final LexiconRelation lex,// + final boolean materializeInlineIVs,// + final ICloseableIterator<IBindingSet[]> src) { + + if (vars == null) + throw new IllegalArgumentException(); + + if (vars != null && vars.length == 0) + throw new IllegalArgumentException(); + + if (lex == null) + throw new IllegalArgumentException(); + + if (src == null) + throw new IllegalArgumentException(); + + this.required = vars; + + this.lex = lex; + + this.materializeInlineIVs = materializeInlineIVs; + + this.src = src; + + } + + @Override + public void close() { + + if (open) { + + open = false; + + src.close(); + + } + + } + + @Override + public boolean hasNext() { + + if (open && !src.hasNext()) { + + close(); + + return false; + + } + + return open; + + } + + @Override + public IBindingSet[] next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final IBindingSet[] chunk = src.next(); + + ChunkedMaterializationOp.resolveChunk(required, lex, chunk, + materializeInlineIVs); + + return chunk; + + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + +} \ No newline at end of file Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationIterator.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -60,6 +60,9 @@ * {@link BigdataBindingSetResolverator} replaces the {@link IV} in the solution * with the {@link BigdataValue}. Also, this class does not filter out variables * which are not being materialized. + * + * @see ChunkedMaterializationIterator + * @see BigdataBindingSetResolverator */ public class ChunkedMaterializationOp extends PipelineOp { @@ -71,10 +74,10 @@ public interface Annotations extends PipelineOp.Annotations { /** - * The variables to be materialized. When <code>null</code> or not - * specified, ALL variables will be materialized. This may not be an - * empty array as that would imply that there is no need to use this - * operator. + * The {@link IVariable}[] identifying the variables to be materialized. + * When <code>null</code> or not specified, ALL variables will be + * materialized. This may not be an empty array as that would imply that + * there is no need to use this operator. */ String VARS = ChunkedMaterializationOp.class.getName()+".vars"; @@ -85,12 +88,12 @@ /** * If true, materialize inline values in addition to term IDs. */ - String MATERIALIZE_ALL = ChunkedMaterializationOp.class.getName()+".materializeAll"; + String MATERIALIZE_INLINE_IVS = ChunkedMaterializationOp.class.getName()+".materializeAll"; /** * Default materialize all is false. 
*/ - boolean DEFAULT_MATERIALIZE_ALL = false; + boolean DEFAULT_MATERIALIZE_INLINE_IVS = false; } @@ -149,10 +152,16 @@ ); } - public boolean materializeAll() { - - return getProperty(Annotations.MATERIALIZE_ALL, Annotations.DEFAULT_MATERIALIZE_ALL); - + /** + * When <code>true</code>, inline {@link IV}s are also materialized. + * + * @see Annotations#MATERIALIZE_INLINE_IVS + */ + public boolean materializeInlineIVs() { + + return getProperty(Annotations.MATERIALIZE_INLINE_IVS, + Annotations.DEFAULT_MATERIALIZE_INLINE_IVS); + } public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { @@ -177,7 +186,7 @@ private final long timestamp; - private final boolean materializeAll; + private final boolean materializeInlineIVs; ChunkTask(final ChunkedMaterializationOp op, final BOpContext<IBindingSet> context @@ -194,7 +203,7 @@ timestamp = (Long) op.getProperty(Annotations.TIMESTAMP); - materializeAll = op.materializeAll(); + materializeInlineIVs = op.materializeInlineIVs(); } @@ -219,7 +228,7 @@ stats.chunksIn.increment(); stats.unitsIn.add(a.length); - resolveChunk(vars, lex, a, materializeAll); + resolveChunk(vars, lex, a, materializeInlineIVs); sink.add(a); @@ -252,10 +261,10 @@ * @param chunk * The chunk of solutions whose variables will be materialized. */ - static private void resolveChunk(final IVariable<?>[] required, + static void resolveChunk(final IVariable<?>[] required, final LexiconRelation lex,// final IBindingSet[] chunk,// - final boolean materializeAll + final boolean materializeInlineIVs ) { if (log.isInfoEnabled()) @@ -304,7 +313,7 @@ } - if (iv.needsMaterialization() || materializeAll) { + if (iv.needsMaterialization() || materializeInlineIVs) { ids.add(iv); @@ -331,7 +340,7 @@ } - if (iv.needsMaterialization() || materializeAll) { + if (iv.needsMaterialization() || materializeInlineIVs) { ids.add(iv); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/AbstractOneGraphManagement.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -88,7 +88,7 @@ */ public final boolean isTargetSolutionSet() { - return getProperty(Annotations.TARGET) instanceof VarNode; + return getProperty(Annotations.TARGET) instanceof String; } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QuadsDataOrNamedSolutionSet.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QuadsDataOrNamedSolutionSet.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QuadsDataOrNamedSolutionSet.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -35,12 +35,19 @@ import com.bigdata.bop.NV; /** - * An AST node which models either {@link QuadData} or a named solution set. - * This is used for the INSERT clause and DELETE clause of a - * {@link DeleteInsertGraph} operation. + * An AST node which models either {@link QuadData} or a named solution set in + * support of the INSERT clause and DELETE clause of a {@link DeleteInsertGraph} + * operations. Use {@link #isQuads()} or {@link #isSolutions()} to identify how + * this AST node should be interpreted. 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ + * @version $Id: QuadsDataOrNamedSolutionSet.java 6196 2012-03-27 20:06:22Z + * thompsonbry $ + * + * TODO Rather than overriding this for two very different things + * (quads data and the name of a solution set) it might be better to + * relayer the AST model or just handle this via polymorphism in the + * {@link DeleteInsertGraph} AST node. */ public class QuadsDataOrNamedSolutionSet extends QueryNodeBase implements INamedSolutionSet, IProjectionDecl { @@ -54,7 +61,8 @@ INamedSolutionSet.Annotations, IProjectionDecl.Annotations { /** - * The optional {@link QuadData}. + * The {@link QuadData} (optional; when present this is modeling QUADS + * data). */ String QUAD_DATA = "quadData"; @@ -107,6 +115,26 @@ } + /** + * Return <code>true</code> iff this models QUADS data (rather than named + * solutions) + */ + public boolean isQuads() { + + return getProperty(Annotations.QUAD_DATA)!=null; + + } + + /** + * Return <code>true</code> iff this models a reference to some named + * solutions (rather than QUADS data). + */ + public boolean isSolutions() { + + return getProperty(Annotations.NAMED_SET) != null; + + } + public String getName() { return (String) getProperty(Annotations.NAMED_SET); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -46,6 +46,7 @@ import com.bigdata.rdf.internal.constraints.INeedsMaterialization.Requirement; import com.bigdata.rdf.internal.constraints.IPassesMaterialization; import com.bigdata.rdf.internal.impl.literal.FullyInlineTypedLiteralIV; +import com.bigdata.rdf.sparql.ast.cache.ISparqlCache; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.optimizers.ASTBottomUpOptimizer; import com.bigdata.rdf.sparql.ast.optimizers.ASTLiftPreFiltersOptimizer; @@ -198,7 +199,16 @@ // private static final Logger log = Logger.getLogger(StaticAnalysis.class); /** + * FIXME This will go away now once we have the ability to resolve named + * subqueries against the {@link ISparqlCache}. * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/531"> + * SPARQL Update for Named Solution Sets </a> + */ + private static boolean requireDeclaredNamedSubquery = !QueryHints.DEFAULT_SOLUTION_SET_CACHE; + + /** + * * @param queryRoot * The root of the query. 
We need to have this on hand in order * to resolve {@link NamedSubqueryInclude}s during static @@ -805,12 +815,12 @@ final NamedSubqueryRoot nsr = nsi.getNamedSubqueryRoot(queryRoot); - if (nsr == null) + if (nsr != null) + vars.addAll(getDefinitelyProducedBindings(nsr)); + else if(requireDeclaredNamedSubquery) throw new RuntimeException("No named subquery declared: name=" + nsi.getName()); - vars.addAll(getDefinitelyProducedBindings(nsr)); - } else if(node instanceof ServiceNode) { final ServiceNode service = (ServiceNode) node; @@ -943,12 +953,12 @@ final NamedSubqueryRoot nsr = nsi.getNamedSubqueryRoot(queryRoot); - if (nsr == null) + if (nsr != null) + vars.addAll(getMaybeProducedBindings(nsr)); + else if(requireDeclaredNamedSubquery) throw new RuntimeException("No named subquery declared: name=" + nsi.getName()); - vars.addAll(getMaybeProducedBindings(nsr)); - } else if(node instanceof ServiceNode) { final ServiceNode service = (ServiceNode) node; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/Update.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/Update.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/Update.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -98,8 +98,8 @@ */ public Update(final UpdateType updateType) { - setProperty(Annotations.UPDATE_TYPE,updateType); - + setProperty(Annotations.UPDATE_TYPE, updateType); + } /** Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -29,6 +29,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; +import com.bigdata.rdf.spo.ISPO; import com.bigdata.striterator.ICloseableIterator; /** @@ -59,14 +60,28 @@ * @return <code>true</code> iff a solution set by that name existed and was * cleared. */ - boolean clear(AST2BOpContext ctx,String solutionSet); + boolean clearSolutions(AST2BOpContext ctx, String solutionSet); /** * Clear all named solution sets. */ - void clearAll(AST2BOpContext ctx); + void clearAllSolutions(AST2BOpContext ctx); /** + * Create a named solution set. + * + * @param ctx + * @param solutionSet + * The name of the solution set. + * @param params + * The configuration parameters (optional). + * + * @throws RuntimeException + * if a solution set exists for that name. + */ + void createSolutions(AST2BOpContext ctx, String solutionSet, ISPO[] params); + + /** * Save the solutions a named solution set. * * @param ctx @@ -75,7 +90,7 @@ * @param src * The solutions. */ - void put(AST2BOpContext ctx, String solutionSet, + void putSolutions(AST2BOpContext ctx, String solutionSet, ICloseableIterator<IBindingSet[]> src); /** @@ -90,8 +105,20 @@ * @throws IllegalStateException * if no solution set with that name exists. */ - ICloseableIterator<IBindingSet[]> get(AST2BOpContext ctx, String solutionSet); + ICloseableIterator<IBindingSet[]> getSolutions(AST2BOpContext ctx, + String solutionSet); + /** + * Return <code>true</code> iff a named solution set exists. 
+ * + * @param ctx + * @param solutionSet + * The name of the solution set. + * + * @return <code>true</code> iff a solution set having that name exists. + */ + boolean existsSolutions(AST2BOpContext ctx, String solutionSet); + // /** // * Return the result from the cache -or- <code>null</code> if there is a // * cache miss. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -28,12 +28,13 @@ package com.bigdata.rdf.sparql.ast.cache; import java.io.DataInputStream; +import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.NoSuchElementException; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; import org.apache.log4j.Logger; @@ -45,6 +46,7 @@ import com.bigdata.rawstore.IRawStore; import com.bigdata.rdf.internal.encoder.IVSolutionSetDecoder; import com.bigdata.rdf.internal.encoder.IVSolutionSetEncoder; +import com.bigdata.rdf.spo.ISPO; import com.bigdata.rwstore.PSOutputStream; import com.bigdata.rwstore.sector.IMemoryManager; import com.bigdata.striterator.ICloseableIterator; @@ -52,23 +54,56 @@ /** * Metadata for a solution set declaration (sort of like a checkpoint record). * - * TODO Unit tests (this can be tested in isolation). - * - * TODO Work out the relationship to {@link Checkpoint} records. + * TODO Work out the relationship to {@link Checkpoint} records. Everything + * about this solution set which is updatable should be organized into a + * checkpoint. The metadata declaration for the solution set should also be + * organized into the checkpoint, perhaps as an ISPO[], perhaps using a rigid + * schema. The size of the solution set should be visible when its metadata is + * looked at as a graph. */ final class SolutionSetMetadata { private static final Logger log = Logger .getLogger(SolutionSetMetadata.class); + /** + * The name of the solution set. + */ public final String name; + /** + * The metadata describing the solution set. + */ + private final ISPO[] metadata; + + /** + * The {@link IMemoryManager} on which the solution set will be written and + * from which it will be read. + */ private final IMemoryManager allocationContext; - private long addr; + /** + * The #of solutions in this solutionset. + */ + private long solutionCount; + + /** + * The address from which the solution set may be read. + */ + private long solutionSetAddr; + /** + * + * @param name + * The name of the solution set. + * @param allocationContext + * The {@link IMemoryManager} on which the solution set will be + * written and from which it will be read. + * @param metadata + * The metadata describing the solution set. 
+ */ public SolutionSetMetadata(final String name, - final IMemoryManager allocationContext) { + final IMemoryManager allocationContext, final ISPO[] metadata) { if (name == null) throw new IllegalArgumentException(); @@ -80,24 +115,26 @@ this.allocationContext = allocationContext; + this.metadata = metadata; + } public void clear() { allocationContext.clear(); - addr = IRawStore.NULL; + solutionSetAddr = IRawStore.NULL; } public ICloseableIterator<IBindingSet[]> get() { - final long addr = this.addr; + final long solutionSetAddr = this.solutionSetAddr; - if (addr == IRawStore.NULL) + if (solutionSetAddr == IRawStore.NULL) throw new IllegalStateException(); try { - return new SolutionSetStreamDecoder(addr); + return new SolutionSetStreamDecoder(solutionSetAddr, solutionCount); } catch (IOException e) { throw new RuntimeException(e); } @@ -105,11 +142,22 @@ } /** + * The per-chunk header is currently two int32 fields. The first field is + * the #of solutions in that chunk. The second field is the #of bytes to + * follow in the payload for that chunk. + * + * TODO Should there be an overall header for the solution set or are we + * going to handle that through the solution set metadata? + */ + static private int CHUNK_HEADER_SIZE = Bytes.SIZEOF_INT + Bytes.SIZEOF_INT; + + /** * Stream decoder for solution sets. */ private class SolutionSetStreamDecoder implements ICloseableIterator<IBindingSet[]> { + private final long solutionSetCount; private final DataInputStream in; private final IVSolutionSetDecoder decoder; @@ -117,17 +165,23 @@ /** The next chunk of solutions to be visited. */ private IBindingSet[] bsets = null; + + /** The #of solution sets which have been decoded so far. */ + private long nsolutions = 0; - public SolutionSetStreamDecoder(final long addr) - throws IOException { + public SolutionSetStreamDecoder(final long solutionSetAddr, + final long solutionSetCount) throws IOException { + this.solutionSetCount = solutionSetCount; + this.in = new DataInputStream( - wrapInputStream(allocationContext.getInputStream(addr))); + wrapInputStream(allocationContext + .getInputStream(solutionSetAddr))); this.open = true; - + this.decoder = new IVSolutionSetDecoder(); - + } @Override @@ -154,18 +208,23 @@ try { - bsets = decodeNextChunk(); - + if ((bsets = decodeNextChunk()) == null) { + + // Nothing more to be read. + close(); + + } + } catch (IOException e) { - + throw new RuntimeException(e); - + } } - + return open && bsets != null; - + } /** @@ -175,6 +234,17 @@ */ private IBindingSet[] decodeNextChunk() throws IOException { + if (nsolutions == solutionSetCount) { + + // Nothing more to be read. + + if (log.isDebugEnabled()) + log.debug("Read solutionSet: solutionSetSize=" + nsolutions); + + return null; + + } + // #of solutions in this chunk. final int chunkSize = in.readInt(); @@ -201,6 +271,14 @@ } + // Update the #of solution sets which have been decoded. + nsolutions += chunkSize; + + if (log.isTraceEnabled()) + log.trace("Read chunk: chunkSize=" + chunkSize + ", bytesRead=" + + (CHUNK_HEADER_SIZE + byteLength) + + ", solutionSetSize=" + nsolutions); + // Return the decoded solutions. return t; @@ -221,7 +299,9 @@ @Override public void remove() { + throw new UnsupportedOperationException(); + } } @@ -241,55 +321,75 @@ long nsolutions = 0; // #of bytes for the encoded solutions (before compression). long nbytes = 0; + // The #of chunks written. 
+ long chunkCount = 0; try { final DataOutputBuffer buf = new DataOutputBuffer(); - final OutputStream os = wrapOutputStream(out); + final DataOutputStream os = new DataOutputStream( + wrapOutputStream(out)); - while (src.hasNext()) { + try { - // Discard the data in the buffer. - buf.reset(); + while (src.hasNext()) { - // Chunk of solutions to be written. - final IBindingSet[] chunk = src.next(); + // Discard the data in the buffer. + buf.reset(); - // Write solutions. - for (int i = 0; i < chunk.length; i++) { + // Chunk of solutions to be written. + final IBindingSet[] chunk = src.next(); - encoder.encodeSolution(buf, chunk[i]); + // Write solutions. + for (int i = 0; i < chunk.length; i++) { - } + encoder.encodeSolution(buf, chunk[i]); - // #of bytes written onto the buffer. - final int bytesBuffered = buf.limit(); + } - // Write header (#of solutions in this chunk). - os.write(chunk.length); + // #of bytes written onto the buffer. + final int bytesBuffered = buf.limit(); - // Write header (#of bytes buffered). - os.write(bytesBuffered); - - // transfer buffer data to output stream. - os.write(buf.array(), 0/* off */, bytesBuffered); + // Write header (#of solutions in this chunk). + os.writeInt(chunk.length); - // += headerSize (chunkSize,bytesBuffered) + bytesBuffered. - nbytes += (Bytes.SIZEOF_INT + Bytes.SIZEOF_INT) + bytesBuffered; + // Write header (#of bytes buffered). + os.writeInt(bytesBuffered); - nsolutions += chunk.length; + // transfer buffer data to output stream. + os.write(buf.array(), 0/* off */, bytesBuffered); + // += headerSize + bytesBuffered. + nbytes += CHUNK_HEADER_SIZE + bytesBuffered; + + nsolutions += chunk.length; + + chunkCount++; + + if (log.isDebugEnabled()) + log.debug("Wrote chunk: chunkSize=" + chunk.length + + ", chunkCount=" + chunkCount + + ", bytesBuffered=" + bytesBuffered + + ", solutionSetSize=" + nsolutions); + + } + + os.flush(); + + } finally { + + os.close(); + } - os.flush(); - out.flush(); newAddr = out.getAddr(); if (log.isDebugEnabled()) - log.debug("Wrote " + nsolutions + "; encodedBytes=" + nbytes - + " bytes, bytesWritten=" + out.getBytesWritten()); + log.debug("Wrote solutionSet: solutionSetSize=" + nsolutions + + ", chunkCount=" + chunkCount + ", encodedBytes=" + + nbytes + ", bytesWritten=" + out.getBytesWritten()); } catch (IOException e) { @@ -306,28 +406,42 @@ } - if (addr != IRawStore.NULL) { + if (solutionSetAddr != IRawStore.NULL) { - allocationContext.free(addr); + allocationContext.free(solutionSetAddr); } - addr = newAddr; + // TODO This is not atomic (needs lock). + solutionSetAddr = newAddr; + solutionCount = nsolutions; } + /** + * TODO Test performance with and without gzip. Extract into the CREATE + * schema. 
+ */ + private static final boolean zip = true; + private OutputStream wrapOutputStream(final OutputStream out) throws IOException { - - return new GZIPOutputStream(out); - + + if (zip) + return new DeflaterOutputStream(out); + + return out; + } private InputStream wrapInputStream(final InputStream in) throws IOException { - return new GZIPInputStream(in); + if (zip) + return new InflaterInputStream(in); + return in; + } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -28,6 +28,8 @@ package com.bigdata.rdf.sparql.ast.cache; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.concurrent.ConcurrentHashMap; @@ -55,6 +57,7 @@ import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.webapp.ConfigParams; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; +import com.bigdata.rdf.spo.ISPO; import com.bigdata.relation.locator.DefaultResourceLocator; import com.bigdata.resources.IndexManager; import com.bigdata.rwstore.RWStore; @@ -63,6 +66,7 @@ import com.bigdata.rwstore.sector.MemoryManager; import com.bigdata.service.IDataService; import com.bigdata.sparse.SparseRowStore; +import com.bigdata.striterator.CloseableIteratorWrapper; import com.bigdata.striterator.ICloseableIterator; /** @@ -232,9 +236,8 @@ * {@link IndexManager} inside the {@link IDataService} and provides direct * access to {@link FusedView}s (aka shards). * - * @param indexManager - * The <em>local</em> {@link IIndexManager}. - * + * @param queryEngine + * The {@link QueryEngine}. */ public SparqlCache(final QueryEngine queryEngine) { @@ -317,8 +320,22 @@ } + /** + * Return the {@link IMemoryManager} backing all transient named solution + * sets. The caller is responsible for creating a child allocation context + * when writing a named solution set onto the {@link IMemoryManager}. + * + * @return The shared {@link IMemoryManager}. 
+ */ + protected IMemoryManager getMemoryManager() { + + return ((MemStrategy) cache + .getBufferStrategy()).getMemoryManager(); + + } + @Override - public void clearAll(final AST2BOpContext ctx) { + public void clearAllSolutions(final AST2BOpContext ctx) { final Iterator<Map.Entry<String, SolutionSetMetadata>> itr = cacheMap .entrySet().iterator(); @@ -343,7 +360,7 @@ } @Override - public boolean clear(final AST2BOpContext ctx, final String solutionSet) { + public boolean clearSolutions(final AST2BOpContext ctx, final String solutionSet) { if (log.isInfoEnabled()) log.info("solutionSet: " + solutionSet); @@ -361,7 +378,7 @@ } - public void put(final AST2BOpContext ctx, final String solutionSet, + public void putSolutions(final AST2BOpContext ctx, final String solutionSet, final ICloseableIterator<IBindingSet[]> src) { if (solutionSet == null) @@ -379,19 +396,59 @@ if (sset == null) { - final IMemoryManager mmrgr = ((MemStrategy) cache - .getBufferStrategy()).getMemoryManager(); + final IMemoryManager mmrgr = getMemoryManager() + .createAllocationContext(); - sset = new SolutionSetMetadata(solutionSet, - mmrgr.createAllocationContext()); + sset = new SolutionSetMetadata(solutionSet, mmrgr, + getDefaultMetadata()); + cacheMap.put(solutionSet, sset); + } + // write the solutions onto the memory manager. sset.put(src); + + } + + public void createSolutions(final AST2BOpContext ctx, + final String solutionSet, final ISPO[] params) { + + if (solutionSet == null) + throw new IllegalArgumentException(); + /* + * TODO Deal with visibility issues on update (when the modified + * solution set state becomes visible and race conditions on create). + */ + + SolutionSetMetadata sset = cacheMap.get(solutionSet); + + if (sset != null) + throw new RuntimeException("Exists: " + solutionSet); + + final IMemoryManager mmrgr = getMemoryManager() + .createAllocationContext(); + + sset = new SolutionSetMetadata(solutionSet, mmrgr, + params == null ? getDefaultMetadata() : params); + + cacheMap.put(solutionSet, sset); + + { + + final List<IBindingSet[]> emptySolutionSet = new LinkedList<IBindingSet[]>(); + + final ICloseableIterator<IBindingSet[]> src = new CloseableIteratorWrapper<IBindingSet[]>( + emptySolutionSet.iterator()); + + // write the solutions onto the memory manager. + sset.put(src); + } + } - public ICloseableIterator<IBindingSet[]> get(final AST2BOpContext ctx, + public ICloseableIterator<IBindingSet[]> getSolutions(final AST2BOpContext ctx, final String solutionSet) { if (solutionSet == null) @@ -402,9 +459,34 @@ if (sset == null) throw new IllegalStateException("Not found: " + solutionSet); + // Return iterator over the decoded solutions. return sset.get(); } + + public boolean existsSolutions(final AST2BOpContext ctx, + final String solutionSet) { + + if (solutionSet == null) + throw new IllegalArgumentException(); + + final SolutionSetMetadata sset = cacheMap.get(solutionSet); + + return sset != null; + + } + + /** + * Return the default metadata used when a named solution set is declared + * implicitly rather than explicitly. + * + * @return The metadata describing that solution set. 
+ */ + protected ISPO[] getDefaultMetadata() { + + return new ISPO[]{}; + + } // @Override // public ICacheHit get(final AST2BOpContext ctx, Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpContext.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -13,6 +13,8 @@ import com.bigdata.bop.rdf.join.ChunkedMaterializationOp; import com.bigdata.htree.HTree; import com.bigdata.journal.IIndexManager; +import com.bigdata.journal.ITx; +import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.sparql.ast.ASTContainer; import com.bigdata.rdf.sparql.ast.EmptySolutionSetStats; @@ -443,4 +445,26 @@ } + /** + * Return the timestamp which will be used to read on the lexicon. + * <p> + * Note: This uses the timestamp of the triple store view unless this is a + * read/write transaction, in which case we need to use the last commit + * point in order to see any writes which it may have performed (lexicon + * writes are always unisolated). + */ + public long getLexiconReadTimestamp() { + + long timestamp = db.getTimestamp(); + + if (TimestampUtility.isReadWriteTx(timestamp)) { + + timestamp = ITx.UNISOLATED; + + } + + return timestamp; + + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -51,8 +51,6 @@ import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.rdf.join.ChunkedMaterializationOp; import com.bigdata.bop.rdf.join.InlineMaterializeOp; -import com.bigdata.journal.ITx; -import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.NotMaterializedException; import com.bigdata.rdf.internal.constraints.CompareBOp; @@ -240,9 +238,9 @@ if (nvars == 0) return left; - final long timestamp = getLexiconReadTimestamp(ctx); + final long timestamp = ctx.getLexiconReadTimestamp(); - final String ns = ctx.db.getLexiconRelation().getNamespace(); + final String ns = ctx.getLexiconNamespace(); if (nvars >= 1) { /* @@ -413,28 +411,6 @@ return left; } - - /** - * Return the timestamp which will be used to read on the lexicon. - * <p> - * Note: This uses the timestamp of the triple store view unless this is a - * read/write transaction, in which case we need to use the last commit - * point in order to see any writes which it may have performed (lexicon - * writes are always unisolated). 
- */ - static protected long getLexiconReadTimestamp(final AST2BOpContext ctx) { - - long timestamp = ctx.db.getTimestamp(); - - if (TimestampUtility.isReadWriteTx(timestamp)) { - - timestamp = ITx.UNISOLATED; - - } - - return timestamp; - - } // /** // * Wrapper for handling the {@link AST2BOpContext} / {@link BOpContextBase} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -110,6 +110,7 @@ import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; +import com.bigdata.striterator.ICloseableIterator; /** * Class handles SPARQL update query plan generation. @@ -484,7 +485,7 @@ * the WHERE clause results when both the DELETE clause and the * INSERT clause are present. * - * TODO For large intermediate results, we would be much better + * FIXME For large intermediate results, we would be much better * off putting the data onto an HTree and processing the * bindings as IVs rather than materializing them as RDF Values * (and even for small data sets, we would be better off @@ -563,38 +564,117 @@ } else { /* - * FIXME Support INSERT INTO / DELETE FROM here. - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/524 (SPARQL Cache) + * DELETE/INSERT. + * + * Note: For this code path, only the INSERT clause -or- the + * DELETE clause was specified. We handle the case where BOTH + * clauses were specified above. */ - final QuadData quadData = (insertClause == null ? deleteClause - : insertClause).getQuadData(); - - final ConstructNode template = quadData.flatten(); - // Set the CONSTRUCT template (quads patterns). - queryRoot.setConstruct(template); + // true iff this is an INSERT + final boolean isInsert = insertClause != null; +// final boolean isDelete = deleteClause != null; + + // The clause (either for INSERT or DELETE) + final QuadsDataOrNamedSolutionSet clause = isInsert ? insertClause + : deleteClause; + + assert clause != null; - // Run as a CONSTRUCT query. - final GraphQueryResult result = ASTEvalHelper - .evaluateGraphQuery(context.db, astContainer, null/* bindingSets */); + // Figure out whether we are insert or deleting solutions. + final boolean isSolutionSet = clause.isSolutions(); - try { + if(isSolutionSet) { + + /* + * Target is solution set. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/524 + * (SPARQL Cache) + */ + + // The named solution set on which we will write. + final String solutionSet = clause.getName(); - while (result.hasNext()) { + // Set the projection node. + queryRoot.setProjection(clause.getProjection()); - final BigdataStatement stmt = (BigdataStatement) result - .next(); + if (!isInsert) { + /* + * FIXME We need to find and remove the matching + * solutions. Probably the best way to do that is to + * transform the WHERE clause with a MINUS joining + * against the target solution set via an INCLUDE. The + * solutions which are produced by the query can then be + * written directly onto the named solution set. That + * way, both DELETE and INSERT will wind up as + * putSolutions(). 
+ */ + throw new UnsupportedOperationException(); + } + + // Run as a SELECT query : Do NOT materialize IVs. + final ICloseableIterator<IBindingSet[]> result = ASTEvalHelper + .evaluateTupleQuery2(context.db, astContainer, + null/* bindingSets */, false/* materialize */); - addOrRemoveStatement(context.conn.getSailConnection(), - stmt, isInsertOnly); - + try { + + // Write the solutions onto the named solution set. + context.sparqlCache.putSolutions(context, solutionSet, + result); + + } finally { + + result.close(); + } + + } else { - } finally { + /* + * Target is graph. + */ + + final QuadData quadData = (insertClause == null ? deleteClause + : insertClause).getQuadData(); - result.close(); + final ConstructNode template = quadData.flatten(); + // Set the CONSTRUCT template (quads patterns). + queryRoot.setConstruct(template); + + /* + * Run as a CONSTRUCT query + * + * FIXME Can we avoid IV materialization for this code path? + * Note that we have to do Truth Maintenance. However, I + * suspect that we do not need to do IV materialization if + * we can tunnel into the Sail's assertion and retraction + * buffers. + */ + final GraphQueryResult result = ASTEvalHelper + .evaluateGraphQuery(context.db, astContainer, null/* bindingSets */); + + try { + + while (result.hasNext()) { + + final BigdataStatement stmt = (BigdataStatement) result + .next(); + + addOrRemoveStatement( + context.conn.getSailConnection(), stmt, + isInsertOnly); + + } + + } finally { + + result.close(); + + } + } } @@ -1142,7 +1222,7 @@ if (solutionSet != null) { // Clear the named solution set. - if (!context.sparqlCache.clear(context, solutionSet) && !silent) { + if (!context.sparqlCache.clearSolutions(context, solutionSet) && !silent) { throw new SailException("solutionSet=" + solutionSet); @@ -1227,41 +1307,67 @@ if (allSolutionSets && context.sparqlCache != null) { // Delete all solution sets. - context.sparqlCache.clearAll(context); + context.sparqlCache.clearAllSolutions(context); } } /** - * If the graph already exists (context has at least one statement), then - * this is an error (unless SILENT). Otherwise it is a NOP. + * GRAPHS : If the graph already exists (context has at least one + * statement), then this is an error (unless SILENT). Otherwise it is a NOP. + * <p> + * SOLUTIONS : If the named solution set already exists (is registered, but + * may be empty), then this is an error (unless SILENT). Otherwise, the + * named solution set is provisioned according to the optional parameters. * * @param left * @param op * @param context * @return - * - * FIXME Support CREATE SOLUTIONS here. 
*/ private static PipelineOp convertCreateGraph(final PipelineOp left, final CreateGraph op, final AST2BOpUpdateContext context) { - if (!op.isSilent()) { + if (op.isTargetSolutionSet()) { + final String solutionSet = op.getTargetSolutionSet(); + + final boolean exists = context.sparqlCache.existsSolutions(context, + solutionSet); + + if (!op.isSilent() && exists) { + + throw new RuntimeException("Solutions exists:" + solutionSet); + + } + + if (!exists) { + + context.sparqlCache.createSolutions(context, solutionSet, + op.getParams()); + + } + + } else { + final BigdataURI c = (BigdataURI) ((CreateGraph) op) .getTargetGraph().getValue(); if (log.isDebugEnabled()) log.debug("targetGraph=" + c); - if (context.db.getAccessPath(null/* s */, null/* p */, null/* o */, - c.getIV()).rangeCount(false/* exact */) != 0) { + if (!op.isSilent()) { - throw new RuntimeException("Graph exists: " + c); + if (context.db.getAccessPath(null/* s */, null/* p */, + null/* o */, c.getIV()).rangeCount(false/* exact */) != 0) { + throw new RuntimeException("Graph exists: " + c); + + } + } - + } return left; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -47,7 +47,6 @@ import com.bigdata.bop.controller.ServiceCallJoin; import com.bigdata.bop.controller.Steps; import com.bigdata.bop.controller.Union; -import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.join.HTreeHashIndexOp; import com.bigdata.bop.join.HTreeMergeJoin; @@ -513,10 +512,9 @@ if (!tmp.isEmpty()) { - final long timestamp = getLexiconReadTimestamp(ctx); + final long timestamp = ctx.getLexiconReadTimestamp(); - final String ns = ctx.db.getLexiconRelation() - .getNamespace(); + final String ns = ctx.getLexiconNamespace(); final IVariable<?>[] vars = tmp.toArray(new IVariable[tmp .size()]); @@ -525,7 +523,7 @@ new NV(ChunkedMaterializationOp.Annotations.VARS, vars),// new NV(ChunkedMaterializationOp.Annotations.RELATION_NAME, new String[] { ns }), // new NV(ChunkedMaterializationOp.Annotations.TIMESTAMP, timestamp), // - new NV(ChunkedMaterializationOp.Annotations.MATERIALIZE_ALL, true), // + new NV(ChunkedMaterializationOp.Annotations.MATERIALIZE_INLINE_IVS, true), // new NV(PipelineOp.Annotations.SHARED_STATE, !ctx.isCluster()),// live stats, but not on the cluster. 
new NV(BOp.Annotations.BOP_ID, ctx.nextId())// ); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java 2012-04-10 09:08:53 UTC (rev 6268) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTEvalHelper.java 2012-04-10 15:02:48 UTC (rev 6269) @@ -57,8 +57,10 @@ import com.bigdata.bop.PipelineOp; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.rdf.join.ChunkedMaterializationIterator; import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVCache; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.Bigdata2Sesame2BindingSetIterator; import com.bigdata.rdf.sail.BigdataSailRepositoryConnection; @@ -236,8 +238,142 @@ )); } - + /** + * Evaluate a SELECT query without converting the results into openrdf + * solutions. + * + * @param store + * The {@link AbstractTripleStore} having the data. + * @param queryPlan + * The {@link ASTContainer}. + * @param bs + * The initial solution to kick things off. + * @param materialize + * When <code>true</code>, {@link IV}s will be materialized + * (their {@link IVCache} association will be set to the + * corresponding RDF {@link Value}). When <code>false</code>, + * this materialization step will be skipped. However, it is + * possible that {@link IV}s in the query plan will be + * materialized anyway (for example, materialization might be + * required to support FILTERs in the query). + * + * @return An object from which the solutions may be drained. + * + * @throws QueryEvaluationException + */ + static public ICloseableIterator<IBindingSet[]> evaluateTupleQuery2( + final AbstractTripleStore store, final ASTContainer astContainer, + final QueryBindingSet bs, final boolean materialize) + throws QueryEvaluationException { + + final AST2BOpContext context = new AST2BOpContext(astContainer, store); + + // Clear the optimized AST. + astContainer.clearOptimizedAST(); + + // Batch resolve Values to IVs and convert to bigdata binding set. + final IBindingSet[] bindingSets = mergeBindingSets(astContainer, + batchResolveIVs(store, bs)); + + // Convert the query (generates an optimized AST as a side-effect). + AST2BOpUtility.convert(context, bindingSets); + + // The optimized AST. + final QueryRoot optimizedQuery = astContainer.getOptimizedAST(); + + // true iff we can materialize the projection inside of the query plan. + final boolean materializeProjectionInQuery = materialize && context.materializeProjectionInQuery + && !optimizedQuery.hasSlice(); + + final List<String> projectedSet; + + if (materialize) { + + /* + * Add a materialization step. + */ + + // Get the projection for the query. + final IVariable<?>[] projected = astContainer.getOptimizedAST() + .getProjection().getProjectionVars(); + + projectedSet = new LinkedList<String>(); + + for (IVariable<?> var : projected) + projectedSet.add(var.getName()); + + } else { + + /* + * Do not add a materialization step. + */ + + projectedSet = null; + + } + + doSparqlLogging(context); + + final PipelineOp queryPlan = astContainer.getQueryPlan(); + + IRunningQuery runningQuery = null; + try { + + // Submit query for evaluation. 
runningQuery = context.queryEngine.eval(queryPlan, bindingSets); + // The iterator draining the query solutions. ... [truncated message content]
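The per-chunk framing used by SolutionSetMetadata above is worth pinning down, since this is exactly what the revision fixes: the old code wrote the two header fields with OutputStream.write(int), which emits only the low-order byte, while the revised code uses DataOutputStream.writeInt(...). Below is a minimal, self-contained sketch of that framing, in which each chunk is preceded by two int32 fields (the #of solutions in the chunk and the #of payload bytes which follow) and the whole stream is wrapped in a Deflater/Inflater pair. The class and method names here are illustrative only, and the byte[] payloads stand in for the output of the IVSolutionSetEncoder, which is not reproduced.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

/**
 * Sketch of the [chunkSize, byteLength, payload] chunk framing described
 * above. Illustrative only; not the bigdata API.
 */
public class ChunkFramingSketch {

    /** Write each chunk as a two-int32 header followed by its payload. */
    public static byte[] encode(final List<byte[]> payloads,
            final List<Integer> solutionsPerChunk) throws IOException {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final DataOutputStream os = new DataOutputStream(
                new DeflaterOutputStream(baos));
        try {
            for (int i = 0; i < payloads.size(); i++) {
                os.writeInt(solutionsPerChunk.get(i)); // header: #of solutions.
                os.writeInt(payloads.get(i).length);   // header: #of bytes.
                os.write(payloads.get(i));             // payload.
            }
            os.flush();
        } finally {
            os.close(); // finishes the deflater.
        }
        return baos.toByteArray();
    }

    /** Read chunks until the expected total #of solutions has been decoded. */
    public static List<byte[]> decode(final byte[] data,
            final long solutionCount) throws IOException {
        final List<byte[]> payloads = new ArrayList<byte[]>();
        final DataInputStream in = new DataInputStream(
                new InflaterInputStream(new ByteArrayInputStream(data)));
        try {
            long nsolutions = 0;
            while (nsolutions < solutionCount) {
                final int chunkSize = in.readInt();  // #of solutions in chunk.
                final int byteLength = in.readInt(); // #of payload bytes.
                final byte[] payload = new byte[byteLength];
                in.readFully(payload); // a real decoder hands this to IVSolutionSetDecoder.
                payloads.add(payload);
                nsolutions += chunkSize;
            }
        } finally {
            in.close();
        }
        return payloads;
    }
}

A decoder framed this way can stop after reading exactly solutionCount solutions, which is why the revised SolutionSetMetadata records the solution count alongside the address of the stream.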
From: <tho...@us...> - 2012-04-12 11:06:47
Revision: 6278 http://bigdata.svn.sourceforge.net/bigdata/?rev=6278&view=rev Author: thompsonbry Date: 2012-04-12 11:06:36 +0000 (Thu, 12 Apr 2012) Log Message: ----------- javadoc on AbstractLocalTransactionManager (TODO) IVValueExpression - generics. LangMatchesBOp - slight reorganization to provide fast path for type errors and not materialized exceptions. NeedsMaterializationBOp - removed debug statements to trim the class. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/ProxyTestCase.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/LangMatchesBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NeedsMaterializationBOp.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java 2012-04-11 20:34:54 UTC (rev 6277) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/AbstractLocalTransactionManager.java 2012-04-12 11:06:36 UTC (rev 6278) @@ -41,7 +41,9 @@ * preparing will remain in this collection until it has either successfully * prepared or aborted. * - * @todo config initial capacity and concurrency. + * TODO Configure the initial capacity and concurrency. For example, this + * should be sized to the #of client connections for both the + * initialCapacity and the concurrency level. 
*/ final private ConcurrentHashMap<Long, Tx> activeTx = new ConcurrentHashMap<Long, Tx>(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/ProxyTestCase.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/ProxyTestCase.java 2012-04-11 20:34:54 UTC (rev 6277) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/ProxyTestCase.java 2012-04-12 11:06:36 UTC (rev 6278) @@ -163,6 +163,7 @@ final ThreadGroup grp = Thread.currentThread().getThreadGroup(); final int tearDownActiveThreads = grp.activeCount(); + int nremaining = 0; if (startupActiveThreads != tearDownActiveThreads) { final Thread[] threads = new Thread[tearDownActiveThreads]; grp.enumerate(threads); @@ -175,17 +176,18 @@ info.append(','); info.append("[" + t.getName() + "]"); first = false; + nremaining++; } final String failMessage = "Threads left active after task" +": test=" + getName()// + ", delegate="+getOurDelegate().getClass().getName() + ", startupCount=" + startupActiveThreads - + ", teardownCount=" + tearDownActiveThreads + + ", teardownCount=" + nremaining + ", thisThread="+Thread.currentThread().getName() + ", threads: " + info; - if (grp.activeCount() != startupActiveThreads) + if (nremaining > startupActiveThreads) log.error(failMessage); /* Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java 2012-04-11 20:34:54 UTC (rev 6277) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/IVValueExpression.java 2012-04-12 11:06:36 UTC (rev 6278) @@ -472,7 +472,7 @@ protected IV getAndCheckLiteral(final int i, final IBindingSet bs) throws SparqlTypeErrorException, NotMaterializedException { - final IV iv = getAndCheckBound(i, bs); + final IV<?, ?> iv = getAndCheckBound(i, bs); if (!iv.isLiteral()) throw new SparqlTypeErrorException(); @@ -505,7 +505,7 @@ protected IV getAndCheckBound(final int i, final IBindingSet bs) throws SparqlTypeErrorException, NotMaterializedException { - final IV iv = get(i).get(bs); + final IV<?, ?> iv = get(i).get(bs); if (iv == null) throw new SparqlTypeErrorException.UnboundVarException(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/LangMatchesBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/LangMatchesBOp.java 2012-04-11 20:34:54 UTC (rev 6277) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/LangMatchesBOp.java 2012-04-12 11:06:36 UTC (rev 6278) @@ -77,53 +77,70 @@ protected boolean accept(final IBindingSet bs) { - final IV tag = get(0).get(bs); - final IV range = get(1).get(bs); - + final IV<?, ?> tag = get(0).get(bs); + + // not yet bound + if (tag == null) + throw new SparqlTypeErrorException(); + + final IV<?, ?> range = get(1).get(bs); + + // not yet bound + if (range == null) + throw new SparqlTypeErrorException(); + // if (log.isDebugEnabled()) { // log.debug(tag); // log.debug(range); // } - // not yet bound - if (tag == null || range == null) - throw new SparqlTypeErrorException(); final BigdataValue tagVal = tag.getValue(); + + // not yet 
materialized + if (tagVal == null) + throw new NotMaterializedException(); + + if (!QueryEvaluationUtil.isSimpleLiteral(tagVal)) + throw new SparqlTypeErrorException(); + final BigdataValue rangeVal = range.getValue(); - + + // not yet materialized + if (rangeVal == null) + throw new NotMaterializedException(); + + if (!QueryEvaluationUtil.isSimpleLiteral(rangeVal)) + throw new SparqlTypeErrorException(); + // if (log.isDebugEnabled()) { // log.debug(tagVal); // log.debug(rangeVal); // } - // not yet materialized - if (tagVal == null || rangeVal == null) - throw new NotMaterializedException(); - - if (QueryEvaluationUtil.isSimpleLiteral(tagVal) - && QueryEvaluationUtil.isSimpleLiteral(rangeVal)) - { - final String langTag = ((Literal)tagVal).getLabel(); - final String langRange = ((Literal)rangeVal).getLabel(); +// if (QueryEvaluationUtil.isSimpleLiteral(tagVal) +// && QueryEvaluationUtil.isSimpleLiteral(rangeVal)) +// { + final String langTag = ((Literal) tagVal).getLabel(); + final String langRange = ((Literal) rangeVal).getLabel(); - boolean result = false; - if (langRange.equals("*")) { - result = langTag.length() > 0; - } - else if (langTag.length() == langRange.length()) { - result = langTag.equalsIgnoreCase(langRange); - } - else if (langTag.length() > langRange.length()) { - // check if the range is a prefix of the tag - final String prefix = langTag.substring(0, langRange.length()); - result = prefix.equalsIgnoreCase(langRange) && langTag.charAt(langRange.length()) == '-'; - } - - return result; + boolean result = false; + if (langRange.equals("*")) { + result = langTag.length() > 0; } + else if (langTag.length() == langRange.length()) { + result = langTag.equalsIgnoreCase(langRange); + } + else if (langTag.length() > langRange.length()) { + // check if the range is a prefix of the tag + final String prefix = langTag.substring(0, langRange.length()); + result = prefix.equalsIgnoreCase(langRange) && langTag.charAt(langRange.length()) == '-'; + } - throw new SparqlTypeErrorException(); + return result; +// } +// +// throw new SparqlTypeErrorException(); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NeedsMaterializationBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NeedsMaterializationBOp.java 2012-04-11 20:34:54 UTC (rev 6277) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/NeedsMaterializationBOp.java 2012-04-12 11:06:36 UTC (rev 6278) @@ -26,12 +26,9 @@ import java.util.Map; -import org.apache.log4j.Logger; - import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.NV; import com.bigdata.rdf.internal.NotMaterializedException; import com.bigdata.util.InnerCause; @@ -47,8 +44,8 @@ */ private static final long serialVersionUID = 4767476516948560884L; - private static final transient Logger log = Logger - .getLogger(NeedsMaterializationBOp.class); +// private static final transient Logger log = Logger +// .getLogger(NeedsMaterializationBOp.class); public NeedsMaterializationBOp(final IValueExpression<?> x) { @@ -82,15 +79,15 @@ try { - if (log.isDebugEnabled()) { - log.debug("about to attempt evaluation prior to materialization"); - } +// if (log.isDebugEnabled()) { +// log.debug("about to attempt evaluation prior to materialization"); +// } ve.get(bs); - if (log.isDebugEnabled()) { - 
log.debug("successfully evaluated constraint without materialization"); - } +// if (log.isDebugEnabled()) { +// log.debug("successfully evaluated constraint without materialization"); +// } return false; @@ -98,9 +95,9 @@ if (InnerCause.isInnerCause(t, NotMaterializedException.class)) { - if (log.isDebugEnabled()) { - log.debug("could not evaluate constraint without materialization"); - } +// if (log.isDebugEnabled()) { +// log.debug("could not evaluate constraint without materialization"); +// } return true;
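The reorganization of LangMatchesBOp above moves the unbound, unmaterialized, and non-simple-literal checks to the front so that the common error paths fail fast; the language-range test itself is unchanged. That test can be read in isolation, and the following standalone sketch reproduces its three rules (wildcard range, equal-length comparison, and prefix match at a subtag boundary) outside of the operator. The class name and main() harness are illustrative only and are not part of the bigdata codebase.

/**
 * Standalone restatement of the language-range test applied by
 * LangMatchesBOp once both arguments are bound, materialized, simple
 * literals.
 */
public class LangMatchesSketch {

    public static boolean langMatches(final String langTag,
            final String langRange) {
        if (langRange.equals("*")) {
            // The wildcard range matches any non-empty language tag.
            return langTag.length() > 0;
        }
        if (langTag.length() == langRange.length()) {
            // Equal length: case-insensitive comparison.
            return langTag.equalsIgnoreCase(langRange);
        }
        if (langTag.length() > langRange.length()) {
            // The range must be a prefix of the tag, ending at a subtag boundary.
            final String prefix = langTag.substring(0, langRange.length());
            return prefix.equalsIgnoreCase(langRange)
                    && langTag.charAt(langRange.length()) == '-';
        }
        return false;
    }

    public static void main(final String[] args) {
        System.out.println(langMatches("en-US", "en")); // true (prefix + '-')
        System.out.println(langMatches("en", "EN"));    // true (case-insensitive)
        System.out.println(langMatches("en", "en-US")); // false (tag shorter than range)
        System.out.println(langMatches("fr", "*"));     // true (wildcard)
    }
}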
From: <tho...@us...> - 2012-04-12 15:27:42
Revision: 6279 http://bigdata.svn.sourceforge.net/bigdata/?rev=6279&view=rev Author: thompsonbry Date: 2012-04-12 15:27:30 +0000 (Thu, 12 Apr 2012) Log Message: ----------- Code changes to support packing TIDs on the Journal. This does not yet support packing longs on a cluster since we also would need to address the TermIdEncoder. Packed TIDs are disabled by default in IVUtility.PACK_TIDS since they are incompatible with existing data on the disk. @see https://sourceforge.net/apps/trac/bigdata/ticket/548 (Pack TIDs) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -39,6 +39,7 @@ import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleSerializer; +import com.bigdata.io.LongPacker; /** * A class that may be used to form multi-component keys but which does not @@ -61,7 +62,7 @@ * len field to the mark. keys with multiple components could benefit from * allowing multiple marks (the sparse row store is the main use case). */ -public class KeyBuilder implements IKeyBuilder { +public class KeyBuilder implements IKeyBuilder, LongPacker.IByteBuffer { private static final transient Logger log = Logger.getLogger(KeyBuilder.class); @@ -737,6 +738,100 @@ } /** + * Packs a non-negative long value into the minimum #of bytes in which the + * value can be represented and writes those bytes onto the buffer. The + * first byte determines whether or not the long value was packed and, if + * packed, how many bytes were required to represent the packed long value. + * When the high bit of the first byte is a one (1), then the long value + * could not be packed and the long value is found by clearing the high bit + * and interpreting the first byte plus the next seven (7) bytes as a long. + * Otherwise the next three (3) bits are interpreted as an unsigned integer + * giving the #of bytes (nbytes) required to represent the packed long + * value. To recover the long value the high nibble is cleared and the first + * byte together with the next nbytes are interpreted as an unsigned long + * value whose leading zero bytes were not written. 
+ * + * <pre> + * + * [0|1|2|3|4|5|6|7] + * 1 - - - nbytes = 8, clear high bit and interpret this plus the next 7 bytes as a long. + * 0 1 1 1 nbytes = 7, clear high nibble and interpret this plus the next 6 bytes as a long. + * 0 1 1 0 nbytes = 6, clear high nibble and interpret this plus the next 5 bytes as a long. + * 0 1 0 1 nbytes = 5, clear high nibble and interpret this plus the next 4 bytes as a long. + * 0 1 0 0 nbytes = 4, clear high nibble and interpret this plus the next 3 bytes as a long. + * 0 0 1 1 nbytes = 3, clear high nibble and interpret this plus the next 2 bytes as a long. + * 0 0 1 0 nbytes = 2, clear high nibble and interpret this plus the next byte as a long. + * 0 0 0 1 nbytes = 1, clear high nibble. value is the low nibble. + * + * </pre> + * + * Note: These are decodable (no loss) but negative longs are not allowed. + * <p> + * Note: The order is NOT fully preserved. Any long which is encoded into + * less than 8 bytes has its order preserved. However, a long which is + * encoded into 8 bytes will wind up ordered before any longs which pack + * into fewer bytes. + * + * @param v + * The unsigned long value. + * + * @return The #of bytes onto which the unsigned long value was packed. + */ + final public KeyBuilder pack(final long v) { + + LongPacker.packLong(v, pbuf, this); + + return this; + + } + + /** + * Relative <i>put</i> method for writing a byte[] on the buffer. + * + * @param b + * The byte[]. + * @param off + * The offset of the first byte in <i>b</i> to be written on + * the buffer. + * @param len + * The #of bytes in <i>b</i> to be written on the buffer. + */ + public void put(final byte[] b, final int off, final int len) { + + ensureFree(len); + + System.arraycopy(b/* src */, off/* srcPos */, buf/* dest */, + this.len/* destPos */, len/* length */); + + this.len += len; + + } + + /** + * Private buffer for packing long integers. + */ + final private byte[] pbuf = new byte[8]; + +// /** +// * Unpack a long value from the current buffer position. +// * +// * @param buf +// * The buffer containing the data to be decoded. +// * @param off +// * The offset of the first byte of the value to be decoded. +// * @param limit +// * The exclusive upper bound available for decoding +// * +// * @return The long value. +// */ +// static final public long unpackLong(final byte[] buf, final int off, +// final int limit) { +// +// return LongPacker.unpackLong(buf, off, limit); +// +// } + + /** * Return the value that will impose the lexicographic ordering as an * unsigned long integer. * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -70,7 +70,7 @@ * @version $Id$ */ public class ByteArrayBuffer extends OutputStream implements IByteArrayBuffer, - RepositionableStream { + RepositionableStream, LongPacker.IByteBuffer { private static final transient Logger log = Logger .getLogger(ByteArrayBuffer.class); @@ -1012,101 +1012,10 @@ * * @return The #of bytes onto which the unsigned long value was packed. */ - final public int packLong( final long v ) { + final public int packLong(final long v) { + + return LongPacker.packLong(v, pbuf, this); - - /* - * You can only pack non-negative long values with this method.
- */ - - if (v < 0) { - - throw new IllegalArgumentException("negative value: v=" + v); - - } - - /* - * If the high byte is non-zero then we will write the value as a normal - * long and return nbytes == 8. This case handles large positive long - * values. - */ - if( ( v >> 56 ) != 0 ) { - pbuf[0] = ( (byte)((0xff & (v >> 56))|0x80) ); // note: set the high bit. - pbuf[1] = ( (byte)(0xff & (v >> 48)) ); - pbuf[2] = ( (byte)(0xff & (v >> 40)) ); - pbuf[3] = ( (byte)(0xff & (v >> 32)) ); - pbuf[4] = ( (byte)(0xff & (v >> 24)) ); - pbuf[5] = ( (byte)(0xff & (v >> 16)) ); - pbuf[6] = ( (byte)(0xff & (v >> 8)) ); - pbuf[7] = ( (byte)(0xff & v) ); - put(pbuf, 0, 8); - return 8; - } - - // #of nibbles required to represent the long value. - final int nnibbles = getNibbleLength( v ); - - /* - * Is [nnibbles] even? (If it is even then we need to pad out an extra - * zero nibble in the first byte.) - */ - final boolean evenNibbleCount = ( nnibbles == ( ( nnibbles >> 1 ) << 1 ) ); - - // #of bytes required to represent the long value (plus the header nibble). - final int nbytes = ( ( nnibbles +1 ) >> 1 ) + (evenNibbleCount?1:0); - - int nwritten = 0; - - if( evenNibbleCount ) { - - /* - * An even nibble count requires that we pad the low nibble of the - * first byte with zeros. - */ - - // header byte. low nibble is empty. - byte b = (byte) ( nbytes << 4 ); - - pbuf[nwritten++] = b; - - // remaining bytes containing the packed value. - for( int i=(nnibbles-2)<<2; i>=0; i-=8 ) { - - b = (byte) (0xff & (v >> i)); - - pbuf[nwritten++] = b; - - } - - } else { - - /* - * An odd nibble count means that we pack the first nibble of the - * long value into the low nibble of the header byte. In this case - * the first nibble will always be the low nibble of the first - * non-zero byte in the long value (the high nibble of that byte - * must be zero since there is an odd nibble count). - */ - - byte highByte = (byte) (0xff & (v >> ((nbytes-1)*8) )); - - byte b = (byte) ( ( nbytes << 4 ) | highByte ); - - pbuf[nwritten++] = b; - - for( int i=(nnibbles-3)<<2; i>=0; i-=8 ) { - - b = (byte) (0xff & (v >> i)); - - pbuf[nwritten++] = b; - - } - - } - - put(pbuf,0,nwritten); - - return nwritten; - } /** @@ -1114,37 +1023,38 @@ */ final private byte[] pbuf = new byte[8]; - /** - * Return the #of non-zero nibbles, counting from the first non-zero nibble - * in the long value. A value of <code>0L</code> is considered to be one - * nibble for our purposes. - * - * @param v - * The long value. - * - * @return The #of nibbles in [1:16]. - */ - static protected final int getNibbleLength( final long v ) - { - - for( int i=56, j=16; i>=0; i-=8, j-=2 ) { - - if( (0xf0 & (v >> i)) != 0 ) return j; - - if( (0x0f & (v >> i)) != 0 ) return j-1; - - } - - if (v != 0) - throw new AssertionError("v=" + v); - - /* - * value is zero, which is considered to be one nibble for our purposes. - */ - - return 1; - - } +// /** +// * Return the #of non-zero nibbles, counting from the first non-zero nibble +// * in the long value. A value of <code>0L</code> is considered to be one +// * nibble for our purposes. +// * +// * @param v +// * The long value. +// * +// * @return The #of nibbles in [1:16]. 
+//    */
+//    static protected final int getNibbleLength( final long v )
+//    {
+//        return LongPacker.getNibbleLength(v);
+//        
+////        for( int i=56, j=16; i>=0; i-=8, j-=2 ) {
+////            
+////            if( (0xf0 & (v >> i)) != 0 ) return j;
+////            
+////            if( (0x0f & (v >> i)) != 0 ) return j-1;
+////            
+////        }
+////        
+////        if (v != 0)
+////            throw new AssertionError("v=" + v);
+////        
+////        /*
+////         * value is zero, which is considered to be one nibble for our purposes.
+////         */
+////        
+////        return 1;
+////        
+//    }
 
     /*
      * Pack unsigned short integer.

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java	2012-04-12 11:06:36 UTC (rev 6278)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java	2012-04-12 15:27:30 UTC (rev 6279)
@@ -39,7 +39,6 @@
  * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
  * @version $Id$
  */
-
 public class LongPacker
 {
 
@@ -242,6 +241,157 @@
     }
 
     /**
+     * Narrow interface to support packing against different buffer classes.
+     * 
+     * @author <a href="mailto:tho...@us...">Bryan
+     *         Thompson</a>
+     */
+    public interface IByteBuffer {
+
+        /**
+         * Relative <i>put</i> method for writing a byte[] on the buffer.
+         * 
+         * @param b
+         *            The byte[].
+         * @param off
+         *            The offset of the first byte in <i>b</i> to be written on
+         *            the buffer.
+         * @param len
+         *            The #of bytes in <i>b</i> to be written on the buffer.
+         */
+        void put(final byte[] b, final int off, final int len);
+
+    }
+
+    /**
+     * Packs a non-negative long value into the minimum #of bytes in which the
+     * value can be represented and writes those bytes onto the buffer.
+     * The first byte determines whether or not the long value was packed and,
+     * if packed, how many bytes were required to represent the packed long
+     * value. When the high bit of the first byte is a one (1), then the long
+     * value could not be packed and the long value is found by clearing the
+     * high bit and interpreting the first byte plus the next seven (7) bytes as
+     * a long. Otherwise the next three (3) bits are interpreted as an unsigned
+     * integer giving the #of bytes (nbytes) required to represent the packed
+     * long value. To recover the long value the high nibble is cleared and the
+     * first byte together with the next nbytes are interpreted as an unsigned
+     * long value whose leading zero bytes were not written.
+     * 
+     * <pre>
+     * 
+     * [0|1|2|3|4|5|6|7]
+     * 1 - - -   nbytes = 8, clear high bit and interpret this plus the next 7 bytes as a long.
+     * 0 1 1 1   nbytes = 7, clear high nibble and interpret this plus the next 6 bytes as a long.
+     * 0 1 1 0   nbytes = 6, clear high nibble and interpret this plus the next 5 bytes as a long.
+     * 0 1 0 1   nbytes = 5, clear high nibble and interpret this plus the next 4 bytes as a long.
+     * 0 1 0 0   nbytes = 4, clear high nibble and interpret this plus the next 3 bytes as a long.
+     * 0 0 1 1   nbytes = 3, clear high nibble and interpret this plus the next 2 bytes as a long.
+     * 0 0 1 0   nbytes = 2, clear high nibble and interpret this plus the next byte as a long.
+     * 0 0 0 1   nbytes = 1, clear high nibble. value is the low nibble.
+     * 
+     * </pre>
+     * 
+     * @param v The unsigned long value.
+     * 
+     * @return The #of bytes onto which the unsigned long value was packed.
+     */
+    static final public int packLong(final long v, final byte[] pbuf,
+            final IByteBuffer buf) {
+
+        /*
+         * You can only pack non-negative long values with this method.
+ */ + + if (v < 0) { + + throw new IllegalArgumentException("negative value: v=" + v); + + } + + /* + * If the high byte is non-zero then we will write the value as a normal + * long and return nbytes == 8. This case handles large positive long + * values. + */ + if( ( v >> 56 ) != 0 ) { + pbuf[0] = ( (byte)((0xff & (v >> 56))|0x80) ); // note: set the high bit. + pbuf[1] = ( (byte)(0xff & (v >> 48)) ); + pbuf[2] = ( (byte)(0xff & (v >> 40)) ); + pbuf[3] = ( (byte)(0xff & (v >> 32)) ); + pbuf[4] = ( (byte)(0xff & (v >> 24)) ); + pbuf[5] = ( (byte)(0xff & (v >> 16)) ); + pbuf[6] = ( (byte)(0xff & (v >> 8)) ); + pbuf[7] = ( (byte)(0xff & v) ); + buf.put(pbuf, 0, 8); + return 8; + } + + // #of nibbles required to represent the long value. + final int nnibbles = LongPacker.getNibbleLength( v ); + + /* + * Is [nnibbles] even? (If it is even then we need to pad out an extra + * zero nibble in the first byte.) + */ + final boolean evenNibbleCount = ( nnibbles == ( ( nnibbles >> 1 ) << 1 ) ); + + // #of bytes required to represent the long value (plus the header nibble). + final int nbytes = ( ( nnibbles +1 ) >> 1 ) + (evenNibbleCount?1:0); + + int nwritten = 0; + + if( evenNibbleCount ) { + + /* + * An even nibble count requires that we pad the low nibble of the + * first byte with zeros. + */ + + // header byte. low nibble is empty. + byte b = (byte) ( nbytes << 4 ); + + pbuf[nwritten++] = b; + + // remaining bytes containing the packed value. + for( int i=(nnibbles-2)<<2; i>=0; i-=8 ) { + + b = (byte) (0xff & (v >> i)); + + pbuf[nwritten++] = b; + + } + + } else { + + /* + * An odd nibble count means that we pack the first nibble of the + * long value into the low nibble of the header byte. In this case + * the first nibble will always be the low nibble of the first + * non-zero byte in the long value (the high nibble of that byte + * must be zero since there is an odd nibble count). + */ + + byte highByte = (byte) (0xff & (v >> ((nbytes-1)*8) )); + + byte b = (byte) ( ( nbytes << 4 ) | highByte ); + + pbuf[nwritten++] = b; + + for( int i=(nnibbles-3)<<2; i>=0; i-=8 ) { + + b = (byte) (0xff & (v >> i)); + + pbuf[nwritten++] = b; + + } + + } + + buf.put(pbuf,0,nwritten); + + return nwritten; + + } + + /** * Read a byte from an {@link InputStream} ala {@link DataInput#readByte()} * * @param is @@ -369,6 +519,41 @@ } /** + * Unpack a long value from the buffer position. + * + * @param buf + * The buffer + * @param off + * The offset from which the data will be unpacked. + * @return The long value. + */ + static final public long unpackLong(final byte[] buf, int off) { + int b = buf[off++]; + int nbytes; + long l; + if ((b & 0x80) != 0) { + // high bit is set. + nbytes = 8; // use 8 bytes (this one plus the next 7). + l = b & 0x7f; // clear the high bit - the rest of the byte is the + // start value. + } else { + // high bit is clear. + nbytes = b >> 4; // nbytes is the upper nibble. (right shift one + // nibble). + l = b & 0x0f; // starting value is lower nibble (clear the upper + // nibble). + } + for (int i = 1; i < nbytes; i++) { + // Read the next byte. + b = buf[off++]; + // Shift the existing value one byte left and add into the low + // (unsigned) byte. + l = (l << 8) + (0xff & b); + } + return l; + } + + /** * Convenience method unpacks long and throws an exception if the value * exceeds {@link Integer#MAX_VALUE}. 
* Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -42,6 +42,7 @@ import junit.framework.TestCase2; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.BytesUtil.UnsignedByteArrayComparator; +import com.bigdata.io.LongPacker; /** * Test suite for high level operations that build variable length _unsigned_ @@ -1500,6 +1501,31 @@ } +// /* +// * Packed long integers. +// * +// * These are decodable (no loss) but negative longs are not allowed. +// */ +// public void test_packLong() { +// +// final KeyBuilder keyBuilder = new KeyBuilder(); +// +// /* +// * TODO Do loop, appending into the buffer. Then do decode of each +// * packed value in turn. +// */ +// final long v = 1; +// final int off = keyBuilder.off(); +// keyBuilder.pack(1); +// final int nbytes = LongPacker.getByteLength(v); +// assertEquals("nbytes", off + nbytes, keyBuilder.off()); +// +// final long d = KeyBuilder.unpackLong(keyBuilder.array(), off, off +// + nbytes); +// assertEquals("decodedValue", v, d); +// +// } + /* * BigInteger. * @@ -1508,10 +1534,10 @@ public void test_BigInteger_ctor() { - Random r = new Random(); - - for(int i=0; i<10000; i++) { - + final Random r = new Random(); + + for (int i = 0; i < 10000; i++) { + final BigInteger v1 = BigInteger.valueOf(r.nextLong()); // Note: This DOES NOT work. @@ -2568,7 +2594,8 @@ for (BigDecimal i : a) { i = i.stripTrailingZeros(); - System.err.println("i=" + if(log.isInfoEnabled()) + log.info("i=" + i + "\t(scale=" + i.scale() Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -37,7 +37,6 @@ import junit.framework.TestCase; - /** * Test suite for packing and unpacking unsigned long integers using the * {@link DataInputBuffer} and the {@link ByteArrayBuffer}. @@ -79,9 +78,9 @@ throws IOException { - DataInputBuffer dib = new DataInputBuffer(packed); + final DataInputBuffer dib = new DataInputBuffer(packed); - long actual = dib.unpackLong(); + final long actual = dib.unpackLong(); assertEquals( "value", expected, actual ); @@ -208,46 +207,46 @@ { // Note: zero (0) is interpreted as being one nibble for our purposes. 
- assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x0 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x0 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x1 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x2 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x7 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x8 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0xe ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0xf ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x1 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x2 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x7 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x8 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0xe ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0xf ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x10 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x11 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x12 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x17 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x18 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x1e ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x1f ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x7f ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x8f ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0xff ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x10 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x11 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x12 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x17 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x18 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x1e ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x1f ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x7f ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x8f ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0xff ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x100 ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x101 ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x121 ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x1ee ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x1ff ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0xfff ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x100 ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x101 ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x121 ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x1ee ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x1ff ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0xfff ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0x1ff0 ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0x7ff0 ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0xfff0 ) ); - assertEquals( "nibbles", 4, 
DataOutputBuffer.getNibbleLength( 0xfff1 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0x1ff0 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0x7ff0 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0xfff0 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0xfff1 ) ); - assertEquals( "nibbles", 5, DataOutputBuffer.getNibbleLength( 0x12345 ) ); - assertEquals( "nibbles", 5, DataOutputBuffer.getNibbleLength( 0x54321 ) ); + assertEquals( "nibbles", 5, LongPacker.getNibbleLength( 0x12345 ) ); + assertEquals( "nibbles", 5, LongPacker.getNibbleLength( 0x54321 ) ); - assertEquals( "nibbles", 6, DataOutputBuffer.getNibbleLength( 0x123456 ) ); - assertEquals( "nibbles", 6, DataOutputBuffer.getNibbleLength( 0x654321 ) ); + assertEquals( "nibbles", 6, LongPacker.getNibbleLength( 0x123456 ) ); + assertEquals( "nibbles", 6, LongPacker.getNibbleLength( 0x654321 ) ); - assertEquals( "nibbles", 7, DataOutputBuffer.getNibbleLength( 0x1234567 ) ); - assertEquals( "nibbles", 7, DataOutputBuffer.getNibbleLength( 0x7654321 ) ); + assertEquals( "nibbles", 7, LongPacker.getNibbleLength( 0x1234567 ) ); + assertEquals( "nibbles", 7, LongPacker.getNibbleLength( 0x7654321 ) ); /* * Note: At 8 nibbles we have 32 bits. When the high bit is one, this @@ -255,32 +254,32 @@ * will be interpreted as a negative integer and sign extended to a * negative long. */ - assertEquals( "nibbles", 8, DataOutputBuffer.getNibbleLength( 0x12345678L ) ); - assertEquals( "nibbles", 8, DataOutputBuffer.getNibbleLength( 0x87654321L ) ); + assertEquals( "nibbles", 8, LongPacker.getNibbleLength( 0x12345678L ) ); + assertEquals( "nibbles", 8, LongPacker.getNibbleLength( 0x87654321L ) ); - assertEquals( "nibbles", 9, DataOutputBuffer.getNibbleLength( 0x123456789L ) ); - assertEquals( "nibbles", 9, DataOutputBuffer.getNibbleLength( 0x987654321L ) ); + assertEquals( "nibbles", 9, LongPacker.getNibbleLength( 0x123456789L ) ); + assertEquals( "nibbles", 9, LongPacker.getNibbleLength( 0x987654321L ) ); - assertEquals( "nibbles", 10, DataOutputBuffer.getNibbleLength( 0x123456789aL ) ); - assertEquals( "nibbles", 10, DataOutputBuffer.getNibbleLength( 0xa987654321L ) ); + assertEquals( "nibbles", 10, LongPacker.getNibbleLength( 0x123456789aL ) ); + assertEquals( "nibbles", 10, LongPacker.getNibbleLength( 0xa987654321L ) ); - assertEquals( "nibbles", 11, DataOutputBuffer.getNibbleLength( 0x123456789abL ) ); - assertEquals( "nibbles", 11, DataOutputBuffer.getNibbleLength( 0xba987654321L ) ); + assertEquals( "nibbles", 11, LongPacker.getNibbleLength( 0x123456789abL ) ); + assertEquals( "nibbles", 11, LongPacker.getNibbleLength( 0xba987654321L ) ); - assertEquals( "nibbles", 12, DataOutputBuffer.getNibbleLength( 0x123456789abcL ) ); - assertEquals( "nibbles", 12, DataOutputBuffer.getNibbleLength( 0xcba987654321L ) ); + assertEquals( "nibbles", 12, LongPacker.getNibbleLength( 0x123456789abcL ) ); + assertEquals( "nibbles", 12, LongPacker.getNibbleLength( 0xcba987654321L ) ); - assertEquals( "nibbles", 13, DataOutputBuffer.getNibbleLength( 0x123456789abcdL ) ); - assertEquals( "nibbles", 13, DataOutputBuffer.getNibbleLength( 0xdcba987654321L ) ); + assertEquals( "nibbles", 13, LongPacker.getNibbleLength( 0x123456789abcdL ) ); + assertEquals( "nibbles", 13, LongPacker.getNibbleLength( 0xdcba987654321L ) ); - assertEquals( "nibbles", 14, DataOutputBuffer.getNibbleLength( 0x123456789abcdeL ) ); - assertEquals( "nibbles", 14, DataOutputBuffer.getNibbleLength( 0xedcba987654321L ) ); + assertEquals( 
"nibbles", 14, LongPacker.getNibbleLength( 0x123456789abcdeL ) ); + assertEquals( "nibbles", 14, LongPacker.getNibbleLength( 0xedcba987654321L ) ); - assertEquals( "nibbles", 15, DataOutputBuffer.getNibbleLength( 0x123456789abcdefL ) ); - assertEquals( "nibbles", 15, DataOutputBuffer.getNibbleLength( 0xfedcba987654321L ) ); + assertEquals( "nibbles", 15, LongPacker.getNibbleLength( 0x123456789abcdefL ) ); + assertEquals( "nibbles", 15, LongPacker.getNibbleLength( 0xfedcba987654321L ) ); - assertEquals( "nibbles", 16, DataOutputBuffer.getNibbleLength( 0x1234567890abcdefL ) ); - assertEquals( "nibbles", 16, DataOutputBuffer.getNibbleLength( 0xfedcba0987654321L ) ); + assertEquals( "nibbles", 16, LongPacker.getNibbleLength( 0x1234567890abcdefL ) ); + assertEquals( "nibbles", 16, LongPacker.getNibbleLength( 0xfedcba0987654321L ) ); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -43,6 +43,7 @@ import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.io.LongPacker; import com.bigdata.rdf.internal.impl.AbstractIV; import com.bigdata.rdf.internal.impl.AbstractInlineIV; import com.bigdata.rdf.internal.impl.BlobIV; @@ -71,6 +72,7 @@ import com.bigdata.rdf.internal.impl.uri.VocabURIShortIV; import com.bigdata.rdf.lexicon.BlobsIndexHelper; import com.bigdata.rdf.lexicon.ITermIndexCodes; +import com.bigdata.rdf.lexicon.TermIdEncoder; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataURI; @@ -91,6 +93,18 @@ // private static final transient Logger log = Logger.getLogger(IVUtility.class); + /** + * When <code>true</code>, we will pack term identifiers using + * {@link LongPacker}. + * <p> + * Note: This option requires that term identifiers are non-negative. That + * is not currently true for the cluster due to the {@link TermIdEncoder}. + * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/529"> + * Improve load performance </a> + */ + public static final boolean PACK_TIDS = false; + public static boolean equals(final IV iv1, final IV iv2) { // same IV or both null @@ -386,7 +400,12 @@ */ // decode the term identifier. 
- final long termId = KeyBuilder.decodeLong(key, o); + final long termId; + if(PACK_TIDS) { + termId = LongPacker.unpackLong(key, o); + } else { + termId = KeyBuilder.decodeLong(key, o); + } if (termId == TermId.NULL) { if(nullIsNullRef) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -29,9 +29,12 @@ import org.openrdf.model.Value; import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.io.LongPacker; import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.internal.DTE; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.model.BigdataValue; @@ -228,15 +231,23 @@ return (int) (termId ^ (termId >>> 32)); } - + public int byteLength() { - return 1 + Bytes.SIZEOF_LONG; + if (IVUtility.PACK_TIDS) { + + return 1 + LongPacker.getByteLength(termId); + + } else { + + return 1 + Bytes.SIZEOF_LONG; + + } } @Override - public int _compareTo(IV o) { + public int _compareTo(final IV o) { final long termId2 = ((TermId<?>) o).termId; @@ -250,7 +261,17 @@ // First emit the flags byte. keyBuilder.appendSigned(flags()); - keyBuilder.append(getTermId()); + if (IVUtility.PACK_TIDS) { + + // variable length encoding + ((KeyBuilder) keyBuilder).pack(termId); + + } else { + + // fixed length encoding. + keyBuilder.append(termId); + + } return keyBuilder; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -135,7 +135,7 @@ public class LexiconRelation extends AbstractRelation<BigdataValue> implements IDatatypeURIResolver { - final static Logger log = Logger.getLogger(LexiconRelation.class); + private final static Logger log = Logger.getLogger(LexiconRelation.class); private final Set<String> indexNames; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -97,7 +97,10 @@ */ // super(keyBuilderFactory); super(keyBuilderFactory, getDefaultLeafKeysCoder(), - new com.bigdata.btree.raba.codec.FixedLengthValueRabaCoder(9)); + IVUtility.PACK_TIDS // + ? 
new com.bigdata.btree.raba.codec.SimpleRabaCoder()// + : new com.bigdata.btree.raba.codec.FixedLengthValueRabaCoder(9)// + ); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -164,16 +164,28 @@ public void test_TermId_URI_Counter_MINUS_ONE() { - doTermIVTest(VTE.URI,-1L); + if (!IVUtility.PACK_TIDS) + doTermIVTest(VTE.URI, -1L); } public void test_TermId_URI_Counter_MIN_VALUE() { - doTermIVTest(VTE.URI, Long.MIN_VALUE); + if (!IVUtility.PACK_TIDS) + doTermIVTest(VTE.URI, Long.MIN_VALUE); } + /* + * Note: This is hitting odd fence posts having to do with equality and + * mock IVs. + */ +// public void test_TermId_URI_Counter_ZERO() { +// +// doTermIVTest(VTE.URI, 0); +// +// } + public void test_TermId_URI_Counter_MAX_VALUE() { doTermIVTest(VTE.URI, Long.MAX_VALUE); @@ -190,7 +202,8 @@ for (VTE vte : VTE.values()) { // 64 bit random term identifier. - final long termId = r.nextLong(); + final long termId = IVUtility.PACK_TIDS ? Math.abs(r.nextLong()) + : r.nextLong(); final TermId<?> v = new TermId<BigdataValue>(vte, termId); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -34,6 +34,7 @@ import junit.framework.TestCase2; import com.bigdata.btree.BytesUtil; import com.bigdata.io.SerializerUtil; +import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.impl.TermId; import com.bigdata.rdf.model.BigdataURI; @@ -67,9 +68,16 @@ final Id2TermTupleSerializer fixture = new Id2TermTupleSerializer( namespace, BigdataValueFactoryImpl.getInstance(namespace)); - final TermId<?> id1 = new TermId<BigdataURI>(VTE.URI, -1); - final TermId<?> id2 = new TermId<BigdataURI>(VTE.URI, 0); - final TermId<?> id3 = new TermId<BigdataURI>(VTE.URI, 1); + final TermId<?> id1, id2, id3; + if (IVUtility.PACK_TIDS) { + id1 = new TermId<BigdataURI>(VTE.URI, 0); + id2 = new TermId<BigdataURI>(VTE.URI, 1); + id3 = new TermId<BigdataURI>(VTE.URI, 2); + } else { + id1 = new TermId<BigdataURI>(VTE.URI, -1); + id2 = new TermId<BigdataURI>(VTE.URI, 0); + id3 = new TermId<BigdataURI>(VTE.URI, 1); + } final byte[] k1 = fixture.id2key(id1); final byte[] k2 = fixture.id2key(id2); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
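To make the new encoding concrete, here is a minimal round-trip sketch written against the LongPacker API exactly as added in r6279 above (packLong(long, byte[], IByteBuffer) and unpackLong(byte[], int)). The PackLongExample driver class and its throwaway IByteBuffer implementation are illustrative only, not part of the commit:

import com.bigdata.io.LongPacker;

public class PackLongExample {

    public static void main(String[] args) {

        // Capture the packed bytes into a scratch array via the narrow
        // IByteBuffer interface introduced by this commit.
        final byte[] out = new byte[8];
        final int[] len = new int[1];
        final LongPacker.IByteBuffer buf = new LongPacker.IByteBuffer() {
            public void put(final byte[] b, final int off, final int n) {
                System.arraycopy(b, off, out, len[0], n);
                len[0] += n;
            }
        };

        final byte[] pbuf = new byte[8]; // caller-supplied scratch buffer.

        for (long v : new long[] { 0x0L, 0xfL, 0xffL, 0x12345L,
                Long.MAX_VALUE }) {

            len[0] = 0;

            // Pack: 0x0-0xf fit in 1 byte, 0xff takes 2, 0x12345 takes 3
            // (header nibble plus two data bytes), Long.MAX_VALUE takes 8.
            final int nbytes = LongPacker.packLong(v, pbuf, buf);

            // Unpack from offset 0 and verify the round trip.
            final long decoded = LongPacker.unpackLong(out, 0/* off */);

            System.out.println("0x" + Long.toHexString(v) + " -> " + nbytes
                    + " byte(s), round-trip ok: " + (decoded == v));

        }

    }

}

As the javadoc above warns, the packed form preserves key order only among values that pack into fewer than 8 bytes; a value that needs the full 8 bytes (high bit set) sorts before shorter packed values, which matters when the packed representation is embedded in B+Tree keys.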
From: <mrp...@us...> - 2012-04-13 15:12:27
Revision: 6285 http://bigdata.svn.sourceforge.net/bigdata/?rev=6285&view=rev Author: mrpersonick Date: 2012-04-13 15:12:13 +0000 (Fri, 13 Apr 2012) Log Message: ----------- added an exact match capability to the value-centric text index Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearch.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearchRestartSafe.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/ITextIndexer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestFullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -885,34 +885,6 @@ /** * Performs a full text search against indexed documents returning a hit - * list using the configured default timeout. - * - * @param query - * The query (it will be parsed into tokens). - * @param languageCode - * The language code that should be used when tokenizing the - * query -or- <code>null</code> to use the default - * {@link Locale}). - * @param minCosine - * The minimum cosine that will be returned. - * @param maxRank - * The upper bound on the #of hits in the result set. - * - * @return The hit list. - * - * @see Options#INDEXER_TIMEOUT - */ - public Hiterator<Hit<V>> search(final String query, final String languageCode, - final double minCosine, final int maxRank) { - - return search(query, languageCode, false/* prefixMatch */, minCosine, - 1.0d/* maxCosine */, 1/* minRank */, maxRank, - false/* matchAllTerms */, this.timeout, TimeUnit.MILLISECONDS); - - } - - /** - * Performs a full text search against indexed documents returning a hit * list. 
* <p> * The basic algorithm computes cosine between the term-frequency vector of @@ -976,11 +948,12 @@ public Hiterator<Hit<V>> search(final String query, final String languageCode, final boolean prefixMatch, final double minCosine, final double maxCosine, - final int minRank, final int maxRank, final boolean matchAllTerms, + final int minRank, final int maxRank, + final boolean matchAllTerms, final boolean matchExact, long timeout, final TimeUnit unit) { final Hit<V>[] a = _search(query, languageCode, prefixMatch, minCosine, - maxCosine, minRank, maxRank, matchAllTerms, timeout, unit); + maxCosine, minRank, maxRank, matchAllTerms, matchExact, timeout, unit); return new Hiterator<Hit<V>>(// Arrays.asList(a)// @@ -997,7 +970,7 @@ final boolean prefixMatch) { return count(query, languageCode, prefixMatch, 0.0d, 1.0d, 1, 10000, - false, this.timeout,// + false, false, this.timeout,// TimeUnit.MILLISECONDS); } @@ -1006,11 +979,12 @@ public int count(final String query, final String languageCode, final boolean prefixMatch, final double minCosine, final double maxCosine, - final int minRank, final int maxRank, final boolean matchAllTerms, + final int minRank, final int maxRank, + final boolean matchAllTerms, final boolean matchExact, long timeout, final TimeUnit unit) { final Hit[] a = _search(query, languageCode, prefixMatch, minCosine, - maxCosine, minRank, maxRank, matchAllTerms, timeout, unit); + maxCosine, minRank, maxRank, matchAllTerms, matchExact, timeout, unit); return a.length; @@ -1021,7 +995,8 @@ final boolean prefixMatch, final double minCosine, final double maxCosine, final int minRank, final int maxRank, - final boolean matchAllTerms, long timeout, final TimeUnit unit) { + final boolean matchAllTerms, final boolean matchExact, + long timeout, final TimeUnit unit) { final long begin = System.currentTimeMillis(); @@ -1063,7 +1038,7 @@ } final FullTextSearchQuery cacheKey = new FullTextSearchQuery( - query, matchAllTerms, prefixMatch, timeout, unit + query, matchAllTerms, matchExact, prefixMatch, timeout, unit ); Hit<V>[] a; @@ -1218,9 +1193,10 @@ /* * If match all is specified, remove any hits with a term count less - * than the number of search tokens. + * than the number of search tokens. It's also an optimization to + * run the pruning if we're going to do matchExact. */ - if (matchAllTerms && qdata.distinctTermCount() > 1) { + if ((matchAllTerms || matchExact) && qdata.distinctTermCount() > 1) { final int nterms = qdata.terms.size(); @@ -1247,6 +1223,15 @@ } + /* + * Delegate match exact to subclasses. + */ + if (matchExact) { + + a = matchExact(a, query); + + } + if (a.length == 0) { log.warn("No hits after matchAllTerms pruning: languageCode=[" + languageCode + "], query=[" @@ -1421,6 +1406,19 @@ } + /** + * Subclasses can override this method to do exact match processing. This + * involves materializing the hits into their original text values and + * checking for the query string in the materialized value. Not possible + * from the base class. The value-centric RDF version can use the + * lexicon to materialize the hits and check them for exact match. + */ + protected Hit<V>[] matchExact(final Hit<V>[] hits, final String query) { + + throw new UnsupportedOperationException(); + + } + /* * @todo implement the relevant methods. 
*/ @@ -1465,6 +1463,7 @@ private final String search; private final boolean matchAllTerms; + private final boolean matchExact; private final boolean prefixMatch; private final long timeout; private final TimeUnit unit; @@ -1472,12 +1471,14 @@ public FullTextSearchQuery( final String search, final boolean matchAllTerms, + final boolean matchExact, final boolean prefixMatch, final long timeout, final TimeUnit unit) { this.search = search; this.matchAllTerms = matchAllTerms; + this.matchExact = matchExact; this.prefixMatch = prefixMatch; this.timeout = timeout; this.unit = unit; @@ -1492,6 +1493,7 @@ final int prime = 31; int result = 1; result = prime * result + (matchAllTerms ? 1231 : 1237); + result = prime * result + (matchExact ? 1231 : 1237); result = prime * result + (prefixMatch ? 1231 : 1237); result = prime * result + ((search == null) ? 0 : search.hashCode()); @@ -1514,6 +1516,8 @@ FullTextSearchQuery other = (FullTextSearchQuery) obj; if (matchAllTerms != other.matchAllTerms) return false; + if (matchExact != other.matchExact) + return false; if (prefixMatch != other.prefixMatch) return false; if (search == null) { @@ -1527,7 +1531,7 @@ return false; return true; } - + } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -113,7 +113,7 @@ final Hiterator<?> itr = ndx.search("The quick brown dog", languageCode, false/* prefixMatch */ , minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if (log.isInfoEnabled()) log.info("hits:" + itr); @@ -143,7 +143,7 @@ final Hiterator<?> itr = ndx.search("The qui bro do", languageCode, true/*prefixMatch*/, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if(log.isInfoEnabled()) log.info("hits:" + itr); @@ -172,14 +172,14 @@ final Hiterator<?> itr = ndx .search("brown", languageCode, false/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if(log.isInfoEnabled()) log.info("hits:" + itr); assertEquals(2, ndx .count("brown", languageCode, false/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit)); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit)); } @@ -190,13 +190,13 @@ final Hiterator<?> itr = ndx .search("brown", languageCode, true/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if(log.isInfoEnabled()) log.info("hits:" + itr); assertEquals(2, ndx .count("brown", languageCode, true/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit)); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit)); } @@ -207,13 +207,13 @@ final Hiterator<?> itr = ndx .search("bro", languageCode, true/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); 
if(log.isInfoEnabled()) log.info("hits:" + itr); assertEquals(2, ndx .count("bro", languageCode, true/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit)); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit)); } @@ -224,7 +224,7 @@ final Hiterator<?> itr = ndx .search("bro", languageCode, false/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if(log.isInfoEnabled()) log.info("hits:" + itr); @@ -240,7 +240,7 @@ final Hiterator<?> itr = ndx .search("qui", languageCode, true/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if(log.isInfoEnabled()) log.info("hits:" + itr); @@ -256,7 +256,7 @@ final Hiterator<?> itr = ndx .search("qui", languageCode, false/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if (log.isInfoEnabled()) log.info("hits:" + itr); @@ -272,7 +272,7 @@ final Hiterator<?> itr = ndx .search("quick", languageCode, false/* prefixMatch */, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); if (log.isInfoEnabled()) log.info("hits:" + itr); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearch.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearch.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearch.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -154,7 +154,7 @@ final Hiterator<Hit<Long>> itr = ndx.search(query, languageCode, prefixMatch, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); // query, languageCode, 0d/* minCosine */, // Integer.MAX_VALUE/* maxRank */); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearchRestartSafe.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearchRestartSafe.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/search/TestSearchRestartSafe.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -144,7 +144,7 @@ // ); ndx.search(text, languageCode, prefixMatch, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); assertEquals(1, itr.size()); // Note: 2nd result pruned by cosine. @@ -177,7 +177,7 @@ final Hiterator<?> itr = // ndx.search(text, languageCode); ndx.search(text, languageCode, prefixMatch, minCosine, maxCosine, - minRank, maxRank, matchAllTerms, timeout, unit); + minRank, maxRank, matchAllTerms, false/* matchExact*/, timeout, unit); assertEquals(1, itr.size()); // Note: 2nd result pruned by cosine. 
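The next file adds the actual exact-match post-filter. The idea, in isolation: after the token-level search has produced candidate hits, materialize them through the lexicon in chunks and keep only those whose full text contains the original query string. Below is a standalone sketch of that loop with deliberately simplified types (String document ids and an in-memory Map stand in for the IVs and the LexiconRelation used by the real code):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MatchExactSketch {

    /**
     * Keep only the hits whose materialized text contains the query string.
     * Hits are materialized chunkSize at a time so that no more than one
     * chunk of documents is resolved against the "lexicon" at once.
     */
    static String[] matchExact(final String[] hits,
            final Map<String, String> lexicon, final String query,
            final int chunkSize) {

        final String[] tmp = new String[hits.length];

        int i = 0, k = 0;
        while (i < hits.length) {

            // Gather the next chunk of document ids.
            final List<String> chunk = new ArrayList<String>(chunkSize);
            for (int j = 0; j < chunkSize && i < hits.length; j++) {
                chunk.add(hits[i++]);
            }

            // One lookup (one lexicon join in the real code) per chunk.
            for (String docId : chunk) {
                final String text = lexicon.get(docId);
                if (text != null && text.contains(query)) {
                    tmp[k++] = docId; // the exact (substring) match survives.
                }
            }

        }

        // Compact only if something was filtered out.
        return k < hits.length ? Arrays.copyOf(tmp, k) : hits;

    }

    public static void main(String[] args) {

        final Map<String, String> lexicon = new HashMap<String, String>();
        lexicon.put("s2", "0 0");
        lexicon.put("s3", "0 1 0");
        lexicon.put("s5", "1 2 0 0 3");

        // A token-level search for "0 0" admits all three documents; the
        // exact-match filter drops "0 1 0", which matches the token "0" but
        // does not contain the literal string "0 0".
        System.out.println(Arrays.toString(matchExact(
                new String[] { "s2", "s3", "s5" }, lexicon, "0 0", 2)));
        // -> [s2, s5]

    }

}

The chunking (1000 in the real implementation below) bounds how many hits are materialized per lexicon join, since an exact match cannot be decided from the token index alone; this is also why the BD.MATCH_EXACT javadoc later in this commit warns that the operation is comparatively expensive.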
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -28,7 +28,10 @@ package com.bigdata.rdf.lexicon; import java.io.StringReader; +import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; +import java.util.Map; import java.util.Properties; import java.util.UUID; @@ -42,6 +45,8 @@ import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.btree.raba.codec.SimpleRabaCoder; import com.bigdata.journal.IIndexManager; +import com.bigdata.journal.ITx; +import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.store.AbstractTripleStore; @@ -121,7 +126,7 @@ assertWritable(); - final String name = getNamespace() + "."+NAME_SEARCH; + final String name = getNamespace() + "." + NAME_SEARCH; final IIndexManager indexManager = getIndexManager(); @@ -303,4 +308,146 @@ } + final synchronized public LexiconRelation getLexiconRelation() { + + if (lexiconRelation == null) { + + long t = getTimestamp(); + + if (TimestampUtility.isReadWriteTx(t)) { + + /* + * A read-write tx must use the unisolated view of the lexicon. + */ + t = ITx.UNISOLATED; + + } + + // lexicon namespace, since this index is inside the lexicon + final String ns = getNamespace(); + + if (log.isDebugEnabled()) + log.debug(ns); + + lexiconRelation = (LexiconRelation) getIndexManager() + .getResourceLocator().locate(ns, t); + + } + + return lexiconRelation; + + } + private LexiconRelation lexiconRelation; + +// protected Hit[] matchExact2(final Hit[] hits, final String query) { +// +// final Map<IV<?,?>, Hit> iv2Hit = new HashMap<IV<?,?>, Hit>(hits.length); +// +// for (Hit h : hits) { +// +// iv2Hit.put((IV<?,?>) h.getDocId(), h); +// +// } +// +// final LexiconRelation lex = getLexiconRelation(); +// +// final Map<IV<?,?>, BigdataValue> terms = lex.getTerms(iv2Hit.keySet()); +// +// final Hit[] tmp = new Hit[hits.length]; +// +// int i = 0; +// for (Map.Entry<IV<?,?>, BigdataValue> e : terms.entrySet()) { +// +// final IV<?,?> iv = e.getKey(); +// +// final BigdataValue term = e.getValue(); +// +// if (term.stringValue().contains(query)) { +// +// tmp[i++] = iv2Hit.get(iv); +// +// } +// +// } +// +// if (i < hits.length) { +// +// final Hit[] a = new Hit[i]; +// System.arraycopy(tmp, 0, a, 0, i); +// return a; +// +// } else { +// +// return hits; +// +// } +// +// } + + @Override + protected Hit[] matchExact(final Hit[] hits, final String query) { + +// /* +// * Too big to do efficient exact matching. 
+// */ +// if (hits.length > 10000) { +// +// return hits; +// +// } + + final int chunkSize = 1000; + + final Hit[] tmp = new Hit[hits.length]; + + final Map<IV<?,?>, Hit> iv2Hit = new HashMap<IV<?,?>, Hit>(chunkSize); + + final LexiconRelation lex = getLexiconRelation(); + + int i = 0, k = 0; + while (i < hits.length) { + + iv2Hit.clear(); + + for (int j = 0; j < chunkSize && i < hits.length; j++) { + + final Hit h = hits[i++]; + + iv2Hit.put((IV<?,?>) h.getDocId(), h); + + } + + final Map<IV<?,?>, BigdataValue> terms = lex.getTerms(iv2Hit.keySet()); + + for (Map.Entry<IV<?,?>, BigdataValue> e : terms.entrySet()) { + + final IV<?,?> iv = e.getKey(); + + final BigdataValue term = e.getValue(); + + if (term.stringValue().contains(query)) { + + tmp[k++] = iv2Hit.get(iv); + + } + + } + + } + + if (k < hits.length) { + + final Hit[] a = new Hit[k]; + System.arraycopy(tmp, 0, a, 0, k); + return a; + + } else { + + return hits; + + } + + } + + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/ITextIndexer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/ITextIndexer.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/ITextIndexer.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -110,6 +110,8 @@ * The max rank of the search result. * @param matchAllTerms * if true, return only hits that match all search terms + * @param matchExact + * if true, return only hits that have an exact match of the search string * @param timeout * The timeout -or- ZERO (0) for NO timeout (this is equivalent * to using {@link Long#MAX_VALUE}). @@ -122,7 +124,8 @@ final boolean prefixMatch, final double minCosine, final double maxCosine, final int minRank, final int maxRank, - final boolean matchAllTerms, long timeout, final TimeUnit unit); + final boolean matchAllTerms, final boolean matchExact, + long timeout, final TimeUnit unit); /** * Count free text search results. @@ -154,6 +157,8 @@ * The max rank of the search result. * @param matchAllTerms * if true, return only hits that match all search terms + * @param matchExact + * if true, return only hits that have an exact match of the search string * @param timeout * The timeout -or- ZERO (0) for NO timeout (this is equivalent * to using {@link Long#MAX_VALUE}). 
@@ -166,6 +171,7 @@ final boolean prefixMatch, final double minCosine, final double maxCosine, final int minRank, final int maxRank, - final boolean matchAllTerms, long timeout, final TimeUnit unit); + final boolean matchAllTerms, final boolean matchExact, + long timeout, final TimeUnit unit); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/ASTSearchOptimizer.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -104,6 +104,7 @@ set.add(BD.MAX_RELEVANCE); set.add(BD.MIN_RELEVANCE); set.add(BD.MATCH_ALL_TERMS); + set.add(BD.MATCH_EXACT); set.add(BD.SUBJECT_SEARCH); set.add(BD.SEARCH_TIMEOUT); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/SearchServiceFactory.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -283,6 +283,10 @@ assertObjectIsLiteral(sp); + } else if(uri.equals(BD.MATCH_EXACT)) { + + assertObjectIsLiteral(sp); + } else if(uri.equals(BD.SUBJECT_SEARCH)) { assertObjectIsLiteral(sp); @@ -348,6 +352,7 @@ private final Literal minRelevance; private final Literal maxRelevance; private final boolean matchAllTerms; + private final boolean matchExact; private final boolean subjectSearch; private final Literal searchTimeout; @@ -393,6 +398,7 @@ Literal minRelevance = null; Literal maxRelevance = null; boolean matchAllTerms = false; + boolean matchExact = false; boolean subjectSearch = false; Literal searchTimeout = null; @@ -420,6 +426,8 @@ maxRelevance = (Literal) oVal; } else if (BD.MATCH_ALL_TERMS.equals(p)) { matchAllTerms = ((Literal) oVal).booleanValue(); + } else if (BD.MATCH_EXACT.equals(p)) { + matchExact = ((Literal) oVal).booleanValue(); } else if (BD.SUBJECT_SEARCH.equals(p)) { subjectSearch = ((Literal) oVal).booleanValue(); } else if (BD.SEARCH_TIMEOUT.equals(p)) { @@ -438,6 +446,7 @@ this.minRelevance = minRelevance; this.maxRelevance = maxRelevance; this.matchAllTerms = matchAllTerms; + this.matchExact = matchExact; this.subjectSearch = subjectSearch; this.searchTimeout = searchTimeout; @@ -474,6 +483,7 @@ minRank == null ? BD.DEFAULT_MIN_RANK/*1*/ : minRank.intValue()/* minRank */, maxRank == null ? BD.DEFAULT_MAX_RANK/*Integer.MAX_VALUE*/ : maxRank.intValue()/* maxRank */, matchAllTerms, + matchExact, searchTimeout == null ? BD.DEFAULT_TIMEOUT/*0L*/ : searchTimeout.longValue()/* timeout */, TimeUnit.MILLISECONDS); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -281,6 +281,27 @@ final boolean DEFAULT_MATCH_ALL_TERMS = false; + /** + * Magic predicate used to query for free text search metadata. 
Use + * in conjunction with {@link #SEARCH} as follows: + * <p> + * <pre> + * + * select ?s + * where { + * ?s bd:search "scale-out RDF triplestore" . + * ?s bd:matchExact "true" . + * } + * + * </pre> + * <p> + * This operation will be rather expensive as it will require materializing + * all the hits to check their values. + */ + final URI MATCH_EXACT = new URIImpl(SEARCH_NAMESPACE+"matchExact"); + + final boolean DEFAULT_MATCH_EXACT = false; + final boolean DEFAULT_PREFIX_MATCH = false; /** Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestFullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestFullTextIndex.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestFullTextIndex.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -132,6 +132,7 @@ minCosine, 1.0d/* maxCosine */, 1/* minRank */, Integer.MAX_VALUE/* maxRank */, false/* matchAllTerms */, + false, // matchExact Long.MAX_VALUE,//2L/* timeout */, TimeUnit.MILLISECONDS// TimeUnit.SECONDS ); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestSubjectCentricFullTextIndex.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -137,6 +137,7 @@ minCosine, 1.0d/* maxCosine */, 1/* minRank */, Integer.MAX_VALUE/* maxRank */, false/* matchAllTerms */, + false, // matchExact Long.MAX_VALUE,//2L/* timeout */, TimeUnit.MILLISECONDS// TimeUnit.SECONDS ); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2012-04-13 13:36:27 UTC (rev 6284) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2012-04-13 15:12:13 UTC (rev 6285) @@ -799,6 +799,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -866,6 +867,7 @@ BD.DEFAULT_MIN_RANK,//1 maxHits, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -936,6 +938,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1010,6 +1013,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1086,6 +1090,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout 
TimeUnit.MILLISECONDS // unit ); @@ -1160,6 +1165,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1230,6 +1236,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) true, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1299,6 +1306,7 @@ minRank,//1 maxRank,//10000, // maxRank (=maxResults + 1) false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1338,6 +1346,7 @@ BD.DEFAULT_MIN_RANK, // minRank BD.DEFAULT_MAX_RANK, // maxRank false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1477,6 +1486,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1542,6 +1552,7 @@ BD.DEFAULT_MIN_RANK,//1 maxHits, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1613,6 +1624,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1689,6 +1701,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1766,6 +1779,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1841,6 +1855,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1912,6 +1927,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) true, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -1983,6 +1999,7 @@ minRank,//1 maxRank,//10000, // maxRank (=maxResults + 1) false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -2022,6 +2039,7 @@ BD.DEFAULT_MIN_RANK, // minRank BD.DEFAULT_MAX_RANK, // maxRank false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -2167,6 +2185,7 @@ BD.DEFAULT_MIN_RANK,//1 BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + false, // matchExact BD.DEFAULT_TIMEOUT,//1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -2426,5 +2445,161 @@ FILTER (?category1 = <http://www.ms2w.com/ontologies/autocad/AutoCADBlock> || ?category1 = <http://www.ms2w.com/ontologies/autocad/AutoCADBlockAttribute> || ?category1 = <http://www.ms2w.com/ontologies/autocad/AutoCADBlockReference> || ?category1 = 
<http://www.ms2w.com/ontologies/autocad/AutoCADFile> || ?category1 = <http://www.ms2w.com/ontologies/autocad/AutoCADTable> || ?category1 = <http://www.ms2w.com/ontologies/autocad/AutoCADTitleBlock> || ?category1 = <http://www.ms2w.com/ontologies/file/Directory> || ?category1 = <http://www.ms2w.com/ontologies/file/File> || ?category1 = <http://www.ms2w.com/ontologies/pdffile/PdfAnnotation> || ?category1 = <http://www.ms2w.com/ontologies/pdffile/PdfFile> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmDocumentAssembly> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmTableTypeBOM> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/Component> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/Configuration> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmDocumentDrawing> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmBOMTableTypeIndented> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmDocumentPart> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmBOMTableTypePartsOnly> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmTableTypeRevision> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmToolboxCopiedPart> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmToolboxStandardPart> || ?category1 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmBOMTableTypeTopLevelOnly> ) . FILTER (?category2 = <http://www.ms2w.com/ontologies/autocad/AutoCADBlock> || ?category2 = <http://www.ms2w.com/ontologies/autocad/AutoCADBlockAttribute> || ?category2 = <http://www.ms2w.com/ontologies/autocad/AutoCADBlockReference> || ?category2 = <http://www.ms2w.com/ontologies/autocad/AutoCADFile> || ?category2 = <http://www.ms2w.com/ontologies/autocad/AutoCADTable> || ?category2 = <http://www.ms2w.com/ontologies/autocad/AutoCADTitleBlock> || ?category2 = <http://www.ms2w.com/ontologies/file/Directory> || ?category2 = <http://www.ms2w.com/ontologies/file/File> || ?category2 = <http://www.ms2w.com/ontologies/pdffile/PdfAnnotation> || ?category2 = <http://www.ms2w.com/ontologies/pdffile/PdfFile> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmDocumentAssembly> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmTableTypeBOM> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/Component> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/Configuration> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmDocumentDrawing> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmBOMTableTypeIndented> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmDocumentPart> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmBOMTableTypePartsOnly> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmTableTypeRevision> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmToolboxCopiedPart> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmToolboxStandardPart> || ?category2 = <http://www.ms2w.com/ontologies/solidworks/20091023/swDmBOMTableTypeTopLevelOnly> ) . */ + + /** + * FIXME This should be migrated to the data-driven test suite in + * {@link TestSearch}. 
+ * + * @see TestSearch + */ + public void testExactMatch() throws Exception { + + final BigdataSail sail = getSail(); + try { + + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + + try { + + cxn.setAutoCommit(false); + + final ValueFactory vf = sail.getValueFactory(); + + final URI s1 = vf.createURI(BD.NAMESPACE+"s1"); + final URI s2 = vf.createURI(BD.NAMESPACE+"s2"); + final URI s3 = vf.createURI(BD.NAMESPACE+"s3"); + final URI s4 = vf.createURI(BD.NAMESPACE+"s4"); + final URI s5 = vf.createURI(BD.NAMESPACE+"s5"); + final URI s6 = vf.createURI(BD.NAMESPACE+"s6"); + final URI s7 = vf.createURI(BD.NAMESPACE+"s7"); + final URI s8 = vf.createURI(BD.NAMESPACE+"s8"); + final Literal l1 = vf.createLiteral("0"); + final Literal l2 = vf.createLiteral("0 0"); + final Literal l3 = vf.createLiteral("0 1 0"); + final Literal l4 = vf.createLiteral("0,0"); + final Literal l5 = vf.createLiteral("1 2 0 0 3"); + final Literal l6 = vf.createLiteral("brown cow"); + final Literal l7 = vf.createLiteral("how now brown cow"); + final Literal l8 = vf.createLiteral("toilet"); + + cxn.add(s1, RDFS.LABEL, l1); + cxn.add(s2, RDFS.LABEL, l2); + cxn.add(s3, RDFS.LABEL, l3); + cxn.add(s4, RDFS.LABEL, l4); + cxn.add(s5, RDFS.LABEL, l5); + cxn.add(s6, RDFS.LABEL, l6); + cxn.add(s7, RDFS.LABEL, l7); + cxn.add(s8, RDFS.LABEL, l8); + + /* + * Note: Either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + + final Map<IV, Literal> literals = new LinkedHashMap<IV, Literal>(); + literals.put(((BigdataValue)l1).getIV(), l1); + literals.put(((BigdataValue)l2).getIV(), l2); + literals.put(((BigdataValue)l3).getIV(), l3); + literals.put(((BigdataValue)l4).getIV(), l4); + literals.put(((BigdataValue)l5).getIV(), l5); + literals.put(((BigdataValue)l6).getIV(), l6); + literals.put(((BigdataValue)l7).getIV(), l7); + literals.put(((BigdataValue)l8).getIV(), l8); + + final Map<IV, URI> uris = new LinkedHashMap<IV, URI>(); + uris.put(((BigdataValue)l1).getIV(), s1); + uris.put(((BigdataValue)l2).getIV(), s2); + uris.put(((BigdataValue)l3).getIV(), s3); + uris.put(((BigdataValue)l4).getIV(), s4); + uris.put(((BigdataValue)l5).getIV(), s5); + uris.put(((BigdataValue)l6).getIV(), s6); + uris.put(((BigdataValue)l7).getIV(), s7); + uris.put(((BigdataValue)l8).getIV(), s8); + +/**/ + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + final String searchQuery = "0 0"; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.MATCH_EXACT+"> \"true\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . 
" + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + final BindingSet tmp = result.next(); + if (log.isInfoEnabled()) + log.info(i + ": " + tmp.toString()); + i++; + } + assertEquals("wrong # of results", 2, i); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + BD.DEFAULT_PREFIX_MATCH,//false, // prefixMatch + BD.DEFAULT_MIN_RELEVANCE,//0d, // minCosine + BD.DEFAULT_MAX_RELEVANCE,//1.0d, // maxCosine + BD.DEFAULT_MIN_RANK,//1 + BD.DEFAULT_MAX_RANK,//10000, // maxRank (=maxResults + 1) + BD.DEFAULT_MATCH_ALL_TERMS,//false, // matchAllTerms + true, // matchExact + BD.DEFAULT_TIMEOUT,//1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = (IV)hit.getDocId(); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + if(log.isInfoEnabled()) + log.info(bs); + answer.add(bs); + } + + result = tupleQuery.evaluate(); + compare(result, answer); + + } + + } finally { + cxn.close(); + } + } finally { + sail.__tearDownUnitTest(); + } + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2012-04-13 15:22:01
|
Revision: 6286 http://bigdata.svn.sourceforge.net/bigdata/?rev=6286&view=rev Author: mrpersonick Date: 2012-04-13 15:21:49 +0000 (Fri, 13 Apr 2012) Log Message: ----------- range bop integration part I - single datatype ranges Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/BindingsClause.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionNode.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSetValueExpressionsOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/RangeNode.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RangeHint.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTRangeOptimizer.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpBase.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpBase.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -213,6 +213,15 @@ } /** + * A copy of the annotations. + */ + final protected Map<String, Object> annotationsCopy() { + + return new LinkedHashMap<String, Object>(annotations); + + } + + /** * A reference to the actual annotations map object. 
This is used in some * hot spots to avoid creating a new annotations map when we know that the * annotations will not be modified (annotations are always set within the Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -330,7 +330,39 @@ */ public Predicate<E> asBound(final IBindingSet bindingSet) { - return new Predicate<E>(argsCopy(), annotationsRef()) + Map<String, Object> anns = annotationsRef(); + + final RangeBOp rangeBOp = range(); + + if (rangeBOp != null) { + + /* + * Attempt to evaluate the RangeBOp. + */ + final RangeBOp asBound = rangeBOp.asBound(bindingSet); + + // reference test is ok here + if (asBound != rangeBOp) { + + // make a copy of the anns + // set the asBound range on it + // use those for the pred below + + anns = annotationsCopy(); + anns.put(Annotations.RANGE, asBound); + + } + + } + +// /* +// * If we have a range we need to modify the annotations with the asBound +// * range. +// */ +// final Map<String, Object> anns = +// range() != null ? annotationsCopy() : annotationsRef(); + + return new Predicate<E>(argsCopy(), anns) ._asBound(bindingSet); } @@ -376,30 +408,29 @@ } - /* - * FIXME When putting the RangeBOp back into use, be very careful of the - * optimization in asBound(). Predicate#asBound() is NOT making a copy - * of the annotations map. The code below will therefore cause a - * modification to the source predicate's annotations, not the copy's. - * This violates the "effectively immutable" contract. - * - * What the code should probably do is check in asBound() and only use - * the code path which avoids the annotations map copy when the RANGE is - * not to be set on the new Predicate instance. - */ - +// /* +// * FIXME When putting the RangeBOp back into use, be very careful of the +// * optimization in asBound(). Predicate#asBound() is NOT making a copy +// * of the annotations map. The code below will therefore cause a +// * modification to the source predicate's annotations, not the copy's. +// * This violates the "effectively immutable" contract. +// * +// * What the code should probably do is check in asBound() and only use +// * the code path which avoids the annotations map copy when the RANGE is +// * not to be set on the new Predicate instance. +// */ +// // final RangeBOp rangeBOp = range(); // -// // we don't have a range bop for ?o -// if (rangeBOp == null) -// return tmp; +// if (rangeBOp != null) { // -// /* -// * Attempt to evaluate the RangeBOp. -// */ -// final RangeBOp asBound = rangeBOp.asBound(bindingSet); -// -// tmp._setProperty(Annotations.RANGE, asBound); +// /* +// * Attempt to evaluate the RangeBOp. 
+// */ +// final RangeBOp asBound = rangeBOp.asBound(bindingSet); +// +// _setProperty(Annotations.RANGE, asBound); +// } return this; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -34,6 +34,7 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.ImmutableBOp; +import com.bigdata.bop.ModifiableBOpBase; import com.bigdata.bop.NV; import com.bigdata.bop.ap.Predicate; import com.bigdata.rdf.error.SparqlTypeErrorException; @@ -48,7 +49,7 @@ * @author mrpersonick */ @SuppressWarnings("rawtypes") -final public class RangeBOp extends BOpBase implements IVariable<Range> { +final public class RangeBOp extends ModifiableBOpBase { // implements IVariable<Range> { /** * @@ -73,17 +74,23 @@ } - /** Cached to/from lookups. */ - private transient volatile IValueExpression<IV> to, from; +// /** Cached to/from lookups. */ +// private transient volatile IValueExpression<IV> to, from; - public RangeBOp(final IVariable<IV> var, - final IValueExpression<IV> from, - final IValueExpression<IV> to) { + public RangeBOp(final IVariable<? extends IV> var) { + + this(BOp.NOARGS, NV.asMap(new NV(Annotations.VAR, var))); + + } + + public RangeBOp(final IVariable<? extends IV> var, + final IValueExpression<? extends IV> from, + final IValueExpression<? extends IV> to) { - this(BOp.NOARGS, - NV.asMap(new NV(Annotations.VAR, var), - new NV(Annotations.FROM, from), - new NV(Annotations.TO, to))); + this(BOp.NOARGS, NV.asMap( + new NV(Annotations.VAR, var), + new NV(Annotations.FROM, from), + new NV(Annotations.TO, to))); } @@ -91,17 +98,7 @@ * Required shallow copy constructor. */ public RangeBOp(final BOp[] args, final Map<String,Object> anns) { - super(args,anns); - - if (getProperty(Annotations.VAR) == null - || getProperty(Annotations.FROM) == null - || getProperty(Annotations.TO) == null) { - - throw new IllegalArgumentException(); - - } - } /** @@ -112,195 +109,217 @@ } @SuppressWarnings("unchecked") - public IVariable<IV> var() { - return (IVariable<IV>) getRequiredProperty(Annotations.VAR); + public IVariable<? extends IV> var() { + return (IVariable<? extends IV>) getProperty(Annotations.VAR); } @SuppressWarnings("unchecked") - public IValueExpression<IV> from() { - if (from == null) { - from = (IValueExpression<IV>) getRequiredProperty(Annotations.FROM); - } - return from; + public IValueExpression<? extends IV> from() { + return (IValueExpression<? extends IV>) getProperty(Annotations.FROM); } @SuppressWarnings("unchecked") - public IValueExpression<IV> to() { - if (to == null) { - to = (IValueExpression<IV>) getRequiredProperty(Annotations.TO); - } - return to; + public IValueExpression<? extends IV> to() { + return (IValueExpression<? extends IV>) getProperty(Annotations.TO); } + + public void setFrom(final IValueExpression<? 
extends IV> from) { + setProperty(Annotations.FROM, from); + } - final public Range get(final IBindingSet bs) { - -// log.debug("getting the asBound value"); - - final IV from = from().get(bs); - -// log.debug("from: " + from); - - // sort of like Var.get(), which returns null when the variable - // is not yet bound - if (from == null) - return null; - - final IV to = to().get(bs); - -// log.debug("to: " + to); - - // sort of like Var.get(), which returns null when the variable - // is not yet bound - if (to == null) - return null; + public void setTo(final IValueExpression<? extends IV> to) { + setProperty(Annotations.TO, to); + } - try { - /* - * FIXME Should handle from/to (non-)exclusive boundaries using a - * successor pattern. - */ - // let Range ctor() do the type checks and valid range checks - return new Range(from, to); - } catch (IllegalArgumentException ex) { - // log the reason the range is invalid -// if (log.isInfoEnabled()) -// log.info("dropping solution: " + ex.getMessage()); - // drop the solution - throw new SparqlTypeErrorException(); - } - - } - - /** - * FIXME This needs to be hooked from {@link Predicate#asBound(IBindingSet)}. - */ +// final public Range get(final IBindingSet bs) { +// +//// log.debug("getting the asBound value"); +// +// final IV from = from().get(bs); +// +//// log.debug("from: " + from); +// +// // sort of like Var.get(), which returns null when the variable +// // is not yet bound +// if (from == null) +// return null; +// +// final IV to = to().get(bs); +// +//// log.debug("to: " + to); +// +// // sort of like Var.get(), which returns null when the variable +// // is not yet bound +// if (to == null) +// return null; +// +// try { +// /* +// * FIXME Should handle from/to (non-)exclusive boundaries using a +// * successor pattern. +// */ +// // let Range ctor() do the type checks and valid range checks +// return new Range(from, to); +// } catch (IllegalArgumentException ex) { +// // log the reason the range is invalid +//// if (log.isInfoEnabled()) +//// log.info("dropping solution: " + ex.getMessage()); +// // drop the solution +// throw new SparqlTypeErrorException(); +// } +// +// } +// final public RangeBOp asBound(final IBindingSet bs) { - final IV from, to; - try { - // log.debug("getting the asBound value"); - - from = from().get(bs); - - // log.debug("from: " + from); - - // sort of like Var.get(), which returns null when the variable - // is not yet bound - if (from == null) - return this; - - to = to().get(bs); - - // log.debug("to: " + to); - - // sort of like Var.get(), which returns null when the variable - // is not yet bound - if (to == null) - return this; - - } catch (SparqlTypeErrorException ex) { - - /* - * Ignore. If the variables in the RangeBOp value expressions are - * not fully bound or has the wrong dynamic type then the range bop - * can not be evaluated yet. - */ - + /* + * Only care if we change from a non-null, non-Constant to a ground IV. + */ + final IValueExpression<? extends IV> origFrom = from(); + final IValueExpression<? extends IV> origTo = to(); + + IValueExpression<? extends IV> asBoundFrom; + { + if (origFrom == null) { + asBoundFrom = null; + } else if (origFrom instanceof IConstant) { + asBoundFrom = origFrom; + } else { + try { + final IV iv = origFrom.get(bs); + asBoundFrom = new Constant<IV>(iv); + } catch (SparqlTypeErrorException ex) { + asBoundFrom = origFrom; + } + } + } + + IValueExpression<? 
extends IV> asBoundTo; + { + if (origTo == null) { + asBoundTo = null; + } else if (origTo instanceof IConstant) { + asBoundTo = origTo; + } else { + try { + final IV iv = origTo.get(bs); + asBoundTo = new Constant<IV>(iv); + } catch (SparqlTypeErrorException ex) { + asBoundTo = origTo; + } + } + } + + /* + * Null means no value expression, constant value expression, or not + * able to evaluate at this time. Non-null means the asBound is + * different from the original. + */ + if (asBoundFrom == origFrom && asBoundTo == origTo) { return this; - } - - // Note: defer clone() until everything is bound. - final RangeBOp asBound = (RangeBOp) this.clone(); - - asBound._setProperty(Annotations.FROM, new Constant<IV>(from)); - asBound._setProperty(Annotations.TO, new Constant<IV>(to)); - + + final RangeBOp asBound = new RangeBOp(var()); + if (asBoundFrom != null) + asBound.setFrom(asBoundFrom); + if (asBoundTo != null) + asBound.setTo(asBoundTo); + return asBound; - + } - final public boolean isFullyBound() { - - return from() instanceof IConstant && to() instanceof IConstant; - +// final public boolean isFullyBound() { +// +// return (from() == null || from() instanceof IConstant) && +// (to() == null || to() instanceof IConstant); +// +// } + + final public boolean isFromBound() { + return from() instanceof IConstant; } - -// @Override - public boolean isVar() { - return true; - } - -// @Override - public boolean isConstant() { - return false; - } - -// @Override - public Range get() { -// log.debug("somebody tried to get me"); - - return null; - } - -// @Override - public String getName() { - return var().getName(); - } - -// @Override - public boolean isWildcard() { - return false; - } - - - /* - * TODO The default BOp equals() and hashCode() should be fine. - */ - - // TODO This looks dangerous. It is only considering the variable! - final public boolean equals(final IVariableOrConstant op) { - - if (op == null) - return false; - - if (this == op) - return true; - - if (op instanceof IVariable<?>) { - - return var().getName().equals(((IVariable<?>) op).getName()); - - } - - return false; - + + final public boolean isToBound() { + return to() instanceof IConstant; } -// final private boolean _equals(final RangeBOp op) { +// +//// @Override +// public boolean isVar() { +// return true; +// } +// +//// @Override +// public boolean isConstant() { +// return false; +// } +// +//// @Override +// public Range get() { +//// log.debug("somebody tried to get me"); +// +// return null; +// } +// +//// @Override +// public String getName() { +// return var().getName(); +// } +// +//// @Override +// public boolean isWildcard() { +// return false; +// } +// +// +// /* +// * TODO The default BOp equals() and hashCode() should be fine. +// */ +// +// // TODO This looks dangerous. It is only considering the variable! +// final public boolean equals(final IVariableOrConstant op) { +// +// if (op == null) +// return false; // -// return var().equals(op.var()) -// && from().equals(op.from()) -// && to().equals(op.to()); +// if (this == op) +// return true; // +// if (op instanceof IVariable<?>) { +// +// return var().getName().equals(((IVariable<?>) op).getName()); +// +// } +// +// return false; +// // } - - /** - * Caches the hash code. 
- */ -// private int hash = 0; - public int hashCode() { -// -// int h = hash; -// if (h == 0) { -// h = 31 * h + var().hashCode(); -// h = 31 * h + from().hashCode(); -// h = 31 * h + to().hashCode(); -// hash = h; -// } -// return h; -// - return var().hashCode(); - } +// +//// final private boolean _equals(final RangeBOp op) { +//// +//// return var().equals(op.var()) +//// && from().equals(op.from()) +//// && to().equals(op.to()); +//// +//// } +// +// /** +// * Caches the hash code. +// */ +//// private int hash = 0; +// public int hashCode() { +//// +//// int h = hash; +//// if (h == 0) { +//// h = 31 * h + var().hashCode(); +//// h = 31 * h + from().hashCode(); +//// h = 31 * h + to().hashCode(); +//// hash = h; +//// } +//// return h; +//// +// return var().hashCode(); +// } } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/BindingsClause.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/BindingsClause.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/BindingsClause.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -194,14 +194,26 @@ sb.append("{"); - for(IBindingSet bset : bindingSets) { - + if (bindingSets.size() <= 10) { + + for(IBindingSet bset : bindingSets) { + + sb.append("\n"); + + sb.append(s1); + + sb.append(bset.toString()); + + } + + } else { + sb.append("\n"); sb.append(s1); - sb.append(bset.toString()); - + sb.append("[ count=" + bindingSets.size() + " ]"); + } sb.append("\n"); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionNode.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/FunctionNode.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -113,7 +113,7 @@ super.clearProperty(Annotations.VALUE_EXPR); super.invalidate(); - + } /** @@ -188,6 +188,16 @@ } /** + * Return <code>t1 - t2</code> (aka SUBTRACT). + */ + static public FunctionNode subtract(final TermNode t1, final TermNode t2) { + + return new FunctionNode(FunctionRegistry.SUBTRACT, null/* scalarValues */, + new ValueExpressionNode[] { t1, t2 }); + + } + + /** * Return <code>sameTerm(t1,t2)</code> (aka EQ). 
*/ static public FunctionNode sameTerm(final TermNode t1, final TermNode t2) { @@ -238,6 +248,28 @@ } /** + * Return <code>min(v1,v2)</code> + */ + static public FunctionNode MIN( + final ValueExpressionNode v1, final ValueExpressionNode v2) { + + return new FunctionNode(FunctionRegistry.MIN, null/* scalarValues */, + new ValueExpressionNode[] { v1, v2 }); + + } + + /** + * Return <code>max(v1,v2)</code> + */ + static public FunctionNode MAX( + final ValueExpressionNode v1, final ValueExpressionNode v2) { + + return new FunctionNode(FunctionRegistry.MAX, null/* scalarValues */, + new ValueExpressionNode[] { v1, v2 }); + + } + + /** * Return a binary function <code>op(t1,t2)</code> */ static public FunctionNode binary(final URI uri, final TermNode t1, Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -398,4 +398,11 @@ boolean DEFAULT_SOLUTION_SET_CACHE = false; + /** + * Used to mark a predicate as "range safe" - that is, we can safely + * apply the range bop to constrain the predicate. This can only be + * used currently when there is a single datatype for attribute values. + */ + String RANGE_SAFE = "rangeSafe"; + } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/RangeNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/RangeNode.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/RangeNode.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -0,0 +1,127 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +package com.bigdata.rdf.sparql.ast; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.NV; +import com.bigdata.rdf.internal.constraints.Range; +import com.bigdata.rdf.internal.constraints.RangeBOp; + +/** + * It's a value expression because it does eventually evaluate to a value - + * a {@link Range} value. + */ +public class RangeNode extends ASTBase { + + /** + * + */ + private static final long serialVersionUID = 1954230684539505857L; + + public interface Annotations extends ASTBase.Annotations { + + /** + * The variable whose range is restricted by the associated + * {@link #FROM} and/or {@link #TO} filters. + */ + String VAR = RangeNode.class.getName() + ".var"; + + /** + * The inclusive lower bound. 
+ */ + String FROM = RangeNode.class.getName() + ".from"; + + /** + * The exclusive upper bound. + */ + String TO = RangeNode.class.getName() + ".to"; + + /** + * The physical bop to be executed. + */ + String RANGE_BOP = RangeNode.class.getName() + ".bop"; + + } + + /** + * Set the bounds later. + */ + public RangeNode(final VarNode var) { + this(BOp.NOARGS, NV.asMap( + new NV(Annotations.VAR, var) + )); + } + + /** + * Set the bounds now. + */ + public RangeNode(final VarNode var, final IValueExpressionNode from, + final IValueExpressionNode to) { + this(BOp.NOARGS, NV.asMap( + new NV(Annotations.VAR, var), + new NV(Annotations.FROM, from), + new NV(Annotations.TO, to) + )); + } + + public RangeNode(final BOp[] args, final Map<String, Object> anns) { + super(args, anns); + } + + public RangeNode(final RangeNode bop) { + super(bop); + } + + public void setFrom(final ValueExpressionNode ve) { + setProperty(Annotations.FROM, ve); + } + + public void setTo(final ValueExpressionNode ve) { + setProperty(Annotations.TO, ve); + } + + public ValueExpressionNode from() { + return (ValueExpressionNode) getProperty(Annotations.FROM); + } + + public ValueExpressionNode to() { + return (ValueExpressionNode) getProperty(Annotations.TO); + } + + public VarNode var() { + return (VarNode) getProperty(Annotations.VAR); + } + + public RangeBOp getRangeBOp() { + return (RangeBOp) getProperty(Annotations.RANGE_BOP); + } + + public void setRangeBOp(final RangeBOp bop) { + setProperty(Annotations.RANGE_BOP, bop); + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -345,6 +345,23 @@ setProperty(Annotations.OPTIONAL, optional); } + + /** + * Attach a {@link RangeNode} that describes a range for the statement + * pattern's O value. + * @param range + */ + final public void setRange(final RangeNode range) { + + setProperty(Annotations.RANGE, range); + + } + + final public RangeNode getRange() { + + return (RangeNode) getProperty(Annotations.RANGE); + + } final public List<FilterNode> getAttachedJoinFilters() { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -117,6 +117,7 @@ import com.bigdata.rdf.sparql.ast.QueryBase; import com.bigdata.rdf.sparql.ast.QueryHints; import com.bigdata.rdf.sparql.ast.QueryRoot; +import com.bigdata.rdf.sparql.ast.RangeNode; import com.bigdata.rdf.sparql.ast.SliceNode; import com.bigdata.rdf.sparql.ast.SolutionSetStats; import com.bigdata.rdf.sparql.ast.StatementPatternNode; @@ -3431,6 +3432,12 @@ // Mark the join as optional. 
anns.add(new NV(IPredicate.Annotations.OPTIONAL, Boolean.TRUE)); } + + final RangeNode range = sp.getRange(); + if (range != null) { + // Add the RangeBOp + anns.add(new NV(IPredicate.Annotations.RANGE, range.getRangeBOp())); + } final Properties queryHints = sp.getQueryHints(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -101,6 +101,7 @@ add(new RunOnceHint()); add(new OptimizerQueryHint()); add(new OptimisticQueryHint()); + add(new RangeHint()); add(new AnalyticQueryHint()); add(new NativeDistinctQueryHint()); Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RangeHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RangeHint.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RangeHint.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -0,0 +1,63 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 27, 2011 + */ + +package com.bigdata.rdf.sparql.ast.hints; + +import com.bigdata.rdf.sparql.ast.ASTBase; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; + +final class RangeHint extends AbstractQueryHint<Boolean> { + + protected RangeHint() { + super(QueryHints.RANGE_SAFE, false); + } + + @Override + public void handle(final AST2BOpContext context, + final QueryHintScope scope, final ASTBase op, + final Boolean value) { + + if (op instanceof StatementPatternNode) { + + _setQueryHint(context, scope, op, getName(), value); + + return; + + } + + } + + @Override + public Boolean validate(String value) { + + return Boolean.valueOf(value); + + } + +} \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -0,0 +1,285 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. 
All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 10, 2011 + */ + +package com.bigdata.rdf.sparql.ast.optimizers; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.openrdf.model.URI; + +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.constraints.RangeBOp; +import com.bigdata.rdf.sparql.ast.FilterNode; +import com.bigdata.rdf.sparql.ast.FunctionNode; +import com.bigdata.rdf.sparql.ast.FunctionRegistry; +import com.bigdata.rdf.sparql.ast.GlobalAnnotations; +import com.bigdata.rdf.sparql.ast.IQueryNode; +import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.RangeNode; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; +import com.bigdata.rdf.sparql.ast.StaticAnalysis; +import com.bigdata.rdf.sparql.ast.ValueExpressionNode; +import com.bigdata.rdf.sparql.ast.VarNode; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility; + +/** + * Attach range bops. + */ +public class ASTRangeOptimizer extends AbstractJoinGroupOptimizer + implements IASTOptimizer { + + private static final transient Logger log = Logger.getLogger(ASTRangeOptimizer.class); + + /** + * Optimize the join group, attach range nodes. + */ + @SuppressWarnings("rawtypes") + protected void optimizeJoinGroup(final AST2BOpContext ctx, + final StaticAnalysis sa, final JoinGroupNode group) { + + /* + * First see if we have anything "rangeable". 
+ */ + boolean rangeSafe = false; + for (IQueryNode node : group) { + + if (!(node instanceof StatementPatternNode)) + continue; + + final StatementPatternNode sp = (StatementPatternNode) node; + + rangeSafe |= sp.getQueryHintAsBoolean(QueryHints.RANGE_SAFE, false); + + } + if (!rangeSafe) + return; + + final Map<VarNode, RangeNode> ranges = + new LinkedHashMap<VarNode, RangeNode>(); + + for (IQueryNode node : group) { + + if (!(node instanceof FilterNode)) + continue; + + final FilterNode filter = (FilterNode) node; + + if (log.isDebugEnabled()) + log.debug(filter); + + if (!(filter.getValueExpressionNode() instanceof FunctionNode)) + continue; + + final FunctionNode function = (FunctionNode) + filter.getValueExpressionNode(); + + if (log.isDebugEnabled()) + log.debug(function); + + processFunction(function, ranges); + + } + + // didn't find any + if (ranges.isEmpty()) + return; + + final GlobalAnnotations globals = new GlobalAnnotations( + ctx.getLexiconNamespace(), ctx.getTimestamp()); + + for (IQueryNode node : group) { + + if (!(node instanceof StatementPatternNode)) + continue; + + final StatementPatternNode sp = (StatementPatternNode) node; + + if (!sp.getQueryHintAsBoolean(QueryHints.RANGE_SAFE, false)) + continue; + + if (!sp.o().isVariable()) + continue; + + final VarNode v = (VarNode) sp.o(); + + if (!ranges.containsKey(v)) + continue; + + final RangeNode range = ranges.get(v); + + final RangeBOp bop = toRangeBOp(range, globals); + + if (log.isDebugEnabled()) { + log.debug("attaching a range:\n" + range + "\n to statement pattern: " + sp); + } + + range.setRangeBOp(bop); + + sp.setRange(range); + + } + + } + + /** + * Public and static to facilitate the test cases. + */ + public static RangeBOp toRangeBOp( + final RangeNode range, final GlobalAnnotations globals) { + + final IVariable<? extends IV> var = range.var().getValueExpression(); + + final RangeBOp bop = new RangeBOp(var); + + final ValueExpressionNode from = range.from(); + + if (from != null) { + + final IValueExpression<? extends IV> ve = + AST2BOpUtility.toVE(globals, from); + + bop.setFrom(ve); + + } + + final ValueExpressionNode to = range.to(); + + if (to != null) { + + final IValueExpression<?
extends IV> ve = + AST2BOpUtility.toVE(globals, to); + + bop.setTo(ve); + + } + + return bop; + + } + + private void processFunction( + final FunctionNode function, final Map<VarNode, RangeNode> ranges) { + + final URI uri = function.getFunctionURI(); + + if (uri.equals(FunctionRegistry.AND)) { + + final ValueExpressionNode left = (ValueExpressionNode) function.get(0); + + if (left instanceof FunctionNode) { + + processFunction((FunctionNode) left, ranges); + + } + + final ValueExpressionNode right = (ValueExpressionNode) function.get(1); + + if (right instanceof FunctionNode) { + + processFunction((FunctionNode) right, ranges); + + } + + } else if (uri.equals(FunctionRegistry.GT) || uri.equals(FunctionRegistry.GE)) { + + final ValueExpressionNode left = (ValueExpressionNode) function.get(0); + final ValueExpressionNode right = (ValueExpressionNode) function.get(1); + + // ?left > ?right + if (left instanceof VarNode) { + addLowerBound((VarNode) left, right, ranges); + } + + if (right instanceof VarNode) { + addUpperBound((VarNode) right, left, ranges); + } + + } else if (uri.equals(FunctionRegistry.LT) || uri.equals(FunctionRegistry.LE)) { + + final ValueExpressionNode left = (ValueExpressionNode) function.get(0); + final ValueExpressionNode right = (ValueExpressionNode) function.get(1); + + // ?left < ?right + if (left instanceof VarNode) { + addUpperBound((VarNode) left, right, ranges); + } + + if (right instanceof VarNode) { + addLowerBound((VarNode) right, left, ranges); + } + + } + + } + + private void addUpperBound(final VarNode var, final ValueExpressionNode ve, + final Map<VarNode, RangeNode> ranges) { + + RangeNode range = ranges.get(var); + if (range == null) { + range = new RangeNode(var); + ranges.put(var, range); + } + + ValueExpressionNode to = range.to(); + if (to == null) { + to = (ValueExpressionNode) ve.clone(); + } else { + to = FunctionNode.MIN(to, ve); + } + + range.setTo(to); + + } + + private void addLowerBound(final VarNode var, final ValueExpressionNode ve, + final Map<VarNode, RangeNode> ranges) { + + RangeNode range = ranges.get(var); + if (range == null) { + range = new RangeNode(var); + ranges.put(var, range); + } + + ValueExpressionNode from = range.from(); + if (from == null) { + from = (ValueExpressionNode) ve.clone(); + } else { + from = FunctionNode.MAX(from, ve); + } + + range.setFrom(from); + + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSetValueExpressionsOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSetValueExpressionsOptimizer.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTSetValueExpressionsOptimizer.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -40,6 +40,9 @@ import com.bigdata.rdf.sparql.ast.IValueExpressionNodeContainer; import com.bigdata.rdf.sparql.ast.OrderByExpr; import com.bigdata.rdf.sparql.ast.QueryRoot; +import com.bigdata.rdf.sparql.ast.RangeNode; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; +import com.bigdata.rdf.sparql.ast.ValueExpressionNode; import com.bigdata.rdf.sparql.ast.VarNode; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility; @@ -155,6 +158,8 @@ return true; if (obj instanceof HavingNode) return true; + if (obj instanceof StatementPatternNode) + return true; return false; } 
}); @@ -183,8 +188,8 @@ if (op instanceof IValueExpressionNodeContainer) { // AssignmentNode, FilterNode, OrderByExpr - AST2BOpUtility.toVE(globals, ((IValueExpressionNodeContainer) op) - .getValueExpressionNode()); + AST2BOpUtility.toVE(globals, + ((IValueExpressionNodeContainer) op).getValueExpressionNode()); } else if (op instanceof HavingNode) { @@ -196,6 +201,22 @@ } + } else if (op instanceof StatementPatternNode) { + + final StatementPatternNode sp = (StatementPatternNode) op; + + final RangeNode range = sp.getRange(); + + if (range != null) { + + if (range.from() != null) + AST2BOpUtility.toVE(globals, range.from()); + + if (range.to() != null) + AST2BOpUtility.toVE(globals, range.to()); + + } + } // if(op instanceof AssignmentNode) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTStaticJoinOptimizer.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -45,6 +45,7 @@ import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlan2; import com.bigdata.journal.ITx; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.sparql.ast.GraphPatternGroup; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.IJoinNode; @@ -57,6 +58,7 @@ import com.bigdata.rdf.sparql.ast.QueryHints; import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum; import com.bigdata.rdf.sparql.ast.QueryRoot; +import com.bigdata.rdf.sparql.ast.RangeNode; import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.StaticAnalysis; import com.bigdata.rdf.sparql.ast.TermNode; @@ -174,6 +176,10 @@ if (!(queryNode instanceof QueryRoot)) return queryNode; + if (log.isDebugEnabled()) { + log.debug("before:\n"+queryNode); + } + final QueryRoot queryRoot = (QueryRoot) queryNode; final IBindingSet exogenousBindings = getExogenousBindings(bindingSets); @@ -221,6 +227,10 @@ // log.error("\nafter rewrite:\n" + queryNode); + if (log.isDebugEnabled()) { + log.debug("after:\n"+queryNode); + } + return queryNode; } @@ -543,8 +553,11 @@ final IV<?, ?> p = getIV(sp.p(), exogenousBindings); final IV<?, ?> o = getIV(sp.o(), exogenousBindings); final IV<?, ?> c = getIV(sp.c(), exogenousBindings); + + final RangeNode rangeNode = sp.getRange(); + final RangeBOp range = rangeNode != null ? rangeNode.getRangeBOp() : null; - final IAccessPath<?> ap = db.getAccessPath(s, p, o, c); + final IAccessPath<?> ap = db.getAccessPath(s, p, o, c, range); final long cardinality = ap.rangeCount(false/* exact */); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/DefaultOptimizerList.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -421,6 +421,17 @@ */ add(new ASTRunFirstRunLastOptimizer()); + /* + * FIXME Datatype and value range constraints. 
Per the notes immediately + * above, incorporate an optimizer which leverages information about + * ground and non-ground datatype constraints and value-range + * constraints within the allowable ground datatypes for a variable when + * it is first bound by an SP. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/238 + */ + add(new ASTRangeOptimizer()); + /** * Run the static join order optimizer. This attaches the estimated * cardinality data (fast range counts) and uses fast algorithm to @@ -460,17 +471,6 @@ add(new ASTStaticJoinOptimizer()); /* - * FIXME Datatype and value range constraints. Per the notes immediately - * above, incorporate an optimizer which leverages information about - * ground and non-ground datatype constraints and value-range - * constraints within the allowable ground datatypes for a variable when - * it is first bound by an SP. - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/238 - */ -// add(new ASTRangeConstraintOptimizer()); - - /* * The joins are now ordered. Everything from here down MUST NOT change * the join order when making changes to the join groups and MAY rely on * the join order to make decisions about filter attachment, whether a Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -517,8 +517,7 @@ boolean noneBound = true; - final RangeBOp range = (RangeBOp) - predicate.getProperty(IPredicate.Annotations.RANGE); + final RangeBOp range = getRange(predicate); for (int i = 0; i < keyArity; i++) { @@ -527,10 +526,20 @@ final IVariableOrConstant<?> term = predicate.get(index); if (term == null || term.isVar()) { - if (index == 2 && range != null && range.isFullyBound()) { + if (index == 2 && range != null && range.isFromBound()) { + /* + * We are on the O term, it's a variable, and we have a + * lower bound for it. + */ final IConstant<IV> c = (IConstant<IV>) range.from(); + appendKeyComponent(keyBuilder, i, c.get()); + noneBound = false; + + if (log.isInfoEnabled()) { + log.info("used range to build from key: " + c); + log.info(predicate); + } + } else { break; } @@ -547,6 +556,36 @@ } + + /* + * Ranges are only useful when O is not bound and either SP bound or just P + * SP?: SPO index + * ?P?: POS index + * S??: SPO index, range is not useful without an SOP index + * ???: SPO index, range is not useful unless we switch to OSP + * + * This reduces to when O == null && P != null + */ + private RangeBOp getRange(final IPredicate<ISPO> predicate) { + + final RangeBOp range = (RangeBOp) + predicate.getProperty(IPredicate.Annotations.RANGE); + + if (range == null) + return null; + +// final IVariableOrConstant<?> s = predicate.get(0); + final IVariableOrConstant<?> p = predicate.get(1); + final IVariableOrConstant<?> o = predicate.get(2); + + if ((o == null || o.isVar()) && (p != null && p.isConstant())) { + return range; + } else { + return null; + } + + } + /** * {@inheritDoc} * * @@ -570,8 +609,7 @@ boolean noneBound = true; - final RangeBOp range = (RangeBOp) - predicate.getProperty(IPredicate.Annotations.RANGE); + final RangeBOp range = getRange(predicate); for (int i = 0; i < keyArity; i++) { @@ -581,10 +619,20 @@ final IVariableOrConstant<?> term = predicate.get(index); // Note: term MAY be null for the context position.
if (term == null || term.isVar()) { - if (index == 2 && range != null && range.isFullyBound()) { + if (index == 2 && range != null && range.isToBound()) { + /* + * We are on the O term, it's a variable, and we have an + * upper bound for it. + */ final IConstant<IV> c = (IConstant<IV>) range.to(); appendKeyComponent(keyBuilder, i, c.get()); noneBound = false; + + if (log.isInfoEnabled()) { + log.info("used range to build to key: " + c); + log.info(predicate); + } + } else { break; } @@ -842,16 +890,16 @@ return keyOrder; } - final RangeBOp range = (RangeBOp) - predicate.getProperty(IPredicate.Annotations.RANGE); +// final RangeBOp range = (RangeBOp) +// predicate.getProperty(IPredicate.Annotations.RANGE); +// +// final boolean rangeIsBound = range != null && range.isFullyBound(); - final boolean rangeIsBound = range != null && range.isFullyBound(); - final boolean s = !predicate.get(0).isVar(); final boolean p = !predicate.get(1).isVar(); - final boolean o = !predicate.get(2).isVar() || rangeIsBound; + final boolean o = !predicate.get(2).isVar(); // || rangeIsBound; if (keyArity == 3) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -33,7 +33,9 @@ import java.util.Collections; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.UUID; @@ -77,6 +79,7 @@ import com.bigdata.rdf.inf.Justification; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; +import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.internal.impl.bnode.SidIV; import com.bigdata.rdf.lexicon.ITermIVFilter; import com.bigdata.rdf.lexicon.LexiconRelation; @@ -931,22 +934,39 @@ if (keyArity != 3) throw new UnsupportedOperationException(); - return getAccessPath(s, p, o, null/* c */, null/* filter */); + return getAccessPath(s, p, o, null/* c */, null/* filter */, null/* range */); } /** - * Return the access path for a triple or quad pattern with an optional - * filter. + * Return the access path for a triple or quad pattern. */ public IAccessPath<ISPO> getAccessPath(final IV s, final IV p, final IV o, final IV c) { - return getAccessPath(s, p, o, c, null/*filter*/); + return getAccessPath(s, p, o, c, null/*filter*/, null/*range*/); } /** + * Return the access path for a triple or quad pattern with a range. + */ + public IAccessPath<ISPO> getAccessPath(final IV s, final IV p, + final IV o, final IV c, final RangeBOp range) { + + return getAccessPath(s, p, o, c, null/*filter*/, range); + + } + + /** + * Return the access path for a triple or quad pattern with a filter. + */ + public IAccessPath<ISPO> getAccessPath(final IV s, final IV p, + final IV o, final IV c, IElementFilter<ISPO> filter) { + return getAccessPath(s, p, o, c, filter, null); + } + + /** * Return the access path for a triple or quad pattern with an optional * filter (core implementation). All arguments are optional. Any bound * argument will restrict the returned access path. For a triple pattern, @@ -965,16 +985,18 @@ * store). * @param filter * The filter (optional). 
- * + * @param range + * The range (optional). + * * @return The best access path for that triple or quad pattern. * * @throws UnsupportedOperationException * for a triple store without statement identifiers if the * <i>c</i> is non-{@link #NULL}. */ - @SuppressWarnings("unchecked") public IAccessPath<ISPO> getAccessPath(final IV s, final IV p, - final IV o, final IV c, IElementFilter<ISPO> filter) { + final IV o, final IV c, IElementFilter<ISPO> filter, + final RangeBOp range) { final IVariableOrConstant<IV> S = (s == null ? Var.var("s") : new Constant<IV>(s)); @@ -1008,12 +1030,18 @@ } + final Map<String, Object> anns = new LinkedHashMap<String, Object>(); + anns.put(IPredicate.Annotations.RELATION_NAME, + new String[] { getNamespace() }); + if (range != null) { + anns.put(IPredicate.Annotations.RANGE, range); + } + Predicate<ISPO> pred = new SPOPredicate( // keyArity == 4 ? (keyArity == 4 || statementIdentifiers) ? new BOp[] { S, P, O, C } : new BOp[] { S, P, O }, - new NV(IPredicate.Annotations.RELATION_NAME, - new String[] { getNamespace() })); + anns); if (filter != null) { // Layer on an optional filter. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2012-04-13 15:12:13 UTC (rev 6285) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2012-04-13 15:21:49 UTC (rev 6286) @@ -97,6 +97,7 @@ import com.bigdata.rdf.internal.IExtensionFactory; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.VTE; +import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.internal.impl.BlobIV; import com.bigdata.rdf.internal.impl.extensions.XSDStringExtension; import com.bigdata.rdf.lexicon.BigdataSubjectCentricFullTextIndex; @@ -1953,12 +1954,12 @@ if (exact) { - return getAccessPath(null/* s */, null/* p */, null/* o */, c, null/* filter */) + return getAccessPath(null/* s */, null/* p */, null/* o */, c) .rangeCount(exact); } else { - return getAccessPath(null/* s */, null/* p */, null/* o */, c, null/* filter */) + return getAccessPath(null/* s */, null/* p */, null/* o */, c) .rangeCount(exact); } @@ -1980,7 +1981,7 @@ public long getExplicitStatementCount(final Resource c) { return getAccessPath(null/* s */, null/* p */, null/* o */, c, - ExplicitSPOFilter.INSTANCE).rangeCount(true/* exact */); + ExplicitSPOFilter.INSTANCE, null).rangeCount(true/* exact */); } @@ -2531,7 +2532,7 @@ final public long removeStatements(final Resource s, final URI p, final Value o, final Resource c) { - return getAccessPath(s, p, o, c, null/* filter */).removeAll(); + return getAccessPath(s, p, o, c).removeAll(); } @@ -2647,26 +2648,27 @@ public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p, final Value o) { - return getAccessPath(s, p, o, null/*c*/, null/* filter */); + return getAccessPath(s, p, o, null/*c*/, null/* filter */, null/* range */); } public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p, final Value o, final IElementFilter<ISPO> filter) { - return getAccessPath(s, p, o, null/* c */, filter); + return getAccessPath(s, p, o, null/* c */, filter, null/* range */); } final public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p, final Value o, Resource c) { - return getAccessPath(s, p, o, c, null/* filter */); + return getAccessPath(s, p, o, c, null/* 
filter */, null/* range */); } final public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p, - final Value o, final Resource c, final IElementFilter<ISPO> filter) { + final Value o, final Resource c, final IElementFilter<ISPO> filter, + final RangeBOp range) { /* * Convert other Value object types to our object types. @@ -2776,7 +2778,7 @@ final IV o) { return getSPORelation() - .getAccessPath(s, p, o, null/* c */, null/* filter */); + .getAccessPath(s, p, o, null/* c */); } @@ -2790,10 +2792,18 @@ final IV o,final IV c) { return getSPORelation() - .getAccessPath(s, p, o, c, null/* filter */); + .getAccessPath(s, p, o, c); } + final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p, + final IV o,final IV c, final RangeBOp range) { + + return getSPORelation() + .getAccessPath(s, p, o, c, range); + + } + final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p, final IV o, final IV c, final IElementFilter<ISPO> filter) { @@ -3079,7 +3089,7 @@ final IV p = term.getIV(); final long n = getSPORelation().getAccessPath(null, p, null, - null, null/* filter */).rangeCount(false/* exact */); + null).rangeCount(false/* exact */); /* * FIXME do efficient term resolution for scale-out. This will Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTRangeOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTRangeOptimizer.java (rev 0) ... [truncated message content] |
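For readers tracking the API change in r6286 above, a minimal usage sketch of the new range-aware overload. The getAccessPath(s, p, o, c, range) signature and the rangeCount(boolean) call are taken from the diff itself; how the RangeBOp is constructed is not shown in this commit, so it is passed in as an opaque parameter here, and the class and method names are invented for illustration.

import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.RangeBOp;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.relation.accesspath.IAccessPath;

/**
 * Sketch only (hypothetical helper, not part of the commit): count the
 * statements for a predicate whose object position is restricted to a
 * value range, using the overload added in r6286.
 */
public class RangeAccessPathSketch {

    @SuppressWarnings("rawtypes")
    public static long rangeRestrictedCount(final AbstractTripleStore store,
            final IV p, final RangeBOp range) {

        // null positions are wildcards; the RANGE annotation is layered
        // onto the generated SPOPredicate so the index scan is narrowed
        // to the allowable value range instead of filtering afterwards.
        final IAccessPath<ISPO> ap = store.getAccessPath(
                null/* s */, p, null/* o */, null/* c */, range);

        // Fast (non-exact) range count over the narrowed key range.
        return ap.rangeCount(false/* exact */);
    }
}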
From: <tho...@us...> - 2012-04-17 20:11:43
|
Revision: 6294 http://bigdata.svn.sourceforge.net/bigdata/?rev=6294&view=rev Author: thompsonbry Date: 2012-04-17 20:11:31 +0000 (Tue, 17 Apr 2012) Log Message: ----------- We are special casing the situation where the INCLUDE appears as the first operator in a WHERE clause (of either the QueryRoot or a NamedSubqueryRoot) and cardinality of the exogenous solutions is low. In that case we are performing a scan over the named solutions, joining against each exogenous solution in an inner loop. This is efficient. It eliminates a problem where we are not assigning good join variables, and it preserves the ORDER of the solutions in the named solution set. Otherwise, we build a hash index from the named solution set on the shared definitely bound variables and then perform a hash join against the source solutions. - done. Write tests for SLICE of a named solution set which verify that the order of the solutions in the named solution set is preserved when it is the sole operator in a WHERE clause. Write tests for both the top-level query and a sub-select. This uses a new operator which performs a SCAN on the named solution set and does an inner loop join against the solutions from the pipeline (which must have low cardinality for this to be efficient). - insert into solutions is now running. - delete from solutions is transformed into a MINUS operation and is now running. - Modified the (otherwise unused) BigdataOpenRDFBindingSetsResolverator to expose the chunk-wise resolution of BindingSet[] to IBindingSet[]. I am currently working on DELETE+INSERT for solution sets. This is a different pattern and requires new code paths for handling the DELETE of the solutions. @see https://sourceforge.net/apps/trac/bigdata/ticket/531 (SPARQL UPDATE for SOLUTION SETS) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/engine/AbstractQueryEngineTestCase.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestAll.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/CompiledSolutionSetStats.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SolutionSetStatserator.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTNamedSubqueryOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/BigdataOpenRDFBindingSetsResolverator.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/AbstractDataDrivenSPARQLTestCase.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/cache/TestSolutionSetCache.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTNamedSubqueryOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest2.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/NamedSolutionSetScanOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestNamedSolutionSetScanOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestInclude.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_01.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_01.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_01.trig branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_02.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_02.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/include_02.trig branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test_deleteFromSolutions_01.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test_deleteFromSolutions_02.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test_deleteFromSolutions_03.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test_deleteInsertSolutions_01a.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test_deleteInsertSolutions_01b.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test_insertIntoSolutions_01.srx Removed Paths: ------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test01.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/data/test02.srx Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -47,7 +47,9 @@ import com.bigdata.bop.solutions.IGroupByRewriteState; import com.bigdata.rdf.sparql.ast.GroupNodeBase; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; +import com.bigdata.rdf.sparql.ast.SolutionSetStatserator; import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.striterator.CloseableIteratorWrapper; import com.bigdata.striterator.ICloseableIterator; import cutthecrap.utils.striterators.EmptyIterator; @@ -999,6 +1001,22 @@ * An empty {@link IBindingSet}[]. */ public static final IBindingSet[] EMPTY_CHUNK = new IBindingSet[0]; + + /** + * Wrap the solutions with an {@link ICloseableIterator}. + * + * @param bindingSets + * The solutions. + * + * @return The {@link ICloseableIterator}. 
+ */ + public static ICloseableIterator<IBindingSet[]> asIterator( + final IBindingSet[] bindingSets) { + + return new CloseableIteratorWrapper<IBindingSet[]>( + new SingleValueIterator<IBindingSet[]>(bindingSets)); + + } /** * Pretty print a bop. Added: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/NamedSolutionSetScanOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/NamedSolutionSetScanOp.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/NamedSolutionSetScanOp.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -0,0 +1,270 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 25, 2010 + */ + +package com.bigdata.bop.join; + +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.rdf.sparql.ast.cache.ISparqlCache; +import com.bigdata.rdf.sparql.ast.cache.SparqlCacheFactory; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer; +import com.bigdata.striterator.ICloseableIterator; + +/** + * This operator performs a nested loop join for solutions read from a named + * solution set <em>lacking an index</em> against solutions read from the + * pipeline (left). The operator is useful when the cardinality of the source + * solutions in the pipeline is low (typically one empty source solution which + * is exogenous to the query, but it is also cost efficient when there is a + * small set of source solutions to be tested for each solution drained from the + * named solution set). As the number of source solutions to be drained from the + * pipeline grows, it eventually becomes cheaper to build a hash index over the + * named solution set and perform a hash join with the source solutions. + * <p> + * Note: This operator MUST NOT reorder the solutions which are being scanned + * from the named solution set. The query planner relies on that behavior to + * optimize a SLICE from a pre-computed named solution set. 
+ * + * @see <a + * href="https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=SPARQL_Update"> + * SPARQL Update </a> + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/531"> SPARQL + * UPDATE for SOLUTION SETS </a> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: CopyOp.java 6010 2012-02-10 20:11:20Z thompsonbry $ + */ +public class NamedSolutionSetScanOp extends PipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends JoinAnnotations { + + /** + * The name of the pre-existing named solution set to be scanned. + */ + String NAME = NamedSolutionSetScanOp.class.getName() + ".name"; + + } + + /** + * Deep copy constructor. + * + * @param op + */ + public NamedSolutionSetScanOp(final NamedSolutionSetScanOp op) { + + super(op); + + } + + /** + * Shallow copy constructor. + * + * @param args + * @param annotations + */ + public NamedSolutionSetScanOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + // MUST be given. + getRequiredProperty(Annotations.NAME); + + } + + public NamedSolutionSetScanOp(final BOp[] args, final NV... annotations) { + + this(args, NV.asMap(annotations)); + + } + + /** + * @see Annotations#NAME + */ + protected String getName() { + + return (String) getRequiredProperty(Annotations.NAME); + + } + + /** + * @see Annotations#SELECT + */ + protected IVariable<?>[] getSelect() { + + return getProperty(Annotations.SELECT, null/* defaultValue */); + + } + + /** + * @see Annotations#CONSTRAINTS + */ + protected IConstraint[] constraints() { + + return getProperty(Annotations.CONSTRAINTS, null/* defaultValue */); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new ChunkTask(this, context)); + + } + + /** + * Scan the named solution set and join each solution against the source solutions. + */ + static private class ChunkTask implements Callable<Void> { + + private final NamedSolutionSetScanOp op; + + private final BOpContext<IBindingSet> context; + + private final ISparqlCache sparqlCache; + + ChunkTask(final NamedSolutionSetScanOp op, + final BOpContext<IBindingSet> context) { + + this.op = op; + + this.context = context; + + // Resolve the object which will give us access to the named + // solution set. + this.sparqlCache = SparqlCacheFactory + .getExistingSparqlCache(context.getRunningQuery() + .getQueryEngine()); + + if (sparqlCache == null) { + + /* + * Not enabled? + */ + + throw new UnsupportedOperationException(); + + } + + } + + public Void call() throws Exception { + + final BOpStats stats = context.getStats(); + + // Convert source solutions to array (low cardinality). + final IBindingSet[] leftSolutions = BOpUtility.toArray( + context.getSource(), stats); + + // default sink + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + + final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>( + op.getChunkCapacity(), sink); + + final String name = op.getName(); + + final IVariable<?>[] selectVars = op.getSelect(); + + final IConstraint[] constraints = op.constraints(); + + final ICloseableIterator<IBindingSet[]> ritr = sparqlCache + .getSolutions(name); + + try { + + while (ritr.hasNext()) { + + final IBindingSet[] a = ritr.next(); + + for (IBindingSet right : a) { + + for (IBindingSet left : leftSolutions) { + + // See if the solutions join.
+ final IBindingSet outSolution = BOpContext.bind(// + right,// + left,// + constraints,// + selectVars// + ); + + if (outSolution != null) { + + // Output the solution. + unsyncBuffer.add(outSolution); + + } + + } + + } + + } + + // flush the unsync buffer. + unsyncBuffer.flush(); + + // flush the sink. + sink.flush(); + + // Done. + return null; + + } finally { + + sink.close(); + + context.getSource().close(); + + if (ritr != null) + ritr.close(); + + } + + } + + } // class ChunkTask + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/engine/AbstractQueryEngineTestCase.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/engine/AbstractQueryEngineTestCase.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/engine/AbstractQueryEngineTestCase.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -22,11 +22,46 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* +Portions of this code are: + +Copyright Aduna (http://www.aduna-software.com/) © 2001-2007 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +*/ +/* * Created on Aug 30, 2011 */ package com.bigdata.bop.engine; +import info.aduna.iteration.Iterations; +import info.aduna.text.StringUtil; + +import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashSet; @@ -37,13 +72,23 @@ import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; +import junit.framework.AssertionFailedError; import junit.framework.TestCase2; import org.apache.log4j.Logger; +import org.openrdf.model.Statement; +import org.openrdf.model.util.ModelUtil; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.QueryResultUtil; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.query.impl.MutableTupleQueryResult; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.rdf.sparql.ast.ASTContainer; +import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.striterator.Dechunkerator; import com.bigdata.striterator.ICloseableIterator; @@ -649,4 +694,285 @@ } + /** + * Utility method compares expected and actual solutions and reports on any + * discrepancies. + * + * @param name + * The name of the test. + * @param testURI + * The URI for the test. + * @param store + * The {@link AbstractTripleStore} (optional). + * @param astContainer + * The {@link ASTContainer} (optional). + * @param queryResult + * The actual result. + * @param expectedResult + * The expected result. + * @param laxCardinality + * When <code>true</code>, strict cardinality will be enforced. + * @param checkOrder + * When <code>true</code>, the order must be the same. + * @throws AssertionFailedError + * if the results are not the same. + * @throws QueryEvaluationException + */ + static public void compareTupleQueryResults( + final String name,// + final String testURI, // + final AbstractTripleStore store,// + final ASTContainer astContainer,// + final TupleQueryResult queryResult,// + final TupleQueryResult expectedResult,// + final boolean laxCardinality,// + final boolean checkOrder// + ) throws QueryEvaluationException { + + /* + * Create MutableTupleQueryResult to be able to re-iterate over the + * results. + */ + + final MutableTupleQueryResult queryResultTable = new MutableTupleQueryResult(queryResult); + + final MutableTupleQueryResult expectedResultTable = new MutableTupleQueryResult(expectedResult); + + boolean resultsEqual; + if (laxCardinality) { + resultsEqual = QueryResultUtil.isSubset(queryResultTable, expectedResultTable); + } + else { + resultsEqual = QueryResultUtil.equals(queryResultTable, expectedResultTable); + + if (checkOrder) { + // also check the order in which solutions occur. + queryResultTable.beforeFirst(); + expectedResultTable.beforeFirst(); + + while (queryResultTable.hasNext()) { + final BindingSet bs = queryResultTable.next(); + final BindingSet expectedBs = expectedResultTable.next(); + + if (! bs.equals(expectedBs)) { + resultsEqual = false; + break; + } + } + } + } + + // Note: code block shows the expected and actual results. 
+ StringBuilder expectedAndActualResults = null; + if (!resultsEqual && true) { + queryResultTable.beforeFirst(); + expectedResultTable.beforeFirst(); + final StringBuilder message = new StringBuilder(2048); + message.append("\n============ "); + message.append(name); + message.append(" =======================\n"); + message.append("Expected result: \n"); + while (expectedResultTable.hasNext()) { + message.append(expectedResultTable.next()); + message.append("\n"); + } + message.append("============="); + StringUtil.appendN('=', name.length(), message); + message.append("========================\n"); + message.append("Query result: \n"); + while (queryResultTable.hasNext()) { + message.append(queryResultTable.next()); + message.append("\n"); + } + message.append("============="); + StringUtil.appendN('=', name.length(), message); + message.append("========================\n"); + expectedAndActualResults = message; +// log.error(message); + } + + if (!resultsEqual) { + + queryResultTable.beforeFirst(); + expectedResultTable.beforeFirst(); + + final List<BindingSet> queryBindings = Iterations.asList(queryResultTable); + final List<BindingSet> expectedBindings = Iterations.asList(expectedResultTable); + + final List<BindingSet> missingBindings = new ArrayList<BindingSet>(expectedBindings); + missingBindings.removeAll(queryBindings); + + final List<BindingSet> unexpectedBindings = new ArrayList<BindingSet>(queryBindings); + unexpectedBindings.removeAll(expectedBindings); + + final StringBuilder message = new StringBuilder(2048); + message.append("\n"); + message.append(testURI); + message.append("\n"); + message.append(name); + message.append("\n===================================\n"); + + if (!missingBindings.isEmpty()) { + + message.append("Missing bindings: \n"); + for (BindingSet bs : missingBindings) { + message.append(bs); + message.append("\n"); + } + + message.append("============="); + StringUtil.appendN('=', name.length(), message); + message.append("========================\n"); + } + + if (!unexpectedBindings.isEmpty()) { + message.append("Unexpected bindings: \n"); + for (BindingSet bs : unexpectedBindings) { + message.append(bs); + message.append("\n"); + } + + message.append("============="); + StringUtil.appendN('=', name.length(), message); + message.append("========================\n"); + } + + if (checkOrder && missingBindings.isEmpty() && unexpectedBindings.isEmpty()) { + message.append("Results are not in expected order.\n"); + message.append(" =======================\n"); + message.append("query result: \n"); + for (BindingSet bs: queryBindings) { + message.append(bs); + message.append("\n"); + } + message.append(" =======================\n"); + message.append("expected result: \n"); + for (BindingSet bs: expectedBindings) { + message.append(bs); + message.append("\n"); + } + message.append(" =======================\n"); + + log.error(message.toString()); + } + + if (expectedAndActualResults != null) { + message.append(expectedAndActualResults); + } + +// RepositoryConnection con = ((DatasetRepository)dataRep).getDelegate().getConnection(); +// System.err.println(con.getClass()); +// try { + if(astContainer!=null) { + message.append("\n===================================\n"); + message.append(astContainer.toString()); + } + if(store!=null&& + store.getStatementCount()<100) { + message.append("\n===================================\n"); + message.append("database dump:\n"); + message.append(store.dumpStore()); + } +// RepositoryResult<Statement> stmts = 
con.getStatements(null, null, null, false); +// while (stmts.hasNext()) { +// message.append(stmts.next()); +// message.append("\n"); +// } +// } finally { +// con.close(); +// } + + log.error(message.toString()); + fail(message.toString()); + } + /* debugging only: print out result when test succeeds + else { + queryResultTable.beforeFirst(); + + List<BindingSet> queryBindings = Iterations.asList(queryResultTable); + StringBuilder message = new StringBuilder(128); + + message.append("\n============ "); + message.append(getName()); + message.append(" =======================\n"); + + message.append(" =======================\n"); + message.append("query result: \n"); + for (BindingSet bs: queryBindings) { + message.append(bs); + message.append("\n"); + } + + System.out.print(message.toString()); + } + */ + } + + /** + * Compare the expected and actual query results for a graph query. + * + * @param name + * The name of the test. + * @param queryResult + * The actual result. + * @param expectedResult + * The expected result. + * + * @throws AssertionFailedError + * if the results are not the same. + */ + static public void compareGraphs(final String name, + final Set<Statement> queryResult, + final Set<Statement> expectedResult) { + + if (!ModelUtil.equals(expectedResult, queryResult)) { + // Don't use RepositoryUtil.difference, it reports incorrect diffs + /* + * Collection<? extends Statement> unexpectedStatements = + * RepositoryUtil.difference(queryResult, expectedResult); + * Collection<? extends Statement> missingStatements = + * RepositoryUtil.difference(expectedResult, queryResult); + * StringBuilder message = new StringBuilder(128); + * message.append("\n=======Diff: "); message.append(getName()); + * message.append("========================\n"); if + * (!unexpectedStatements.isEmpty()) { message.append("Unexpected + * statements in result: \n"); for (Statement st : + * unexpectedStatements) { message.append(st.toString()); + * message.append("\n"); } message.append("============="); for (int + * i = 0; i < getName().length(); i++) { message.append("="); } + * message.append("========================\n"); } if + * (!missingStatements.isEmpty()) { message.append("Statements + * missing in result: \n"); for (Statement st : missingStatements) { + * message.append(st.toString()); message.append("\n"); } + * message.append("============="); for (int i = 0; i < + * getName().length(); i++) { message.append("="); } + * message.append("========================\n"); } + */ + StringBuilder message = new StringBuilder(128); + message.append("\n============ "); + message.append(name); + message.append(" =======================\n"); + message.append("Expected result: \n"); + for (Statement st : expectedResult) { + message.append(st.toString()); + message.append("\n"); + } + message.append("============="); + StringUtil.appendN('=', name.length(), message); + message.append("========================\n"); + + message.append("Query result: \n"); + for (Statement st : queryResult) { + message.append(st.toString()); + message.append("\n"); + } + message.append("============="); + StringUtil.appendN('=', name.length(), message); + message.append("========================\n"); + + log.error(message.toString()); + fail(message.toString()); + } + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestAll.java 2012-04-16 
15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestAll.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -78,6 +78,12 @@ suite.addTestSuite(TestHTreeHashIndexOp.class); suite.addTestSuite(TestHTreeSolutionSetHashJoin.class); + /* + * Test suite for join against a named solution set when the left + * cardinality is low. + */ + suite.addTestSuite(TestNamedSolutionSetScanOp.class); + return suite; } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestNamedSolutionSetScanOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestNamedSolutionSetScanOp.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/bop/join/TestNamedSolutionSetScanOp.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -0,0 +1,65 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Apr 16, 2012 + */ +package com.bigdata.bop.join; + +import com.bigdata.rdf.sparql.ast.eval.TestInclude; + +import junit.framework.TestCase2; + +/** + * Test suite for low to no cardinality joins against a named solution set. + * + * @author thompsonbry + * + * FIXME Verify that we are handling SELECT and CONSTRAINTS as well as + * the operator specific annotations (NAME, SPARQL_CACHE). Look at the + * existing test suites for hash joins for examples that we can setup + * here. + * + * FIXME Verify that the output of this join operator is order + * preserving (that could be done in a data driven unit test at the + * SPARQL layer for INCLUDE). + */ +public class TestNamedSolutionSetScanOp extends TestCase2 { + + public TestNamedSolutionSetScanOp() { + } + + public TestNamedSolutionSetScanOp(String name) { + super(name); + } + + /** + * Note: There are some tests at the data-driven level. 
+ * + * @see TestInclude + */ + public void test_something() { + fail("write tests"); + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/CompiledSolutionSetStats.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/CompiledSolutionSetStats.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/CompiledSolutionSetStats.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -148,4 +148,21 @@ } + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append(super.toString()); + sb.append("{size=" + nsolutions); + sb.append(",usedVars=" + usedVars); + sb.append(",alwaysBound=" + alwaysBound); + sb.append(",notAlwaysBound=" + notAlwaysBound); + sb.append(",materialized=" + materialized); + sb.append(",constants=" + constants); + sb.append("}"); + + return sb.toString(); + + } + } \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -404,5 +404,5 @@ * used currently when there is a single datatype for attribute values. */ String RANGE_SAFE = "rangeSafe"; - -} + +} \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SolutionSetStatserator.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SolutionSetStatserator.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SolutionSetStatserator.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -33,6 +33,7 @@ import java.util.NoSuchElementException; import java.util.Set; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; @@ -123,8 +124,7 @@ static public ISolutionSetStats get(final IBindingSet[] bindingSets) { final SolutionSetStatserator itr = new SolutionSetStatserator( - new CloseableIteratorWrapper<IBindingSet[]>( - new SingleValueIterator(bindingSets))); + BOpUtility.asIterator(bindingSets)); try { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysis.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -201,16 +201,7 @@ // private static final Logger log = Logger.getLogger(StaticAnalysis.class); /** - * FIXME This will go away now once we have the ability to resolve named - * subqueries against the {@link ISparqlCache}. 
* - * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/531"> - * SPARQL Update for Named Solution Sets </a> - */ - private static boolean requireDeclaredNamedSubquery = !QueryHints.DEFAULT_SOLUTION_SET_CACHE; - - /** - * * @param queryRoot * The root of the query. We need to have this on hand in order * to resolve {@link NamedSubqueryInclude}s during static @@ -2025,7 +2016,66 @@ } + /** + * Return the join variables for an INCLUDE of a pre-existing named solution + * set. + * + * @param nsi + * The {@link NamedSubqueryInclude} + * @param solutionSet + * The name of a pre-existing solution set. + * @param vars + * The caller's collection. + * + * @return The caller's collection. + */ + public Set<IVariable<?>> getJoinVars(final NamedSubqueryInclude nsi, + final String solutionSet, final Set<IVariable<?>> vars) { + + final String name = solutionSet; + + /* + * The variables which will be definitely bound based on the statistics + * collected for that solution set. + */ + final ISolutionSetStats stats = getSolutionSetStats(name); + + if(stats == null) + throw new RuntimeException("Not found: "+name); + + /* + * All variables which are bound in each solution of this solution set. + * + * Note: The summary data for a named solution set is typically + * immutable, so we insert the variables into a mutable collection in + * order to make changes to that collection below. + */ + final Set<IVariable<?>> boundInSolutionSet = new LinkedHashSet<IVariable<?>>( + stats.getAlwaysBound()); + + /* + * The variables which are definitely bound on entry to the INCLUDE + * operator based on the static analysis of the query, including where + * it appears in the join order of the query. + */ + final Set<IVariable<?>> incomingBindings = getDefinitelyIncomingBindings( + nsi, new LinkedHashSet<IVariable<?>>()); + + /* + * This is only those variables which are bound on entry into the + * INCLUDE *and* which are "must" bound variables projected by the + * pre-existing named solution set. + */ + boundInSolutionSet.retainAll(incomingBindings); + + vars.addAll(boundInSolutionSet); + + return vars; + + } + + /** * Return any variables which are used after the given node in the current * ordering of its parent {@link JoinGroupNode} but DOES NOT consider the * parent or the PROJECTION for the query in which this group appears. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StaticAnalysisBase.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -38,7 +38,6 @@ import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; import com.bigdata.rdf.sparql.ast.cache.ISparqlCache; -import com.bigdata.rdf.sparql.ast.cache.SolutionSetMetadata; import com.bigdata.rdf.sparql.ast.eval.IEvaluationContext; /** @@ -127,7 +126,7 @@ * @return The caller's {@link Set}. * * TODO Unit tests for different kinds of AST nodes to make sure - * that we always get/ignore the variables in filters as approriate. + * that we always get/ignore the variables in filters as appropriate. * For example, an optional {@link StatementPatternNode} can have * filter nodes attached. 
*/ Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/ISparqlCache.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -61,7 +61,7 @@ * @return <code>true</code> iff a solution set by that name existed and was * cleared. */ - boolean clearSolutions(IEvaluationContext ctx, String solutionSet); + boolean clearSolutions(String solutionSet); /** * Clear all named solution sets. @@ -71,7 +71,6 @@ /** * Create a named solution set. * - * @param ctx * @param solutionSet * The name of the solution set. * @param params * @@ -80,24 +79,21 @@ * @throws RuntimeException * if a solution set exists for that name. */ - void createSolutions(IEvaluationContext ctx, String solutionSet, ISPO[] params); + void createSolutions(String solutionSet, ISPO[] params); /** * Save the solutions to a named solution set. * - * @param ctx * @param solutionSet * The name of the solution set. * @param src * The solutions. */ - void putSolutions(IEvaluationContext ctx, String solutionSet, - ICloseableIterator<IBindingSet[]> src); + void putSolutions(String solutionSet, ICloseableIterator<IBindingSet[]> src); /** * Read the solutions from a named solution set. * - * @param ctx * @param solutionSet * The name of the solution set. * * @return The solutions. * * @throws IllegalStateException * if no solution set with that name exists. */ - ICloseableIterator<IBindingSet[]> getSolutions(IEvaluationContext ctx, - String solutionSet); + ICloseableIterator<IBindingSet[]> getSolutions(String solutionSet); /** * Return computed statistics for a named solution set. @@ -123,13 +118,12 @@ /** * Return <code>true</code> iff a named solution set exists. * - * @param ctx * @param solutionSet * The name of the solution set. * * @return <code>true</code> iff a solution set having that name exists.
*/ - boolean existsSolutions(IEvaluationContext ctx, String solutionSet); + boolean existsSolutions(String solutionSet); // /** // * Return the result from the cache -or- <code>null</code> if there is a Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SolutionSetMetadata.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -47,7 +47,6 @@ import com.bigdata.rdf.internal.encoder.IVSolutionSetDecoder; import com.bigdata.rdf.internal.encoder.IVSolutionSetEncoder; import com.bigdata.rdf.sparql.ast.ISolutionSetStats; -import com.bigdata.rdf.sparql.ast.SolutionSetStats; import com.bigdata.rdf.sparql.ast.SolutionSetStatserator; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rwstore.PSOutputStream; @@ -149,17 +148,21 @@ public ICloseableIterator<IBindingSet[]> get() { - final long solutionSetAddr = this.solutionSetAddr; + final long solutionSetAddr = this.solutionSetAddr; - if (solutionSetAddr == IRawStore.NULL) - throw new IllegalStateException(); + if (solutionSetAddr == IRawStore.NULL) + throw new IllegalStateException(); - try { - return new SolutionSetStreamDecoder(solutionSetAddr, solutionCount); - } catch (IOException e) { - throw new RuntimeException(e); - } + try { + return new SolutionSetStreamDecoder(solutionSetAddr, solutionCount); + + } catch (IOException e) { + + throw new RuntimeException(e); + + } + } /** @@ -259,9 +262,10 @@ // Nothing more to be read. - if (log.isDebugEnabled()) - log.debug("Read solutionSet: solutionSetSize=" + nsolutions); - + if (log.isDebugEnabled()) + log.debug("Read solutionSet: name=" + name + + ", solutionSetSize=" + nsolutions); + return null; } @@ -290,15 +294,19 @@ t[i] = decoder .decodeSolution(buf, true/* resolveCachedValues */); + if (log.isTraceEnabled()) + log.trace("Read: name=" + name + ", solution=" + t[i]); + } // Update the #of solution sets which have been decoded. nsolutions += chunkSize; if (log.isTraceEnabled()) - log.trace("Read chunk: chunkSize=" + chunkSize + ", bytesRead=" - + (CHUNK_HEADER_SIZE + byteLength) - + ", solutionSetSize=" + nsolutions); + log.trace("Read chunk: name=" + name + ", chunkSize=" + + chunkSize + ", bytesRead=" + + (CHUNK_HEADER_SIZE + byteLength) + + ", solutionSetSize=" + nsolutions); // Return the decoded solutions. return t; @@ -369,6 +377,10 @@ encoder.encodeSolution(buf, chunk[i]); + if (log.isTraceEnabled()) + log.trace("Wrote name=" + name + ", solution=" + + chunk[i]); + } // #of bytes written onto the buffer. 
@@ -391,10 +403,10 @@ chunkCount++; if (log.isDebugEnabled()) - log.debug("Wrote chunk: chunkSize=" + chunk.length - + ", chunkCount=" + chunkCount - + ", bytesBuffered=" + bytesBuffered - + ", solutionSetSize=" + nsolutions); + log.debug("Wrote chunk: name=" + name + ", chunkSize=" + + chunk.length + ", chunkCount=" + chunkCount + + ", bytesBuffered=" + bytesBuffered + + ", solutionSetSize=" + nsolutions); } @@ -411,9 +423,10 @@ newAddr = out.getAddr(); if (log.isDebugEnabled()) - log.debug("Wrote solutionSet: solutionSetSize=" + nsolutions - + ", chunkCount=" + chunkCount + ", encodedBytes=" - + nbytes + ", bytesWritten=" + out.getBytesWritten()); + log.debug("Wrote solutionSet: name=" + name + + ", solutionSetSize=" + nsolutions + ", chunkCount=" + + chunkCount + ", encodedBytes=" + nbytes + + ", bytesWritten=" + out.getBytesWritten()); } catch (IOException e) { @@ -448,9 +461,9 @@ } /** - * TODO Test performance with and without gzip. Extract into the CREATE - * schema. - */ + * TODO Test performance with and without gzip. Extract into the CREATE + * schema so we can do this declaratively. + */ private static final boolean zip = true; private OutputStream wrapOutputStream(final OutputStream out) Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/cache/SparqlCache.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -116,9 +116,6 @@ * makes the cache somewhat more difficult to integrate since the same * query is not always the same (e.g., include the hash of the exogenous * solutions in the query hash code and we will get less reuse). - - * TODO If the {@link IEvaluationContext} is not required, then back it - * out of the method signatures.
*/ public class SparqlCache implements ISparqlCache { @@ -364,7 +361,7 @@ } @Override - public boolean clearSolutions(final IEvaluationContext ctx, final String solutionSet) { + public boolean clearSolutions(final String solutionSet) { if (log.isInfoEnabled()) log.info("solutionSet: " + solutionSet); @@ -382,8 +379,8 @@ } - public void putSolutions(final IEvaluationContext ctx, final String solutionSet, - final ICloseableIterator<IBindingSet[]> src) { + public void putSolutions(final String solutionSet, + final ICloseableIterator<IBindingSet[]> src) { if (solutionSet == null) throw new IllegalArgumentException(); @@ -415,8 +412,7 @@ } - public void createSolutions(final IEvaluationContext ctx, - final String solutionSet, final ISPO[] params) { + public void createSolutions(final String solutionSet, final ISPO[] params) { if (solutionSet == null) throw new IllegalArgumentException(); @@ -469,8 +465,8 @@ } - public ICloseableIterator<IBindingSet[]> getSolutions(final IEvaluationContext ctx, - final String solutionSet) { + public ICloseableIterator<IBindingSet[]> getSolutions( + final String solutionSet) { if (solutionSet == null) throw new IllegalArgumentException(); @@ -485,8 +481,7 @@ } - public boolean existsSolutions(final IEvaluationContext ctx, - final String solutionSet) { + public boolean existsSolutions(final String solutionSet) { if (solutionSet == null) throw new IllegalArgumentException(); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-16 15:07:46 UTC (rev 6293) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java 2012-04-17 20:11:31 UTC (rev 6294) @@ -27,6 +27,8 @@ package com.bigdata.rdf.sparql.ast.eval; +import info.aduna.iteration.CloseableIteration; + import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; @@ -37,6 +39,7 @@ import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.atomic.AtomicLong; import org.apache.log4j.Logger; @@ -46,6 +49,7 @@ import org.openrdf.model.URI; import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; +import org.openrdf.query.BindingSet; import org.openrdf.query.GraphQueryResult; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.UpdateExecutionException; @@ -78,13 +82,16 @@ import com.bigdata.bop.rdf.update.ParseOp; import com.bigdata.bop.rdf.update.RemoveStatementsOp; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataStatement; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; +import com.bigdata.rdf.sail.Sesame2BigdataIterator; import com.bigdata.rdf.sparql.ast.ASTContainer; import com.bigdata.rdf.sparql.ast.AbstractGraphDataUpdate; import com.bigdata.rdf.sparql.ast.AddGraph; +import com.bigdata.rdf.sparql.ast.BindingsClause; import com.bigdata.rdf.sparql.ast.ConstantNode; import com.bigdata.rdf.sparql.ast.ConstructNode; import com.bigdata.rdf.sparql.ast.CopyGraph; @@ -95,6 +102,7 @@ import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.LoadGraph; import com.bigdata.rdf.sparql.ast.MoveGraph; +import 
com.bigdata.rdf.sparql.ast.NamedSubqueryInclude; import com.bigdata.rdf.sparql.ast.ProjectionNode; import com.bigdata.rdf.sparql.ast.QuadData; import com.bigdata.rdf.sparql.ast.QuadsDataOrNamedSolutionSet; @@ -110,8 +118,13 @@ import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.store.BigdataOpenRDFBindingSetsResolverator; +import com.bigdata.striterator.Chunkerator; import com.bigdata.striterator.ICloseableIterator; +import cutthecrap.utils.striterators.Resolver; +import cutthecrap.utils.striterators.Striterator; + /** * Class handles SPARQL update query plan generation. * @@ -475,31 +488,39 @@ if (isDeleteInsert) { /* - * DELETE + INSERT. - * - * Note: The semantics of DELETE + INSERT are that the WHERE - * clause is executed once. The solutions to that need to be fed - * once through the DELETE clause. After the DELETE clause has - * been processed for all solutions to the WHERE clause, the - * INSERT clause is then processed. So, we need to materialize - * the WHERE clause results when both the DELETE clause and the - * INSERT clause are present. - * - * FIXME For large intermediate results, we would be much better - * off putting the data onto an HTree and processing the - * bindings as IVs rather than materializing them as RDF Values - * (and even for small data sets, we would be better off - * avoiding materialization of the RDF Values and using an - * ASTConstructIterator which builds ISPOs using IVs rather than - * Values). - * - * FIXME Support INSERT INTO / DELETE FROM here. - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/524 - * (SPARQL Cache) - */ + * DELETE + INSERT. + * + * Note: The semantics of DELETE + INSERT are that the WHERE + * clause is executed once. The solutions to that need to be fed + * once through the DELETE clause. After the DELETE clause has + * been processed for all solutions to the WHERE clause, the + * INSERT clause is then processed. So, we need to materialize + * the WHERE clause results when both the DELETE clause and the + * INSERT clause are present. + * + * FIXME For large intermediate results, we would be much better + * off putting the data onto an HTree (or, better yet, a chain + * of blocks) and processing the bindings as IVs rather than + * materializing them as RDF Values (and even for small data + * sets, we would be better off avoiding materialization of the + * RDF Values and using an ASTConstructIterator which builds + * ISPOs using IVs rather than Values). + * + * Note: Unlike operations against a graph, we do NOT perform + * truth maintenance for updates against solution sets, + * therefore we could get by nicely with operations on + * IBindingSet[]s without RDF Value materialization. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/524 + * (SPARQL Cache) + */ - // Run as a SELECT query. + final LexiconRelation lexicon = context + .getAbstractTripleStore().getLexiconRelation(); + + final int chunkSize = 100; // TODO configure. + + // Run as a SELECT query. final MutableTupleQueryResult result = new MutableTupleQueryResult( ASTEvalHelper.evaluateTupleQuery(context.db, astContainer, null/* bindingSets */)); @@ -512,48 +533,239 @@ // rewind. result.beforeFirst(); - final ConstructNode template = op.getDeleteClause() - .getQuadData().flatten(); + // Figure out if operating on solutions or graphs.
+ final boolean isSolutionSet = deleteClause.isSolutions(); - final ASTConstructIterator itr = new ASTConstructIterator( - context.conn.getTripleStore(), template, result); + if (isSolutionSet) { - while (itr.hasNext()) { + /* + * Target is solution set. + * + * @see + * https://sourceforge.net/apps/trac/bigdata/ticket + * /524 (SPARQL Cache) + * + * FIXME The DELETE+INSERT code path is failing + * because it is based on the DELETE FROM SELECT + * code path below and attempts to rewrite the query + * to use a MINUS operator. However, the setup is + * different in this case since we have already run + * the original WHERE clause into a rewindable tuple + * result set. + * + * The best way to fix this would be to stay within + * the native IBindingSet[] model and write the + * solutions from the WHERE clause onto a chained + * list of blocks, just as we do when writing on a + * named solution set. That chained list could then + * be joined into the query with an INCLUDE. Since + * we do not want this "temporary" solution set to + * be visible, we could prefix it with a UUID and + * make sure that it is written onto a memory + * manager, and also make sure that we eventually + * delete the named solution set since it should be + * temporary. + * + * Another way to handle this is to flow the + * solutions from a solution set into a new query + * which has the appropriate MINUS logic [This does + * not work because the solutions wind up on the + * LEFT side of the MINUS but they need to be on the + * RIGHT]. Here, the solutions would appear on the + * pipeline rather than being joined in. The result + * is the same. This would make it easier to manage + * the visibility of the solutions. Note that there + * is no API to flow an + * ICloseableIterator<IBindingSet[]> into an + * IRunningQuery. Only an IBindingSet[]. However, we + * might be able to wrap up the named solution set + * as an IChunkMessage. + */ - final BigdataStatement stmt = itr.next(); + // The named solution set on which we will write. + final String solutionSet = deleteClause.getName(); - addOrRemoveStatement( - context.conn.getSailConnection(), stmt, - false/* insert */); + // A unique named solution set used to INCLUDE the + // solutions to be deleted. + final String tempSolutionSet = "-" + solutionSet + + "-" + UUID.randomUUID(); - } + // Write solutions to be deleted onto temp set. + context.sparqlCache.putSolutions( + tempSolutionSet, + asBigdataIterator(lexicon, chunkSize, + result)); - } + try { + /* + * Replace WHERE clause with a join group + * containing an INCLUDE for the solutions to be + * removed. + * + * WHERE := { INCLUDE %namedSet MINUS {INCLUDE %temp} } + * + * FIXME This is failing (I think) because + * the named solution set is not visible. + * It looks like it might be only visible + * when using a SCAN?!? + */ +// final JoinGroupNode oldWhereClause = (JoinGroupNode) queryRoot +// .getWhereClause(); + + final JoinGroupNode newWhereClause = new JoinGroupNode(); + queryRoot.setWhereClause(newWhereClause); + + // Include the source solutions. + newWhereClause.addArg(new NamedSubqueryInclude( + solutionSet)); + + // MINUS solutions to be removed. + final JoinGroupNode minusOp = new JoinGroupNode( + new NamedSubqueryInclude( + tempSolutionSet)); + newWhereClause.addArg(minusOp); + minusOp.setMinus(true); + +// log.error("oldWhereClause="+oldWhereClause); +// log.error("newWhereClause="+newWhereClause); + +// /* +// * Re-write the AST to handle DELETE solutions.
+// */ +// convertQueryForDeleteSolutions(queryRoot, +// solutionSet); + + // Set the projection node. + ... [truncated message content] |
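To make the join strategy described in this change set concrete, here is a small self-contained model (plain Java, illustrative only, not bigdata code) of what NamedSolutionSetScanOp does: stream the named solution set once in its stored order and test each of the low-cardinality pipeline solutions in an inner loop. The Map-based solution representation and all names below are invented; the real operator works on IBindingSet chunks and also applies its SELECT and CONSTRAINTS annotations before emitting a solution.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Toy model of the scan-plus-inner-loop join. The outer loop visits the
 * named solution set exactly once and never reorders it, which is why this
 * plan preserves the order of the named solutions (and hence a SLICE over
 * them); the inner loop visits the small set of source solutions.
 */
public class ScanJoinSketch {

    /** Merge two solutions, or return null if they disagree on a shared variable. */
    static Map<String, Object> tryJoin(final Map<String, Object> right,
            final Map<String, Object> left) {
        final Map<String, Object> out = new LinkedHashMap<String, Object>(right);
        for (final Map.Entry<String, Object> e : left.entrySet()) {
            final Object prior = out.put(e.getKey(), e.getValue());
            if (prior != null && !prior.equals(e.getValue()))
                return null; // inconsistent shared binding: no join.
        }
        return out;
    }

    /** O(|namedSet| x |leftSolutions|): no hash index build, order preserving. */
    static void scanJoin(final Iterable<Map<String, Object>> namedSet,
            final List<Map<String, Object>> leftSolutions) {
        for (final Map<String, Object> right : namedSet) { // scan once, in order
            for (final Map<String, Object> left : leftSolutions) { // low cardinality
                final Map<String, Object> out = tryJoin(right, left);
                if (out != null)
                    System.out.println(out); // the real operator buffers to its sink
            }
        }
    }
}

With a single (typically empty) exogenous solution the inner loop degenerates to one consistency test per named solution, which is why building a hash index first is pure overhead in that case; as the pipeline cardinality grows, the hash join described in the log message wins.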
From: <mrp...@us...> - 2012-05-03 09:06:49
|
Revision: 6312 http://bigdata.svn.sourceforge.net/bigdata/?rev=6312&view=rev Author: mrpersonick Date: 2012-05-03 09:06:37 +0000 (Thu, 03 May 2012) Log Message: ----------- added support for query hint hint:cutoffLimit - will limit the input read into a join from a predicate's access path Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/CutoffLimitHint.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/IPredicate.java 2012-05-02 17:49:50 UTC (rev 6311) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/IPredicate.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -34,6 +34,9 @@ import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.BOpTupleFilter; import com.bigdata.bop.ap.filter.DistinctFilter; +import com.bigdata.bop.join.HTreeHashJoinOp; +import com.bigdata.bop.join.JVMHashJoinOp; +import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.joinGraph.IEvaluationPlan; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; @@ -338,6 +341,20 @@ * an allowable value range for some slot in that predicate. */ String RANGE = IPredicate.class.getName() + ".range"; + + /** + * Limits the number of elements read from the access path for this + * predicate when used in a join ({@link HTreeHashJoinOp}, + * {@link JVMHashJoinOp}, {@link PipelineJoin}). Note that this limits + * the INPUT to a join; it says nothing about the output from + * a join. + */ + String CUTOFF_LIMIT = IPredicate.class.getName() + ".cutoffLimit"; + + /** + * Default is to not cut off the join.
+ */ + final long DEFAULT_CUTOFF_LIMIT = Long.MAX_VALUE; } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java 2012-05-02 17:49:50 UTC (rev 6311) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -49,6 +49,7 @@ import com.bigdata.relation.accesspath.IBindingSetAccessPath; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer; +import com.bigdata.striterator.ICloseableIterator; /** * A hash join against an {@link IAccessPath} based on the {@link HTree} and @@ -389,8 +390,19 @@ final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>( op.getChunkCapacity(), sink); + final long cutoffLimit = pred.getProperty( + IPredicate.Annotations.CUTOFF_LIMIT, + IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); + + // Obtain the iterator for the current join dimension. + final ICloseableIterator<IBindingSet> itr; + if (cutoffLimit == Long.MAX_VALUE) + itr = ((IBindingSetAccessPath<?>) accessPath).solutions(stats); + else + itr = ((IBindingSetAccessPath<?>) accessPath).solutions(cutoffLimit, stats); + state.hashJoin( - ((IBindingSetAccessPath<?>) accessPath).solutions(stats),// left + itr,// left unsyncBuffer// out ); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java 2012-05-02 17:49:50 UTC (rev 6311) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -49,6 +49,7 @@ import com.bigdata.relation.accesspath.IBindingSetAccessPath; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer; +import com.bigdata.striterator.ICloseableIterator; /** * A hash join against an {@link IAccessPath} based on the Java collections @@ -325,8 +326,19 @@ final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>( op.getChunkCapacity(), sink); + final long cutoffLimit = pred.getProperty( + IPredicate.Annotations.CUTOFF_LIMIT, + IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); + + // Obtain the iterator for the current join dimension. + final ICloseableIterator<IBindingSet> itr; + if (cutoffLimit == Long.MAX_VALUE) + itr = ((IBindingSetAccessPath<?>) accessPath).solutions(stats); + else + itr = ((IBindingSetAccessPath<?>) accessPath).solutions(cutoffLimit, stats); + state.hashJoin( - ((IBindingSetAccessPath<?>) accessPath).solutions(stats),// left + itr,// left unsyncBuffer // where to write the solutions which join. 
 );

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -1527,8 +1527,16 @@
          */
         protected void handleJoin() {
 
+            final long cutoffLimit = predicate.getProperty(
+                    IPredicate.Annotations.CUTOFF_LIMIT,
+                    IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
+
             // Obtain the iterator for the current join dimension.
-            final IChunkedOrderedIterator<?> itr = accessPath.iterator();
+            final IChunkedOrderedIterator<?> itr;
+            if (cutoffLimit == Long.MAX_VALUE)
+                itr = accessPath.iterator();
+            else
+                itr = accessPath.iterator(0, cutoffLimit, (int) cutoffLimit);
 
             try {
 
@@ -1638,9 +1646,16 @@
          */
         protected void handleJoin2() {
 
+            final long cutoffLimit = predicate.getProperty(
+                    IPredicate.Annotations.CUTOFF_LIMIT,
+                    IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
+
             // Obtain the iterator for the current join dimension.
-            final ICloseableIterator<IBindingSet> itr = ((IBindingSetAccessPath<?>) accessPath)
-                    .solutions(stats);
+            final ICloseableIterator<IBindingSet> itr;
+            if (cutoffLimit == Long.MAX_VALUE)
+                itr = ((IBindingSetAccessPath<?>) accessPath).solutions(stats);
+            else
+                itr = ((IBindingSetAccessPath<?>) accessPath).solutions(cutoffLimit, stats);
 
             try {

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -732,6 +732,22 @@
 
     }
 
+    /**
+     * {@inheritDoc}
+     *
+     * @see https://sourceforge.net/apps/trac/bigdata/ticket/209 (Access path
+     *      should visit solutions for high level query).
+     */
+    public ICloseableIterator<IBindingSet> solutions(final long limit,
+            final BaseJoinStats stats) {
+
+//        final IVariable<?>[] vars = BOpUtility
+//                .getDistinctArgumentVariables(predicate);
+
+        return BOpContext.solutions(iterator(0, limit, (int) limit), predicate, /*vars,*/ stats);
+
+    }
+
     final public IChunkedOrderedIterator<R> iterator() {
 
         return iterator(0L/* offset */, 0L/* limit */, 0);

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -56,5 +56,16 @@
      * should visit solutions for high level query).
      */
     ICloseableIterator<IBindingSet> solutions(BaseJoinStats stats);
-
+
+    /**
+     * Return an iterator which will visit the solutions drawn from the access
+     * path with a limit on the number of solutions drawn.
+     *
+     * TODO Should this be visiting IBindingSet[]s?
+     *
+     * @see https://sourceforge.net/apps/trac/bigdata/ticket/209 (Access path
+     *      should visit solutions for high level query).
+ */ + ICloseableIterator<IBindingSet> solutions(long limit, BaseJoinStats stats); + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java 2012-05-02 17:49:50 UTC (rev 6311) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/RangeBOp.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -23,20 +23,17 @@ */ package com.bigdata.rdf.internal.constraints; +import java.util.LinkedHashMap; import java.util.Map; import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpBase; import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.ImmutableBOp; import com.bigdata.bop.ModifiableBOpBase; import com.bigdata.bop.NV; -import com.bigdata.bop.ap.Predicate; import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; @@ -60,11 +57,11 @@ public interface Annotations extends ImmutableBOp.Annotations { - /** - * The variable whose range is restricted by the associated - * {@link #FROM} and/or {@link #TO} filters. - */ - String VAR = RangeBOp.class.getName() + ".var"; +// /** +// * The variable whose range is restricted by the associated +// * {@link #FROM} and/or {@link #TO} filters. +// */ +// String VAR = RangeBOp.class.getName() + ".var"; /** The inclusive lower bound. */ String FROM = RangeBOp.class.getName() + ".from"; @@ -77,18 +74,24 @@ // /** Cached to/from lookups. */ // private transient volatile IValueExpression<IV> to, from; - public RangeBOp(final IVariable<? extends IV> var) { + public RangeBOp() { - this(BOp.NOARGS, NV.asMap(new NV(Annotations.VAR, var))); + this(BOp.NOARGS, new LinkedHashMap<String,Object>()); } - public RangeBOp(final IVariable<? extends IV> var, +// public RangeBOp(final IVariable<? extends IV> var) { +// +// this(BOp.NOARGS, NV.asMap(new NV(Annotations.VAR, var))); +// +// } + + public RangeBOp(//final IVariable<? extends IV> var, final IValueExpression<? extends IV> from, final IValueExpression<? extends IV> to) { this(BOp.NOARGS, NV.asMap( - new NV(Annotations.VAR, var), +// new NV(Annotations.VAR, var), new NV(Annotations.FROM, from), new NV(Annotations.TO, to))); @@ -108,10 +111,10 @@ super(op); } - @SuppressWarnings("unchecked") - public IVariable<? extends IV> var() { - return (IVariable<? extends IV>) getProperty(Annotations.VAR); - } +// @SuppressWarnings("unchecked") +// public IVariable<? extends IV> var() { +// return (IVariable<? extends IV>) getProperty(Annotations.VAR); +// } @SuppressWarnings("unchecked") public IValueExpression<? 
extends IV> from() { @@ -219,7 +222,7 @@ return this; } - final RangeBOp asBound = new RangeBOp(var()); + final RangeBOp asBound = new RangeBOp(); if (asBoundFrom != null) asBound.setFrom(asBoundFrom); if (asBoundTo != null) Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-05-02 17:49:50 UTC (rev 6311) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -31,6 +31,7 @@ import com.bigdata.bop.BufferAnnotations; import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.fed.QueryEngineFactory; @@ -405,4 +406,13 @@ */ String RANGE_SAFE = "rangeSafe"; + /** + * Used to mark a statement pattern with a cutoff limit for how many + * elements (maximum) should be read from its access path. This + * effectively limits the input into the join. + * + * @see {@link Annotations#CUTOFF_LIMIT}. + */ + String CUTOFF_LIMIT = "cutoffLimit"; + } \ No newline at end of file Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-05-02 17:49:50 UTC (rev 6311) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -3808,6 +3808,12 @@ // Add the RangeBOp anns.add(new NV(IPredicate.Annotations.RANGE, range.getRangeBOp())); } + + final String cutoffLimit = sp.getQueryHint(QueryHints.CUTOFF_LIMIT); + if (cutoffLimit != null) { + // Add the cutoff limit + anns.add(new NV(IPredicate.Annotations.CUTOFF_LIMIT, Long.valueOf(cutoffLimit))); + } final Properties queryHints = sp.getQueryHints(); Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/CutoffLimitHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/CutoffLimitHint.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/CutoffLimitHint.java 2012-05-03 09:06:37 UTC (rev 6312) @@ -0,0 +1,63 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+/*
+ * Created on Nov 27, 2011
+ */
+
+package com.bigdata.rdf.sparql.ast.hints;
+
+import com.bigdata.rdf.sparql.ast.ASTBase;
+import com.bigdata.rdf.sparql.ast.QueryHints;
+import com.bigdata.rdf.sparql.ast.StatementPatternNode;
+import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
+
+final class CutoffLimitHint extends AbstractQueryHint<Long> {
+
+    protected CutoffLimitHint() {
+        super(QueryHints.CUTOFF_LIMIT, Long.MAX_VALUE);
+    }
+
+    @Override
+    public void handle(final AST2BOpContext context,
+            final QueryHintScope scope, final ASTBase op,
+            final Long value) {
+
+        if (op instanceof StatementPatternNode) {
+
+            _setQueryHint(context, scope, op, getName(), value);
+
+            return;
+
+        }
+
+    }
+
+    @Override
+    public Long validate(String value) {
+
+        return Long.valueOf(value);
+
+    }
+
+}
\ No newline at end of file

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -101,7 +101,6 @@
         add(new RunOnceHint());
         add(new OptimizerQueryHint());
         add(new OptimisticQueryHint());
-        add(new RangeHint());
 
         add(new AnalyticQueryHint());
         add(new NativeDistinctQueryHint());
@@ -142,7 +141,21 @@
         add(new PipelineMaxParallelHint());
         add(new PipelineMaxMessagesPerTaskHint());
         add(new PipelineQueueCapacityHint());
+
+        /*
+         * Mark a statement pattern as "range safe", which in effect means it
+         * uses only one datatype in its value space (for bindings for O) and
+         * that the filters in the query are respecting that datatype.
+         */
+        add(new RangeHint());
 
+        /*
+         * Limit the input into joins by limiting the number of elements read
+         * from an access path. Not exactly a cutoff join, which limits output
+         * from the join rather than input into it.
+         */
+        add(new CutoffLimitHint());
+
     }
 
 }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTRangeOptimizer.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -157,9 +157,9 @@
     public static RangeBOp toRangeBOp(
             final RangeNode range, final GlobalAnnotations globals) {
 
-        final IVariable<? extends IV> var = range.var().getValueExpression();
+//        final IVariable<? extends IV> var = range.var().getValueExpression();
 
-        final RangeBOp bop = new RangeBOp(var);
+        final RangeBOp bop = new RangeBOp();
 
         final ValueExpressionNode from = range.from();

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -2769,7 +2769,7 @@
                 p == null ? null : _p.getIV(),//
                 o == null ? null : _o.getIV(),//
                 (c == null || !quads) ? null : _c.getIV(),//
-                filter//
+                filter, range//
                 );
 
     }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java	2012-05-02 17:49:50 UTC (rev 6311)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java	2012-05-03 09:06:37 UTC (rev 6312)
@@ -409,7 +409,7 @@
                 new NV(BOp.Annotations.BOP_ID, nextId++),//
                 new NV(Annotations.TIMESTAMP, timestamp),//
                 new NV(IPredicate.Annotations.RANGE, new RangeBOp(//
-                        origProperty1,// FIXME verify correct var w/ MikeP
+//                        origProperty1,// FIXME verify correct var w/ MikeP
                         new MathBOp(origProperty1, new Constant(
                                 new XSDIntegerIV(BigInteger.valueOf(120))),
                                 MathOp.MINUS,globals),//
@@ -440,7 +440,7 @@
                 new NV(BOp.Annotations.BOP_ID, nextId++),//
                 new NV(Annotations.TIMESTAMP, timestamp),//
                 new NV(IPredicate.Annotations.RANGE, new RangeBOp(//
-                        origProperty2,// FIXME verify correct var with MikeP
+//                        origProperty2,// FIXME verify correct var with MikeP
                         new MathBOp(origProperty2, new Constant(
                                 new XSDIntegerIV(BigInteger.valueOf(170))),
                                 MathOp.MINUS,globals),//
|
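For reference, a hypothetical query showing how the new hint is meant to be applied (this assumes the usual bigdata query-hint conventions: the hint: prefix bound to http://www.bigdata.com/queryHints# and the hint:Prior scope, which attaches a hint to the statement pattern that precedes it; the data vocabulary is invented for illustration):

   PREFIX hint: <http://www.bigdata.com/queryHints#>
   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

   SELECT ?product ?label
   WHERE {
     ?product rdfs:label ?label .
     # Read at most 1000 elements from the access path of the preceding
     # statement pattern. This caps the INPUT into the join; it does not
     # bound the number of solutions the join produces.
     hint:Prior hint:cutoffLimit "1000" .
   }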
From: <tho...@us...> - 2012-05-09 15:31:46
|
Revision: 6313 http://bigdata.svn.sourceforge.net/bigdata/?rev=6313&view=rev Author: thompsonbry Date: 2012-05-09 15:31:31 +0000 (Wed, 09 May 2012) Log Message: ----------- This is a checkpoint on reification done right. The SPARQL grammar has been extended per the draft specification, a test suite has been written for that grammar extension, and the parser is passing that test suite. In addition, this commit includes a test suite for evaluation of the new reification syntax, but I have not yet started on query evaluation for that syntax. The code paths for reification done right are currently conditionally disabled by QueryHints#DEFAULT_REIFICATION_DONE_RIGHT. That property will go away once this feature is finished. https://sourceforge.net/apps/trac/bigdata/ticket/526 (Reification done right)

Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/VarNode.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ASTVisitorBase.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BigdataASTContext.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BlankNodeVarProcessor.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPattern.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPatternBuilder.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/TriplePatternExprBuilder.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/JJTSyntaxTreeBuilderState.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/SyntaxTreeBuilder.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/SyntaxTreeBuilderConstants.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/SyntaxTreeBuilderTokenManager.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/SyntaxTreeBuilderTreeConstants.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/SyntaxTreeBuilderVisitor.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/sparql.jjt
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/AbstractBigdataExprBuilderTestCase.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestAll_AST.java

Added Paths:
-----------
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.rq
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.srx
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.ttl
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.rq
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.ttl branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.ttl branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.rq branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.srx branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.ttl branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/ASTTRefPattern.java branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/sparql.jj branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/sparql/TestReificationDoneRightParser.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -400,6 +400,17 @@ boolean DEFAULT_SOLUTION_SET_CACHE = false; /** + * Option controls whether or not the proposed SPARQL extension for + * reification done right is enabled. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/526"> + * Reification Done Right</a> + */ + String REIFICATION_DONE_RIGHT = "reificationDoneRight"; + + boolean DEFAULT_REIFICATION_DONE_RIGHT = false; + + /** * Used to mark a predicate as "range safe" - that is, we can safely * apply the range bop to constrain the predicate. This can only be * used currently when there is a single datatype for attribute values. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/StatementPatternNode.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -159,6 +159,19 @@ */ String RANGE = "range"; + /** + * An optional annotation whose value is the variable which will be + * bound to the statement identifier for the matched statement patterns. + * The statement identifier is always formed from the subject, predicate + * and object (the triple). The context is NOT represented in the + * statement identifier. 
This keeps the semantics consistent with RDF + * reification. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/526"> + * Reification Done Right</a> + */ + String SID = "sid"; + } /** @@ -263,30 +276,71 @@ } + /** + * The variable or constant for the subject position (required). + */ final public TermNode s() { return (TermNode) get(0); } + /** + * The variable or constant for the predicate position (required). + */ final public TermNode p() { return (TermNode) get(1); } + /** + * The variable or constant for the object position (required). + */ final public TermNode o() { return (TermNode) get(2); } + /** + * The variable or constant for the context position (required iff in quads + * mode). + */ final public TermNode c() { return (TermNode) get(3); } - + + final public void setC(final TermNode c) { + + this.setArg(3, c); + + } + + /** + * The statement identifier variable for triples which match this statement + * pattern (optional). The statement identifier is the composition of the + * (subject, predicate, and object) positions of the matched statements. + * + * @see Annotations#SID + */ + final public VarNode sid() { + + return (VarNode) getProperty(Annotations.SID); + + } + + /** + * Set the SID variable. + */ + final public void setSid(final VarNode sid) { + + setProperty(Annotations.SID, sid); + + } + /** * The scope for this statement pattern (either named graphs or default * graphs). @@ -300,6 +354,15 @@ } + final public void setScope(final Scope scope) { + + if (scope == null) + throw new IllegalArgumentException(); + + setProperty(Annotations.SCOPE, scope); + + } + /** * {@inheritDoc} * <p> @@ -503,13 +566,18 @@ sb.append(", ").append(c); } - final Scope scope = getScope(); - if (scope != null) { - sb.append(", ").append(scope); - } - sb.append(")"); + final VarNode sid = sid(); + if (sid != null) { + sb.append(" [sid=" + sid + "]"); + } + + final Scope scope = getScope(); + if (scope != null) { + sb.append(" [scope=" + scope + "]"); + } + if(isOptional()) { sb.append(" [optional]"); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/VarNode.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/VarNode.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/VarNode.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -31,15 +31,33 @@ * interpretation of the parse tree, AST, etc rather than one given * directly in the original query). */ - String ANONYMOUS = VarNode.class.getName() + ".anonymous"; + String ANONYMOUS = "anon"; boolean DEFAULT_ANONYMOUS = false; + /* + * Note: Annotating variables which are "SIDS" does not work out because + * we lack sufficient context in the parser to make this determination + * when the variable appears outside of the BIND( <<...>> as ?sid ) + * context. In order for the parser to do this it would have to reason + * about the visibility of variables as well, which is just too much + * effort. + */ +// /** +// * Annotation marks a variable which is a statement identifier. +// * +// * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/526"> +// * Reification Done Right</a> +// */ +// String SID = "sid"; +// +// boolean DEFAULT_SID = false; + /** * Annotation marks a variable which is actually the name of a solution * set. 
*/ - String SOLUTION_SET = VarNode.class.getName() + ".solutionSet"; + String SOLUTION_SET = "solutionSet"; boolean DEFAULT_SOLUTION_SET = false; @@ -102,7 +120,31 @@ } - /** +// /** +// * Return <code>true</code> if this is an SID variable (a variable which +// * binds statement identifiers composed from triples). This marker is useful +// * mainly when reading the AST tree as an aid to understanding where a given +// * variable came from in the original query. +// */ +// public boolean isSID() { +// +// return getProperty(Annotations.SID, Annotations.DEFAULT_SID); +// +// } +// +// /** +// * Mark this as an SID variable (one which represents a triple reference +// * pattern). +// * +// * @param val +// */ +// public void setSID(final boolean val) { +// +// setProperty(Annotations.SID, val); +// +// } + + /** * Return <code>true</code> if the variable represents a solution set name. */ public boolean isSolutionSet() { @@ -145,15 +187,14 @@ } /** - * Overridden to mark anonymous variables. - * - * @see #isAnonymous() - */ + * Overridden to mark metadata about variables. + */ @Override public String toString() { return "VarNode(" + getValueExpression() + ")" + (isAnonymous() ? "[anonymous]" : "") +// + (isSID() ? "[sid]" : "") + (isSolutionSet() ? "[solutionSet]" : "") ; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/TestAll.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -24,6 +24,7 @@ package com.bigdata.rdf.sparql.ast.eval; import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.eval.reif.TestReificationDoneRightEval; import junit.framework.Test; import junit.framework.TestCase; @@ -134,6 +135,15 @@ */ suite.addTestSuite(TestTCK.class); + if (QueryHints.DEFAULT_REIFICATION_DONE_RIGHT) { + + /* + * Test suite for the SPARQL extension for "reification done right". + */ + suite.addTestSuite(TestReificationDoneRightEval.class); + + } + // Test suite for embedded bigdata query hints. suite.addTestSuite(TestQueryHints.class); Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,210 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+/*
+ * Created on Sep 7, 2011
+ */
+
+package com.bigdata.rdf.sparql.ast.eval.reif;
+
+import com.bigdata.bop.ap.Predicate;
+import com.bigdata.rdf.sparql.ast.eval.AbstractDataDrivenSPARQLTestCase;
+
+/**
+ * Reification Done Right test suite.
+ * <p>
+ * The basic extension is:
+ *
+ * <pre>
+ * BIND(<<?s,?p,?o>> as ?sid)
+ * </pre>
+ *
+ * This triple reference pattern associates the bindings on the subject,
+ * predicate, and object positions of some triple pattern with the binding on a
+ * variable for the triple. Except in the case where ?sid is already bound, the
+ * triple pattern itself is processed normally and the bindings are concatenated
+ * to form a representation of a triple, which is then bound on ?sid. When ?sid
+ * is bound, it is decomposed into its subject, predicate, and object components
+ * and those values are bound on the triple pattern.
+ * <p>
+ * When there are nested triple reference patterns, then they are just unwound
+ * into simple triple reference patterns. A variable is created to provide the
+ * association between each nested triple reference pattern and the role played
+ * by that triple reference pattern in the outer triple reference pattern.
+ * <p>
+ * We can handle this internally by allowing an optional named/positional role
+ * for a {@link Predicate} which (when defined) becomes bound to the composition
+ * of the (subject, predicate, and object) bindings for the predicate. It might
+ * be easiest to handle this if we allowed the [c] position to be optional as
+ * well and ignored it when in a triples only mode. The sid/triple binding would
+ * always be "in scope" but it would only be interpreted when non-null (either a
+ * constant or a variable).
+ *
+ * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/526">
+ *      Reification Done Right</a>
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id: TestTCK.java 6261 2012-04-09 10:28:48Z thompsonbry $
+ */
+public class TestReificationDoneRightEval extends AbstractDataDrivenSPARQLTestCase {
+
+// private static final Logger log = Logger.getLogger(TestReificationDoneRight.class);
+
+    /**
+     *
+     */
+    public TestReificationDoneRightEval() {
+    }
+
+    /**
+     * @param name
+     */
+    public TestReificationDoneRightEval(String name) {
+        super(name);
+    }
+
+    /**
+     * Simple query involving alice, bob, and an information extractor.
+     */
+    public void test_reificationDoneRight_01() throws Exception {
+
+        new TestHelper("reif/rdr-01", // testURI,
+                "reif/rdr-01.rq",// queryFileURL
+                "reif/rdr-01.ttl",// dataFileURL
+                "reif/rdr-01.srx"// resultFileURL
+                ).runTest();
+
+    }
+
+    /**
+     * Same data, but the query uses the BIND() syntax and pulls out some
+     * more information.
+     */
+    public void test_reificationDoneRight_01a() throws Exception {
+
+        new TestHelper("reif/rdr-01a", // testURI,
+                "reif/rdr-01a.rq",// queryFileURL
+                "reif/rdr-01.ttl",// dataFileURL
+                "reif/rdr-01a.srx"// resultFileURL
+                ).runTest();
+
+    }
+
+    /**
+     * Simple query ("who bought sybase").
+ */ + public void test_reificationDoneRight_02() throws Exception { + + new TestHelper("reif/rdr-02", // testURI, + "reif/rdr-02.rq",// queryFileURL + "reif/rdr-02.ttl",// dataFileURL + "reif/rdr-02.srx"// resultFileURL + ).runTest(); + + } + + /** + * Same data, but the query uses the BIND() syntax and pulls out some + * more information. + */ + public void test_reificationDoneRight_02a() throws Exception { + + new TestHelper("reif/rdr-02a", // testURI, + "reif/rdr-02a.rq",// queryFileURL + "reif/rdr-02a.ttl",// dataFileURL + "reif/rdr-02a.srx"// resultFileURL + ).runTest(); + + } + + /** + * <pre> + * prefix : <http://example.com/> + * SELECT ?a { + * BIND( <<?a :b :c>> AS ?ignored ) + * } + * </pre> + * + * <pre> + * prefix : <http://example.com/> . + * :a1 :b :c . + * :a2 :b :c . + * <<:a2 :b :c>> :d :e . + * <<:a3 :b :c>> :d :e . + * </pre> + * + * <pre> + * ?a + * --- + * :a1 + * :a2 + * </pre> + * + * @throws Exception + */ + public void test_reificationDoneRight_03() throws Exception { + + new TestHelper("reif/rdr-03", // testURI, + "reif/rdr-03.rq",// queryFileURL + "reif/rdr-03.ttl",// dataFileURL + "reif/rdr-03.srx"// resultFileURL + ).runTest(); + + } + + /** + * <pre> + * prefix : <http://example.com/> + * SELECT ?a ?e { + * BIND( <<?a :b :c>> AS ?sid ) . + * ?sid :d ?e . + * } + * </pre> + * + * <pre> + * prefix : <http://example.com/> . + * :a1 :b :c . + * :a2 :b :c . + * <<:a2 :b :c>> :d :e . + * <<:a3 :b :c>> :d :e . + * </pre> + * + * <pre> + * ?a ?e + * ------ + * :a2 :e + * </pre> + * + * @throws Exception + */ + public void test_reificationDoneRight_03a() throws Exception { + + new TestHelper("reif/rdr-03a", // testURI, + "reif/rdr-03a.rq",// queryFileURL + "reif/rdr-03a.ttl",// dataFileURL + "reif/rdr-03a.srx"// resultFileURL + ).runTest(); + + } + +} Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.rq =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.rq (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.rq 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,10 @@ +# Who says Bob knows Alice? + +prefix foaf: <http://xmlns.com/foaf/0.1/> +prefix dc: <http://purl.org/dc/terms/> + +select ?src where { + ?x foaf:name "Alice" . + ?y foaf:name "Bob" . + <<?x foaf:knows ?y>> dc:source ?src . 
+}

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.srx
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.srx	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.srx	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<sparql
+	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+	xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+	xmlns:xs="http://www.w3.org/2001/XMLSchema#"
+	xmlns="http://www.w3.org/2005/sparql-results#" >
+  <head>
+    <variable name="src"/>
+  </head>
+  <results>
+    <result>
+      <binding name="src">
+        <uri>http://reasoner.example.com/engines#engine_1</uri>
+      </binding>
+    </result>
+  </results>
+</sparql>
\ No newline at end of file

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.ttl
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.ttl	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01.ttl	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,72 @@
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix dc: <http://purl.org/dc/terms/> .
+@prefix re: <http://reasoner.example.com/engines#> .
+@prefix rr: <http://reasoner.example.com/rules#> .
+@prefix rv: <http://reasoner.example.com/vocabulary#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+_:alice
+	rdf:type	foaf:Person ;
+	foaf:name	"Alice" ;
+	foaf:mbox	<mailto:alice@work> ;
+	foaf:knows	_:bob.
+
+# The terse syntax:
+#<<_:alice foaf:mbox <mailto:alice@work>>>
+#	dc:source <http://hr.example.com/employees#bob> ;
+#	dc:created "2012-02-05T12:34:00Z"^^xsd:dateTime .
+#
+# The expanded syntax.
+_:s1 rdf:subject _:alice .
+_:s1 rdf:predicate foaf:mbox .
+_:s1 rdf:object <mailto:alice@work> .
+_:s1 rdf:type rdf:Statement .
+_:s1 dc:source <http://hr.example.com/employees#bob> ;
+	dc:created "2012-02-05T12:34:00Z"^^xsd:dateTime .
+
+# Terse
+#<<_:alice foaf:knows _:bob>>
+#	dc:source re:engine_1;
+#	rv:rule rr:rule524 ;
+#	rv:confidence 0.9835 .
+# Expanded
+_:s2 rdf:subject _:alice .
+_:s2 rdf:predicate foaf:knows .
+_:s2 rdf:object _:bob .
+_:s2 rdf:type rdf:Statement .
+_:s2
+	dc:source re:engine_1;
+	rv:rule rr:rule524 ;
+	rv:confidence 0.9835 .
+
+_:bob
+	rdf:type	foaf:Person ;
+	foaf:name	"Bob" ;
+	foaf:knows	_:alice ;
+	foaf:mbox	<mailto:bob@work> ;
+	foaf:mbox	<mailto:bob@home> .
+
+# Terse
+# <<_:bob foaf:mbox <mailto:bob@home>>>
+# Expanded
+_:s3 rdf:subject _:bob .
+_:s3 rdf:predicate foaf:mbox .
+_:s3 rdf:object <mailto:bob@home> .
+_:s3 rdf:type rdf:Statement .
+_:s3
+	dc:creator <http://hr.example.com/infra/crawlers#we1> ;
+	dc:created "2011-04-05T12:00:00Z"^^xsd:dateTime ;
+	dc:source <http://whatever.nu/profile/bob1975> .
+
+# Terse
+# <<_:bob foaf:mbox <mailto:bob@home>>>
+# Expanded
+_:s4 rdf:subject _:bob .
+_:s4 rdf:predicate foaf:mbox .
+_:s4 rdf:object <mailto:bob@home> .
+_:s4 rdf:type rdf:Statement .
+_:s4
+	dc:source <http://hr.example.com/employees/bob> ;
+	dc:created "2012-02-05T12:34:00Z"^^xsd:dateTime .

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.rq
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.rq	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.rq	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,14 @@
+# Who knows Alice, what is our source for that fact, and what is our confidence
+# in that fact?
+
+prefix foaf: <http://xmlns.com/foaf/0.1/>
+prefix dc: <http://purl.org/dc/terms/>
+prefix rv: <http://reasoner.example.com/vocabulary#>
+
+select ?who ?src ?conf where {
+  ?x foaf:name "Alice" .
+  ?y foaf:name ?who .
+  BIND( <<?x foaf:knows ?y>> as ?sid ) .
+  ?sid dc:source ?src .
+  ?sid rv:confidence ?conf .
+}
\ No newline at end of file

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<sparql
+	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+	xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+	xmlns:xs="http://www.w3.org/2001/XMLSchema#"
+	xmlns="http://www.w3.org/2005/sparql-results#" >
+  <head>
+    <variable name="who"/>
+    <variable name="src"/>
+    <variable name="conf"/>
+  </head>
+  <results>
+    <result>
+      <binding name="who">
+        <literal>Bob</literal>
+      </binding>
+      <binding name="src">
+        <uri>http://reasoner.example.com/engines#engine_1</uri>
+      </binding>
+      <binding name="conf">
+        <literal datatype="xsd:decimal">0.9835</literal>
+      </binding>
+    </result>
+  </results>
+</sparql>
\ No newline at end of file

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.rq
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.rq	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.rq	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,8 @@
+# Who bought Sybase and where did we get that fact?
+ +prefix : <http://example.com/> +prefix dc: <http://purl.org/dc/terms/> + +SELECT ?src ?who { + <<?who :bought :sybase>> dc:source ?src +} Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.srx =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.srx (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.srx 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,21 @@ +<?xml version="1.0"?> +<sparql + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:xs="http://www.w3.org/2001/XMLSchema#" + xmlns="http://www.w3.org/2005/sparql-results#" > + <head> + <variable name="who"/> + <variable name="src"/> + </head> + <results> + <result> + <binding name="who"> + <uri>http://example.com/SAP</uri> + </binding> + <binding name="src"> + <uri>http://example.com/news/us-sybase</uri> + </binding> + </result> + </results> +</sparql> \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,16 @@ +# <<:SAP :bought :sybase>> dc:source news:us-sybase ; +# dc:created "2011-04-05T12:00:00Z"^^xsd:dateTime . + +@prefix : <http://example.com/> . +@prefix news: <http://example.com/news/> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix dc: <http://purl.org/dc/terms/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +_:s1 rdf:subject :SAP . +_:s1 rdf:predicate :bought . +_:s1 rdf:object :sybase . +_:s1 rdf:type rdf:Statement . +_:s1 dc:source news:us-sybase . +_:s1 dc:created "2011-04-05T12:00:00Z"^^xsd:dateTime . Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.rq =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.rq (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.rq 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,10 @@ +# Who bought Sybase and where did we get that fact? + +prefix : <http://example.com/> +prefix dc: <http://purl.org/dc/terms/> + +SELECT ?src ?who ?created { + BIND( <<?who :bought :sybase>> as ?sid ) . + ?sid dc:source ?src . 
+ OPTIONAL {?sid dc:created ?created} +} Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<sparql + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:xs="http://www.w3.org/2001/XMLSchema#" + xmlns="http://www.w3.org/2005/sparql-results#" > + <head> + <variable name="who"/> + <variable name="src"/> + <variable name="created"/> + </head> + <results> + <result> + <binding name="who"> + <uri>http://example.com/SAP</uri> + </binding> + <binding name="src"> + <uri>http://example.com/news/us-sybase</uri> + </binding> + <binding name="created"> + <literal datatype="xsd:dateTime">2011-04-05T12:00:00Z</literal> + </binding> + </result> + </results> +</sparql> \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.ttl =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.ttl (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.ttl 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,16 @@ +# <<:SAP :bought :sybase>> dc:source news:us-sybase ; +# dc:created "2011-04-05T12:00:00Z"^^xsd:dateTime . + +@prefix : <http://example.com/> . +@prefix news: <http://example.com/news/> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix dc: <http://purl.org/dc/terms/> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +_:s1 rdf:subject :SAP . +_:s1 rdf:predicate :bought . +_:s1 rdf:object :sybase . +_:s1 rdf:type rdf:Statement . +_:s1 dc:source news:us-sybase . +_:s1 dc:created "2011-04-05T12:00:00Z"^^xsd:dateTime . 
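The rdr-02a pair above makes the intended semantics concrete: the BIND( <<?who :bought :sybase>> AS ?sid ) form is shorthand over the reification quads in the expanded .ttl data. A sketch (not one of the committed test files) of the same question asked directly against that data with the plain reification vocabulary:

   PREFIX :    <http://example.com/>
   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
   PREFIX dc:  <http://purl.org/dc/terms/>

   SELECT ?src ?who ?created {
     # ?sid stands in for the triple <<?who :bought :sybase>>.
     ?sid rdf:type rdf:Statement ;
          rdf:subject ?who ;
          rdf:predicate :bought ;
          rdf:object :sybase ;
          dc:source ?src .
     OPTIONAL { ?sid dc:created ?created }
   }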
Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.rq
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.rq	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.rq	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,4 @@
+prefix : <http://example.com/>
+SELECT ?a {
+   BIND( <<?a :b :c>> AS ?ignored )
+}

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.srx
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.srx	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.srx	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<sparql
+	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+	xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+	xmlns:xs="http://www.w3.org/2001/XMLSchema#"
+	xmlns="http://www.w3.org/2005/sparql-results#" >
+  <head>
+    <variable name="a"/>
+  </head>
+  <results>
+    <result>
+      <binding name="a">
+        <uri>http://example.com/a1</uri>
+      </binding>
+    </result>
+    <result>
+      <binding name="a">
+        <uri>http://example.com/a2</uri>
+      </binding>
+    </result>
+  </results>
+</sparql>
\ No newline at end of file

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.ttl
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.ttl	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03.ttl	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,25 @@
+# prefix : <http://example.com/> .
+# :a1 :b :c .
+# :a2 :b :c .
+# <<:a2 :b :c>> :d :e .
+# <<:a3 :b :c>> :d :e .
+
+@prefix : <http://example.com/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+
+:a1 :b :c .
+:a2 :b :c .
+
+_:s2 rdf:subject :a2 .
+_:s2 rdf:predicate :b .
+_:s2 rdf:object :c .
+_:s2 rdf:type rdf:Statement .
+
+_:s3 rdf:subject :a3 .
+_:s3 rdf:predicate :b .
+_:s3 rdf:object :c .
+_:s3 rdf:type rdf:Statement .
+
+_:s2 :d :e .
+
+_:s3 :d :e .

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.rq
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.rq	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.rq	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,5 @@
+prefix : <http://example.com/>
+SELECT ?a ?e {
+   BIND( <<?a :b :c>> AS ?sid ) .
+   ?sid :d ?e .
+}

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.srx
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.srx	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.srx	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<sparql
+	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+	xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+	xmlns:xs="http://www.w3.org/2001/XMLSchema#"
+	xmlns="http://www.w3.org/2005/sparql-results#" >
+  <head>
+    <variable name="a"/>
+    <variable name="e"/>
+  </head>
+  <results>
+    <result>
+      <binding name="a">
+        <uri>http://example.com/a2</uri>
+      </binding>
+      <binding name="e">
+        <uri>http://example.com/e</uri>
+      </binding>
+    </result>
+  </results>
+</sparql>
\ No newline at end of file

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.ttl
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.ttl	                        (rev 0)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-03a.ttl	2012-05-09 15:31:31 UTC (rev 6313)
@@ -0,0 +1,25 @@
+# prefix : <http://example.com/> .
+# :a1 :b :c .
+# :a2 :b :c .
+# <<:a2 :b :c>> :d :e .
+# <<:a3 :b :c>> :d :e .
+
+@prefix : <http://example.com/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+
+:a1 :b :c .
+:a2 :b :c .
+
+_:s2 rdf:subject :a2 .
+_:s2 rdf:predicate :b .
+_:s2 rdf:object :c .
+_:s2 rdf:type rdf:Statement .
+
+_:s3 rdf:subject :a3 .
+_:s3 rdf:predicate :b .
+_:s3 rdf:object :c .
+_:s3 rdf:type rdf:Statement .
+
+_:s2 :d :e .
+
+_:s3 :d :e .
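The parser changes which follow implement the unwinding described in the TestReificationDoneRightEval javadoc: a nested triple reference pattern is flattened into simple ones, and an anonymous SID variable (see createSidVar() below, which generates names of the form -sid-N) ties each inner pattern to the role it plays in the outer one. Conceptually, for a hypothetical nested pattern:

   #   << <<?a :b ?c>> :d ?e >> :f ?g
   #
   # unwinds into:
   #
   #   BIND( <<?a :b ?c>> AS ?-sid-1 ) .
   #   BIND( <<?-sid-1 :d ?e>> AS ?-sid-2 ) .
   #   ?-sid-2 :f ?g .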
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ASTVisitorBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ASTVisitorBase.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ASTVisitorBase.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -887,4 +887,9 @@ return node.childrenAccept(this, data); } + public Object visit(ASTTRefPattern node, Object data) + throws VisitorException { + return node.childrenAccept(this, data); + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BigdataASTContext.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BigdataASTContext.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BigdataASTContext.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -141,4 +141,18 @@ } + /** + * Return a new variable for a SID (Statement Identifier) + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/526"> + * Reification Done Right</a> + */ + protected VarNode createSidVar() { + + final VarNode var = new VarNode("-sid-" + constantVarID++); + + return var; + + } + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BlankNodeVarProcessor.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BlankNodeVarProcessor.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/BlankNodeVarProcessor.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -72,7 +72,7 @@ } @Override - public Object visit(ASTBasicGraphPattern node, Object data) + public Object visit(final ASTBasicGraphPattern node, final Object data) throws VisitorException { // The same Blank node ID cannot be used across Graph Patterns @@ -85,10 +85,10 @@ } @Override - public Object visit(ASTBlankNode node, Object data) + public Object visit(final ASTBlankNode node, final Object data) throws VisitorException { - String bnodeID = node.getID(); + final String bnodeID = node.getID(); String varName = findVarName(bnodeID); if (varName == null) { @@ -99,7 +99,7 @@ } } - ASTVar varNode = new ASTVar(SyntaxTreeBuilderTreeConstants.JJTVAR); + final ASTVar varNode = new ASTVar(SyntaxTreeBuilderTreeConstants.JJTVAR); varNode.setName(varName); varNode.setAnonymous(true); @@ -108,10 +108,10 @@ return super.visit(node, data); } - private String findVarName(String bnodeID) throws VisitorException { + private String findVarName(final String bnodeID) throws VisitorException { if (bnodeID == null) return null; - String varName = conversionMap.get(bnodeID); + final String varName = conversionMap.get(bnodeID); if (varName == null && usedBNodeIDs.contains(bnodeID)) throw new VisitorException( "BNodeID already used in another scope: " + bnodeID); @@ -119,7 +119,7 @@ } @Override - public Object visit(ASTBlankNodePropertyList node, Object data) + public Object visit(final ASTBlankNodePropertyList node, final Object data) throws VisitorException { node.setVarName(createAnonVarName()); @@ -127,7 +127,7 @@ } @Override - public Object visit(ASTCollection node, Object data) + public Object 
visit(final ASTCollection node, final Object data)
         throws VisitorException
     {
         node.setVarName(createAnonVarName());

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPattern.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPattern.java	2012-05-03 09:06:37 UTC (rev 6312)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPattern.java	2012-05-09 15:31:31 UTC (rev 6313)
@@ -183,10 +183,18 @@
             log.info("pattern=" + sp);
 
         // Fill in the inherited context and scope.
-        final StatementPatternNode t = new StatementPatternNode(sp.s(), sp.p(),
-                sp.o(), context, spScope);
+//        final StatementPatternNode t = new StatementPatternNode(sp.s(), sp.p(),
+//                sp.o(), context, spScope);
+
+        if (context != null) {
+
+            sp.setC(context);
+
+        }
+
+        sp.setScope(spScope);
 
-        children.add(t);
+        children.add(sp);
 
     }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPatternBuilder.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPatternBuilder.java	2012-05-03 09:06:37 UTC (rev 6312)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/GroupGraphPatternBuilder.java	2012-05-09 15:31:31 UTC (rev 6313)
@@ -50,6 +50,7 @@
 import com.bigdata.rdf.sail.sparql.ast.ASTNamedSubqueryInclude;
 import com.bigdata.rdf.sail.sparql.ast.ASTOptionalGraphPattern;
 import com.bigdata.rdf.sail.sparql.ast.ASTServiceGraphPattern;
+import com.bigdata.rdf.sail.sparql.ast.ASTTRefPattern;
 import com.bigdata.rdf.sail.sparql.ast.ASTUnionGraphPattern;
 import com.bigdata.rdf.sail.sparql.ast.ASTVar;
 import com.bigdata.rdf.sail.sparql.ast.ASTWhereClause;
@@ -472,30 +473,54 @@
     }
 
     /**
-     * A BIND (or FILTER) can appear in an {@link ASTBasicGraphPattern}.
-     *
-     * @return The {@link AssignmentNode} for the BIND.
-     */
+     * A BIND (or FILTER) can appear in an {@link ASTBasicGraphPattern}.
+     *
+     * @return The {@link AssignmentNode} for the BIND -or- <code>null</code>
+     *         if this is <code>BIND(tripleRef AS var)</code>.
+     */
    @Override
-    final public AssignmentNode visit(final ASTBind node, Object data)
+    final public Object visit(final ASTBind node, final Object data)
             throws VisitorException {
 
         if (node.jjtGetNumChildren() != 2)
             throw new AssertionError("Expecting two children, not "
                     + node.jjtGetNumChildren() + ", node=" + node.dump(">>>"));
 
-        final ValueExpressionNode ve = (ValueExpressionNode) node
-                .jjtGetChild(0).jjtAccept(this, data);
+        if (node.jjtGetChild(0) instanceof ASTTRefPattern) {
+            /*
+             * Note: This case is handled by visit(ASTTRefPattern,Object) when
+             * we invoke jjtAccept() for the expression node. That method has
+             * already added the appropriate SP to the group. We do not want to
+             * do anything more here.
+             *
+             * Note: The caller does not actually do anything with the return
+             * value from this method, so returning a [null] here is Ok.
+ * + * @see <a + * href="https://sourceforge.net/apps/trac/bigdata/ticket/526"> + * Reification Done Right</a> + */ + + node.jjtGetChild(0).jjtAccept(this, data); + + return null; + + } + + final Object expr = node.jjtGetChild(0).jjtAccept(this, data); + + final ValueExpressionNode ve = (ValueExpressionNode) expr; + final Node aliasNode = node.jjtGetChild(1); final String alias = ((ASTVar) aliasNode).getName(); - final AssignmentNode bind = new AssignmentNode(new VarNode(alias), ve); + final AssignmentNode bind = new AssignmentNode(new VarNode(alias), ve); - graphPattern.add(bind); + graphPattern.add(bind); - return bind; + return bind; } @@ -505,7 +530,7 @@ * @return The {@link AssignmentNode}. */ @Override - final public AssignmentNode visit(final ASTLet node, Object data) + final public AssignmentNode visit(final ASTLet node, final Object data) throws VisitorException { if (node.jjtGetNumChildren() != 2) @@ -535,7 +560,7 @@ * @return The constraint. */ @Override - final public ValueExpressionNode visit(final ASTConstraint node, Object data) + final public ValueExpressionNode visit(final ASTConstraint node, final Object data) throws VisitorException { final ValueExpressionNode valueExpr = (ValueExpressionNode) node @@ -551,7 +576,7 @@ * INCLUDE for a named subquery result set. */ @Override - final public Void visit(final ASTNamedSubqueryInclude node, Object data) + final public Void visit(final ASTNamedSubqueryInclude node, final Object data) throws VisitorException { final NamedSubqueryInclude includeNode = new NamedSubqueryInclude( @@ -629,7 +654,7 @@ * declarations for that.) */ @Override - final public Void visit(final ASTServiceGraphPattern node, Object data) + final public Void visit(final ASTServiceGraphPattern node, final Object data) throws VisitorException { // left arg is the service reference (a value expression). 
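Taken together, visit(ASTBind) above and visit(ASTTRefPattern) in TriplePatternExprBuilder below reduce a BIND of a triple reference pattern to a single annotated statement pattern. A sketch of the node the two visitors cooperate to build for BIND(<<?who :bought :sybase>> AS ?sid); the ConstantNodes [bought]/[sybase] and the [context]/[scope] values are stand-ins for what the parser resolves:

    // Sketch only: the SP carries the BIND alias as its SID variable and is
    // added to the enclosing join group; no AssignmentNode is generated.
    final StatementPatternNode sp = new StatementPatternNode(
            new VarNode("who"), bought, sybase, context, scope);
    sp.setSid(new VarNode("sid")); // the BIND alias becomes the SID variable
    graphPattern.addSP(sp);        // the SP joins the current group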
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/TriplePatternExprBuilder.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/TriplePatternExprBuilder.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/TriplePatternExprBuilder.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -30,14 +30,24 @@ import java.util.ArrayList; import java.util.List; +import org.openrdf.model.BNode; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; import org.openrdf.model.vocabulary.RDF; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.sail.sparql.ast.ASTBind; import com.bigdata.rdf.sail.sparql.ast.ASTBlankNodePropertyList; import com.bigdata.rdf.sail.sparql.ast.ASTCollection; import com.bigdata.rdf.sail.sparql.ast.ASTObjectList; import com.bigdata.rdf.sail.sparql.ast.ASTPropertyList; import com.bigdata.rdf.sail.sparql.ast.ASTPropertyListPath; +import com.bigdata.rdf.sail.sparql.ast.ASTTRefPattern; +import com.bigdata.rdf.sail.sparql.ast.ASTVar; +import com.bigdata.rdf.sail.sparql.ast.Node; import com.bigdata.rdf.sail.sparql.ast.VisitorException; +import com.bigdata.rdf.sparql.ast.ConstantNode; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.TermNode; import com.bigdata.rdf.sparql.ast.VarNode; @@ -52,12 +62,12 @@ /** * @param context */ - public TriplePatternExprBuilder(BigdataASTContext context) { + public TriplePatternExprBuilder(final BigdataASTContext context) { super(context); } @Override - final public Object visit(final ASTPropertyList propListNode, Object data) + final public Object visit(final ASTPropertyList propListNode, final Object data) throws VisitorException { final TermNode subject = (TermNode) data; @@ -89,7 +99,7 @@ @Override final public Object visit(final ASTPropertyListPath propListNode, - Object data) throws VisitorException { + final Object data) throws VisitorException { final TermNode subject = (TermNode) data; @@ -150,7 +160,7 @@ } @Override - final public List<TermNode> visit(final ASTObjectList node, Object data) + final public List<TermNode> visit(final ASTObjectList node, final Object data) throws VisitorException { final int childCount = node.jjtGetNumChildren(); @@ -167,7 +177,7 @@ } @Override - final public VarNode visit(final ASTBlankNodePropertyList node, Object data) + final public VarNode visit(final ASTBlankNodePropertyList node, final Object data) throws VisitorException { final VarNode bnodeVar = context.createAnonVar(node.getVarName()); @@ -182,10 +192,11 @@ * Handle the RDF Collection syntax. */ @Override - public VarNode visit(final ASTCollection node, Object data) + public VarNode visit(final ASTCollection node, final Object data) throws VisitorException { final String listVarName = node.getVarName(); + final VarNode rootListVar = context.createAnonVar(listVarName); TermNode listVar = rootListVar; @@ -669,4 +680,172 @@ // } // } + /* + * Reification done right. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/526 (Reification + * done right) + */ + + /** + * This is invoked in two different contexts. 
One for a bare triple
+     * reference pattern:
+     *
+     * <pre>
+     * <<a b c>> d e
+     * </pre>
+     *
+     * and the other for a BIND() of a triple reference pattern onto a statement
+     * identifier variable:
+     *
+     * <pre>
+     * BIND(<<a b c>> as ?sidVar)
+     * </pre>
+     *
+     * In both cases we translate the triple reference pattern into a new
+     * {@link StatementPatternNode}. We reach back to the parent to decide which
+     * of the two invocation contexts applies and either generate a new SID
+     * variable or use the one from the BIND(). Then we set the SID variable on
+     * the {@link StatementPatternNode} and add it to the current graph pattern
+     * group.
+     *
+     * <p>
+     * If either the subject or object position in the triple reference pattern
+     * is a triple reference pattern, then it will have been turned into a
+     * {@link StatementPatternNode} by recursion through this method and we
+     * replace it with the SID variable which was assigned to that
+     * {@link StatementPatternNode}.
+     *
+     * @return The SID variable. This allows the existing code paths to
+     *         handle nested triple reference patterns without change.
+     */
+    public VarNode visit(final ASTTRefPattern node,
+            final Object data) throws VisitorException {
+
+        /*
+         * Accept and convert.
+         */
+
+        final TermNode s = (TermNode) node.jjtGetChild(0).jjtAccept(this, data);
+
+        final TermNode p = (TermNode) node.jjtGetChild(1).jjtAccept(this, data);
+
+        final TermNode o = (TermNode) node.jjtGetChild(2).jjtAccept(this, data);
+
+        /*
+         * Check constraints.
+         */
+
+        if (s instanceof ConstantNode) {
+
+            final BigdataValue v = ((ConstantNode) s).getValue();
+
+            if (v instanceof Literal) {
+
+                throw new VisitorException(
+                        "Subject in triple reference pattern may not be literal.");
+
+            }
+
+        } else {
+            if (((VarNode) s).isAnonymous()) {
+                /*
+                 * Blank nodes have already been translated into variables.
+                 *
+                 * TODO We could run into trouble here if anonymous variables
+                 * are introduced for anything other than a blank node (and
+                 * there are). We need to explicitly mark anonymous variables
+                 * which correspond to blank nodes in the original query and
+                 * then test for isBlankNode() here.
+                 */
+                throw new VisitorException(
+                        "Subject in triple reference pattern may not be blank node.");
+            }
+        }
+
+        if (p instanceof ConstantNode) {
+
+            final BigdataValue v = ((ConstantNode) p).getValue();
+
+            if (!(v instanceof URI)) {
+
+                throw new VisitorException(
+                        "Predicate in triple reference pattern must be IRI.");
+
+            }
+
+        }
+
+        if (o instanceof ConstantNode) {
+
+            final BigdataValue v = ((ConstantNode) o).getValue();
+
+            if (v instanceof BNode) {
+
+                throw new VisitorException(
+                        "Object in triple reference pattern may not be blank node.");
+
+            }
+
+        } else {
+
+            if (((VarNode) o).isAnonymous()) {
+                /*
+                 * Blank nodes have already been translated into variables.
+                 */
+                throw new VisitorException(
+                        "Object in triple reference pattern may not be blank node.");
+            }
+
+        }
+
+        /*
+         * Note: The caller will associate the SID variable with this
+         * StatementPatternNode if it appears in a BIND(<<...>> AS ?sid)
+         * construction. That will overwrite the variable which we assign here.
+ * + * @see GroupGraphPatternBuilder#visit(ASTBind,Object) + */ + + final StatementPatternNode sp = new StatementPatternNode(s, p, o, + graphPattern.getContext(), + graphPattern.getStatementPatternScope()); + + final Node parent = node.jjtGetParent(); + + final VarNode sidVar; + + if(parent instanceof ASTBind) { + + final ASTBind bind = (ASTBind) parent; + + final Node aliasNode = bind.jjtGetChild(1); + + final String alias = ((ASTVar) aliasNode).getName(); + + // Use the specified variable. + sidVar = new VarNode(alias); + +// sidVar.setSID(true); + + } else { + + // Create a new variable. + sidVar = context.createSidVar(); + + } + + // Set the SID variable on the SP. + sp.setSid(sidVar); + + // add to the current join group. + graphPattern.addSP(sp); + +// log.error("node:=" + node + " => " + sp); + + // Return the SID variable. + return sidVar; + + } + } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/ASTTRefPattern.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/ASTTRefPattern.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/ASTTRefPattern.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -0,0 +1,21 @@ +/* Generated By:JJTree: Do not edit this line. ASTTRefPattern.java Version 4.3 */ +/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ +package com.bigdata.rdf.sail.sparql.ast; + +public +class ASTTRefPattern extends SimpleNode { + public ASTTRefPattern(int id) { + super(id); + } + + public ASTTRefPattern(SyntaxTreeBuilder p, int id) { + super(p, id); + } + + + /** Accept the visitor. **/ + public Object jjtAccept(SyntaxTreeBuilderVisitor visitor, Object data) throws VisitorException { + return visitor.visit(this, data); + } +} +/* JavaCC - OriginalChecksum=ee63fb12e5388e4bb47e28aac24c0801 (do not edit this line) */ Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/JJTSyntaxTreeBuilderState.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/JJTSyntaxTreeBuilderState.java 2012-05-03 09:06:37 UTC (rev 6312) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/ast/JJTSyntaxTreeBuilderState.java 2012-05-09 15:31:31 UTC (rev 6313) @@ -1,123 +1,123 @@ -/* Generated By:JavaCC: Do not edit this line. JJTSyntaxTreeBuilderState.java Version 5.0 */ -package com.bigdata.rdf.sail.sparql.ast; - -public class JJTSyntaxTreeBuilderState { - private java.util.List<Node> nodes; - private java.util.List<Integer> marks; - - private int sp; // number of nodes on stack - private int mk; // current mark - private boolean node_created; - - public JJTSyntaxTreeBuilderState() { - nodes = new java.util.ArrayList<Node>(); - marks = new java.util.ArrayList<Integer>(); - sp = 0; - mk = 0; - } - - /* Determines whether the current node was actually closed and - pushed. This should only be called in the final user action of a - node scope. */ - public boolean nodeCreated() { - return node_created; - } - - /* Call this to reinitialize the node stack. It is called - automatically by the parser's ReInit() method. 
*/
-  public void reset() {
-    nodes.clear();
-    marks.clear();
-    sp = 0;
-    mk = 0;
-  }
-
-  /* Returns the root node of the AST.  It only makes sense to call
-     this after a successful parse. */
-  public Node rootNode() {
-    return nodes.get(0);
-  }
-
-  /* Pushes a node on to the stack. */
-  public void pushNode(Node n) {
-    nodes.add(n);
-    ++sp;
-  }
-
-  /* Returns the node on the top of the stack, and remove it from the
-     stack.  */
-  public Node popNode() {
-    if (--sp < mk) {
-      mk = marks.remove(marks.size()-1);
-    }
-    return nodes.remove(nodes.size()-1);
-  }
-
-  /* Returns the node currently on the top of the stack. */
-  public Node peekNode() {
-    return nodes.get(nodes.size()-1);
-  }
-
-  /* Returns the number of children on the stack in the current node
-     scope. */
-  public int nodeArity() {
-    return sp - mk;
-  }
-
-
-  public void clearNodeScope(Node n) {
-    while (sp > mk) {
-      popNode();
-    }
-    mk = marks.remove(marks.size()-1);
-  }
-
-
-  public void openNodeScope(Node n) {
-    marks.add(mk);
-    mk = sp;
-    n.jjtOpen();
-  }
-
-
-  /* A definite node is constructed from a specified number of
-     children.  That number of nodes are popped from the stack and
-     made the children of the definite node.  Then the definite node
-     is pushed on to the stack. */
-  public void closeNodeScope(Node n, int num) {
-    mk = marks.remove(marks.size()-1);
-    while (num-- > 0) {
-      Node c = popNode();
-      c.jjtSetParent(n);
-      n.jjtAddChild(c, num);
-    }
-    n.jjtClose();
-    pushNode(n);
-    node_created = true;
-  }
-
-
-  /* A conditional node is constructed if its condition is true.  All
... [truncated message content]
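The mark-based node stack above is the whole of the JJTree tree-building protocol: openNodeScope() remembers where a node's children begin, and closeNodeScope(n, num) pops that many children and re-parents them under n. A self-contained toy version (all names here are illustrative; the mark bookkeeping for conditional scopes is omitted):

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.Deque;
    import java.util.List;

    public class ToyTreeBuilder {

        static class ToyNode {
            final String name;
            final List<ToyNode> children = new ArrayList<ToyNode>();
            ToyNode(final String name) { this.name = name; }
        }

        private final Deque<ToyNode> stack = new ArrayDeque<ToyNode>();

        void pushNode(final ToyNode n) { stack.push(n); }

        // Mirrors closeNodeScope(Node, int): pop [num] children, re-parent
        // them under [n] in push order, then push [n] itself.
        void closeNodeScope(final ToyNode n, final int num) {
            final ToyNode[] popped = new ToyNode[num];
            for (int i = num - 1; i >= 0; i--)
                popped[i] = stack.pop();
            for (ToyNode c : popped)
                n.children.add(c);
            pushNode(n);
        }

        public static void main(final String[] args) {
            final ToyTreeBuilder b = new ToyTreeBuilder();
            b.pushNode(new ToyNode("subject"));
            b.pushNode(new ToyNode("predicate"));
            b.pushNode(new ToyNode("object"));
            b.closeNodeScope(new ToyNode("TRefPattern"), 3);
            // Prints: TRefPattern has 3 children
            System.out.println(b.stack.peek().name + " has "
                    + b.stack.peek().children.size() + " children");
        }
    }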
From: <tho...@us...> - 2012-05-10 15:28:31
Revision: 6314
          http://bigdata.svn.sourceforge.net/bigdata/?rev=6314&view=rev
Author:   thompsonbry
Date:     2012-05-10 15:28:21 +0000 (Thu, 10 May 2012)

Log Message:
-----------
Partial commit of query support for "reification done right." This does not include the small code block in AST2BOpUtility#toPredicate() due to a code conflict which MikeP will be resolving soon.

Two unit tests for the new query syntax now pass. Those tests run against data which is inserted "by hand" in the correct form into the indices. This gives a good indication that the integration with the overall query plan will work once we modify the RDF parsers to load data into the indices using the appropriate (inline) representation.

@see https://sourceforge.net/apps/trac/bigdata/ticket/526 (Reification done right)

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpContext.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx

Added Paths:
-----------
    branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/empty.ttl

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpContext.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpContext.java	2012-05-09 15:31:31 UTC (rev 6313)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/BOpContext.java	2012-05-10 15:28:21 UTC (rev 6314)
@@ -43,6 +43,12 @@
 import com.bigdata.bop.engine.QueryEngine;
 import com.bigdata.bop.join.BaseJoinStats;
 import com.bigdata.rdf.internal.IV;
+import com.bigdata.rdf.internal.impl.bnode.SidIV;
+import com.bigdata.rdf.model.BigdataBNode;
+import com.bigdata.rdf.sparql.ast.QueryHints;
+import com.bigdata.rdf.spo.ISPO;
+import com.bigdata.rdf.spo.SPO;
+import com.bigdata.rdf.spo.SPOPredicate;
 import com.bigdata.relation.accesspath.AccessPath;
 import com.bigdata.relation.accesspath.IAccessPath;
 import com.bigdata.relation.accesspath.IBlockingBuffer;
@@ -549,7 +555,7 @@
     static public void copyValues(final IElement e, final IPredicate<?> pred,
             final IBindingSet bindingSet) {
 
-        final int arity = pred.arity();
+        final int arity = pred.arity();
 
         for (int i = 0; i < arity; i++) {
 
@@ -596,6 +602,39 @@
 
         }
 
+        if (QueryHints.DEFAULT_REIFICATION_DONE_RIGHT
+                && pred instanceof SPOPredicate) {
+
+            final SPOPredicate tmp = (SPOPredicate) pred;
+
+            final IVariable<?> sidVar = tmp.sid();
+
+            if (sidVar != null) {
+
+                /*
+                 * Build a SidIV for the (s,p,o) and bind it on the sid
+                 * variable.
+                 *
+                 * @see <a
+                 * href="https://sourceforge.net/apps/trac/bigdata/ticket/526">
+                 * Reification Done Right</a>
+                 *
+                 * TODO This is RDF specific code. It would be nice if we
+                 * did not have to put it into BOpContext.
+                 */
+
+                final IV s = (IV) e.get(0);
+                final IV p = (IV) e.get(1);
+                final IV o = (IV) e.get(2);
+                final ISPO spo = new SPO(s, p, o);
+                final SidIV sidIV = new SidIV<BigdataBNode>(spo);
+
+                bindingSet.set(sidVar, new Constant(sidIV));
+
+            }
+
+        }
+
     }
 
 //    /**

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java	2012-05-09 15:31:31 UTC (rev 6313)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/bnode/SidIV.java	2012-05-10 15:28:21 UTC (rev 6314)
@@ -200,12 +200,20 @@
         return bi.toString();
     }
 
-    public boolean equals(Object o) {
+    /**
+     * Two {@link SidIV}s are equal iff their (s,p,o) IVs are equal.
+     */
+    public boolean equals(final Object o) {
         if (this == o)
             return true;
         if (o instanceof SidIV) {
-            final ISPO spo2 = ((SidIV) o).spo;
-            return spo.equals(spo2);
+            final ISPO stmt2 = ((SidIV<?>) o).spo;
+            return
+                IVUtility.equals(spo.s(), stmt2.s()) && //
+                IVUtility.equals(spo.p(), stmt2.p()) && //
+                IVUtility.equals(spo.o(), stmt2.o())//
+                ;
+//            return spo.equals(spo2);
         }
         return false;
     }

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java	2012-05-09 15:31:31 UTC (rev 6313)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java	2012-05-10 15:28:21 UTC (rev 6314)
@@ -27,10 +27,12 @@
 
 import com.bigdata.bop.BOp;
 import com.bigdata.bop.IBindingSet;
+import com.bigdata.bop.IVariable;
 import com.bigdata.bop.IVariableOrConstant;
 import com.bigdata.bop.NV;
 import com.bigdata.bop.ap.Predicate;
 import com.bigdata.rdf.internal.IV;
+import com.bigdata.rdf.internal.impl.bnode.SidIV;
 import com.bigdata.relation.rule.IAccessPathExpander;
 
 /**
@@ -51,6 +53,15 @@
      */
     private static final long serialVersionUID = 3517916629931687107L;
 
+    public static interface Annotations extends Predicate.Annotations {
+
+        /**
+         * The SID {@link IVariable} (optional).
+         */
+        String SID = SPOPredicate.class.getName() + ".sid";
+
+    }
+
     /**
      * Variable argument version of the shallow copy constructor.
     */
@@ -267,6 +278,9 @@
 
     }
 
+    /**
+     * The variable or constant for the subject position (required).
+     */
     @SuppressWarnings({ "unchecked", "rawtypes" })
     final public IVariableOrConstant<IV> s() {
 
@@ -274,6 +288,9 @@
 
     }
 
+    /**
+     * The variable or constant for the predicate position (required).
+     */
     @SuppressWarnings({ "unchecked", "rawtypes" })
     final public IVariableOrConstant<IV> p() {
 
@@ -281,6 +298,9 @@
 
     }
 
+    /**
+     * The variable or constant for the object position (required).
+     */
     @SuppressWarnings({ "unchecked", "rawtypes" })
     final public IVariableOrConstant<IV> o() {
 
@@ -288,12 +308,31 @@
 
     }
 
+    /**
+     * The variable or constant for the context position (required iff in quads
+     * mode).
+     */
     @SuppressWarnings({ "unchecked", "rawtypes" })
     final public IVariableOrConstant<IV> c() {
 
         return (IVariableOrConstant<IV>) get(3/* c */);
 
     }
+    
+    /**
+     * The variable for the statement identifier (optional). The statement
+     * identifier is the composition of the (subject, predicate, and object)
+     * positions of the fully bound predicate. When this variable is declared,
+     * it will be bound to a {@link SidIV} for matched statements.
+ * + * @see Annotations#SID + */ + @SuppressWarnings({ "unchecked", "rawtypes" }) + final public IVariable<IV> sid() { + + return (IVariable<IV>) getProperty(Annotations.SID); + + } /* * Note: Moved to Predicate. See notes there before putting back into Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java 2012-05-09 15:31:31 UTC (rev 6313) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/TestReificationDoneRightEval.java 2012-05-10 15:28:21 UTC (rev 6314) @@ -28,7 +28,20 @@ package com.bigdata.rdf.sparql.ast.eval.reif; import com.bigdata.bop.ap.Predicate; +import com.bigdata.rdf.internal.XSD; +import com.bigdata.rdf.internal.impl.bnode.SidIV; +import com.bigdata.rdf.model.BigdataBNode; +import com.bigdata.rdf.model.BigdataLiteral; +import com.bigdata.rdf.model.BigdataStatement; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.model.StatementEnum; import com.bigdata.rdf.sparql.ast.eval.AbstractDataDrivenSPARQLTestCase; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.rdf.store.AbstractTripleStore; +import com.bigdata.rdf.vocab.decls.DCTermsVocabularyDecl; /** * Reification Done Right test suite. @@ -83,9 +96,147 @@ super(name); } + /** + * Bootstrap test. The data are explicitly entered into the KB by hand. This + * makes it possible to test evaluation without having to fix the RDF data + * loader. The query is based on <code>rdf-02</code>. + */ + public void test_reificationDoneRight_00() throws Exception { + + final BigdataValueFactory vf = store.getValueFactory(); + + final BigdataURI SAP = vf.createURI("http://example.com/SAP"); + final BigdataURI bought = vf.createURI("http://example.com/bought"); + final BigdataURI sybase = vf.createURI("http://example.com/sybase"); + final BigdataURI dcSource = vf.asValue(DCTermsVocabularyDecl.source); + final BigdataURI dcCreated = vf.asValue(DCTermsVocabularyDecl.created); + final BigdataURI newsSybase = vf.createURI("http://example.com/news/us-sybase"); + final BigdataLiteral createdDate = vf.createLiteral("2011-04-05T12:00:00Z",XSD.DATETIME); + final BigdataURI g1 = vf.createURI("http://example.com/g1"); + + // Add/resolve the terms against the lexicon. + final BigdataValue[] terms = new BigdataValue[] { SAP, bought, sybase, + dcSource, dcCreated, newsSybase, createdDate, g1 }; + + final BigdataURI context = store.isQuads() ? g1 : null; + + store.addTerms(terms); + + // ground statement. + final BigdataStatement s0 = vf.createStatement(SAP, bought, sybase, + context, StatementEnum.Explicit); + + // Setup blank node with SidIV for that Statement. + final BigdataBNode s1 = vf.createBNode("s1"); + s1.setStatementIdentifier(true); + final ISPO spo = new SPO(SAP.getIV(), bought.getIV(), sybase.getIV(), + null/* NO CONTEXT */, StatementEnum.Explicit); + s1.setIV(new SidIV<BigdataBNode>(spo)); + + // metadata statements. 
+ + final BigdataStatement mds1 = vf.createStatement(s1, dcSource, + newsSybase, context, StatementEnum.Explicit); + + final BigdataStatement mds2 = vf.createStatement(s1, dcCreated, + createdDate, context, StatementEnum.Explicit); + + final ISPO[] stmts = new ISPO[] { s0, mds1, mds2 }; + + store.addStatements(stmts, stmts.length); + + /* + * Now that we have populated the database, we can go ahead and compile + * the query. (If we compile the query first then it will not find any + * matching lexical items.) + */ + + final TestHelper h = new TestHelper("reif/rdr-00", // testURI, + "reif/rdr-02.rq",// queryFileURL + "reif/empty.ttl",// dataFileURL + "reif/rdr-02.srx"// resultFileURL + ); + + h.runTest(); + + } + + /** + * Bootstrap test. The data are explicitly entered into the KB by hand. This + * makes it possible to test evaluation without having to fix the RDF data + * loader. The query is based on <code>rdf-02a</code>. + */ + public void test_reificationDoneRight_00a() throws Exception { + + final BigdataValueFactory vf = store.getValueFactory(); + + final BigdataURI SAP = vf.createURI("http://example.com/SAP"); + final BigdataURI bought = vf.createURI("http://example.com/bought"); + final BigdataURI sybase = vf.createURI("http://example.com/sybase"); + final BigdataURI dcSource = vf.asValue(DCTermsVocabularyDecl.source); + final BigdataURI dcCreated = vf.asValue(DCTermsVocabularyDecl.created); + final BigdataURI newsSybase = vf.createURI("http://example.com/news/us-sybase"); + final BigdataLiteral createdDate = vf.createLiteral("2011-04-05T12:00:00Z",XSD.DATETIME); + final BigdataURI g1 = vf.createURI("http://example.com/g1"); + + // Add/resolve the terms against the lexicon. + final BigdataValue[] terms = new BigdataValue[] { SAP, bought, sybase, + dcSource, dcCreated, newsSybase, createdDate, g1 }; + + final BigdataURI context = store.isQuads() ? g1 : null; + + store.addTerms(terms); + + // ground statement. + final BigdataStatement s0 = vf.createStatement(SAP, bought, sybase, + context, StatementEnum.Explicit); + + // Setup blank node with SidIV for that Statement. + final BigdataBNode s1 = vf.createBNode("s1"); + s1.setStatementIdentifier(true); + final ISPO spo = new SPO(SAP.getIV(), bought.getIV(), sybase.getIV(), + null/* NO CONTEXT */, StatementEnum.Explicit); + s1.setIV(new SidIV<BigdataBNode>(spo)); + + // metadata statements. + + final BigdataStatement mds1 = vf.createStatement(s1, dcSource, + newsSybase, context, StatementEnum.Explicit); + + final BigdataStatement mds2 = vf.createStatement(s1, dcCreated, + createdDate, context, StatementEnum.Explicit); + + final ISPO[] stmts = new ISPO[] { s0, mds1, mds2 }; + + store.addStatements(stmts, stmts.length); + + /* + * Now that we have populated the database, we can go ahead and compile + * the query. (If we compile the query first then it will not find any + * matching lexical items.) + */ + + final TestHelper h = new TestHelper("reif/rdr-00a", // testURI, + "reif/rdr-02a.rq",// queryFileURL + "reif/empty.ttl",// dataFileURL + "reif/rdr-02a.srx"// resultFileURL + ); + + h.runTest(); + + } + /** - * Simple query involving alice, bob, and an information extractor. - */ + * Simple query involving alice, bob, and an information extractor. + * + * <pre> + * select ?src where { + * ?x foaf:name "Alice" . + * ?y foaf:name "Bob" . + * <<?x foaf:knows ?y>> dc:source ?src . 
+	 * }
+	 * </pre>
+	 */
 	public void test_reificationDoneRight_01() throws Exception {
 
 		new TestHelper("reif/rdr-01", // testURI,
@@ -97,8 +248,18 @@
 	}
 
 	/**
-	 * Same data, but the query uses the BIND() syntax and pulls out some
-	 * more information.
+	 * Same data, but the query uses the BIND() syntax and pulls out some more
+	 * information.
+	 *
+	 * <pre>
+	 * select ?who ?src ?conf where {
+	 *   ?x foaf:name "Alice" .
+	 *   ?y foaf:name ?who .
+	 *   BIND( <<?x foaf:knows ?y>> as ?sid ) .
+	 *   ?sid dc:source ?src .
+	 *   ?sid rv:confidence ?conf .
+	 * }
+	 * </pre>
 	 */
 	public void test_reificationDoneRight_01a() throws Exception {
 
 		new TestHelper("reif/rdr-01a", // testURI,
@@ -112,6 +273,12 @@
 
 	/**
 	 * Simple query ("who bought sybase").
+	 *
+	 * <pre>
+	 * SELECT ?src ?who {
+	 *   <<?who :bought :sybase>> dc:source ?src
+	 * }
+	 * </pre>
 	 */
 	public void test_reificationDoneRight_02() throws Exception {
 
 		new TestHelper("reif/rdr-02", // testURI,
@@ -124,8 +291,16 @@
 	}
 
 	/**
-	 * Same data, but the query uses the BIND() syntax and pulls out some
-	 * more information.
+	 * Same data, but the query uses the BIND() syntax and pulls out some more
+	 * information.
+	 *
+	 * <pre>
+	 * SELECT ?src ?who ?created {
+	 *   BIND( <<?who :bought :sybase>> as ?sid ) .
+	 *   ?sid dc:source ?src .
+	 *   OPTIONAL {?sid dc:created ?created}
+	 * }
+	 * </pre>
 	 */
 	public void test_reificationDoneRight_02a() throws Exception {

Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/empty.ttl
===================================================================

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx	2012-05-09 15:31:31 UTC (rev 6313)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-01a.srx	2012-05-10 15:28:21 UTC (rev 6314)
@@ -18,7 +18,7 @@
 				<uri>http://reasoner.example.com/engines#engine_1</uri>
 			</binding>
 			<binding name="conf">
-				<literal datatype="xsd:decimal">0.9835</literal>
+				<literal datatype="http://www.w3.org/2001/XMLSchema#decimal">0.9835</literal>
 			</binding>
 		</result>
 	</results>

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl	2012-05-09 15:31:31 UTC (rev 6313)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02.ttl	2012-05-10 15:28:21 UTC (rev 6314)
@@ -8,6 +8,7 @@
 @prefix dc: <http://purl.org/dc/terms/> .
 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
 
+:SAP :bought :sybase .
 _:s1 rdf:subject :SAP .
 _:s1 rdf:predicate :bought .
 _:s1 rdf:object :sybase .
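The SidIV.equals() rewrite earlier in this commit is what makes the hand-entered SIDs in these tests line up with the SIDs minted at query time by BOpContext.copyValues(): the rewritten equals() decides equality purely from the component (s,p,o) IVs, regardless of any other state carried by the wrapped ISPO. A sketch of that contract, where sIV/pIV/oIV stand for the resolved IVs of :SAP, :bought and :sybase:

    // Two SidIVs over the same triple compare equal even when the wrapped
    // ISPO instances differ (e.g. one carries a StatementEnum, one does not).
    final ISPO a = new SPO(sIV, pIV, oIV);
    final ISPO b = new SPO(sIV, pIV, oIV, null/* c */, StatementEnum.Explicit);

    final SidIV<BigdataBNode> sid1 = new SidIV<BigdataBNode>(a);
    final SidIV<BigdataBNode> sid2 = new SidIV<BigdataBNode>(b);

    assert sid1.equals(sid2); // equal iff the component IVs are equal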
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx	2012-05-09 15:31:31 UTC (rev 6313)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/reif/rdr-02a.srx	2012-05-10 15:28:21 UTC (rev 6314)
@@ -18,7 +18,7 @@
 				<uri>http://example.com/news/us-sybase</uri>
 			</binding>
 			<binding name="created">
-				<literal datatype="xsd:dateTime">2011-04-05T12:00:00Z</literal>
+				<literal datatype="http://www.w3.org/2001/XMLSchema#dateTime">2011-04-05T12:00:00Z</literal>
 			</binding>
 		</result>
 	</results>
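The piece this commit defers (the AST2BOpUtility#toPredicate() hookup) is the producer side of the new annotation: something must attach the SID variable to the SPOPredicate so that copyValues() can find it. A sketch of what that declaration might look like; the NV-varargs constructor form and the [bought]/[sybase] constants are assumptions, while the annotation key and the runtime binding behavior are from this commit:

    // Predicate for <<?who :bought :sybase>> with ?sid declared as the SID
    // variable; each matched statement then binds ?sid to its SidIV.
    final IVariable<IV> who = Var.var("who");
    final IVariable<IV> sid = Var.var("sid");

    final SPOPredicate pred = new SPOPredicate(
            new BOp[] { who, bought, sybase }, // (s, p, o)
            new NV(SPOPredicate.Annotations.SID, sid));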
From: <mar...@us...> - 2012-05-31 07:49:02
Revision: 6322 http://bigdata.svn.sourceforge.net/bigdata/?rev=6322&view=rev Author: martyncutcher Date: 2012-05-31 07:48:48 +0000 (Thu, 31 May 2012) Log Message: ----------- synchronization commit for initial GOM development. The two object managers - local and remote - are broadly functional. See TestGOM and TestRemoteGOM for example usage. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/IGPO.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/ILinkSet.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/IObjectManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/skin/GenericSkinRegistry.java branches/BIGDATA_RELEASE_1_2_0/ctc-striterators/src/java/cutthecrap/utils/striterators/EmptyIterator.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/Alchemist.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/owl/ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/BasicSkin.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/GPO.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/LinkSet.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/NanoSparqlObjectManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectMgrModel.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/TestGOM.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/TestOwlGOM.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/TestRemoteGOM.java branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/testgom.n3 branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/testowl.xml branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/testowl2.xml Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/Alchemist.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/Alchemist.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/Alchemist.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,15 @@ +package com.bigdata.gom.alchemy; + +/** + * The RDF Alchemist will generate interface and implementation files to provide a + * typed interface to the underlying GPO representations. + * + * The pattern generated will exploit the Skin pattern to enable multiple typed + * interfaces to the same object. 
+ * + * @author Martyn Cutcher + * + */ +public class Alchemist { + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/alchemy/Alchemist.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/BasicSkin.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/BasicSkin.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/BasicSkin.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,136 @@ +package com.bigdata.gom.gpo; + +import java.util.Calendar; +import java.util.Date; + +import javax.xml.datatype.XMLGregorianCalendar; + +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.CalendarLiteralImpl; +import org.openrdf.model.impl.IntegerLiteralImpl; +import org.openrdf.model.impl.LiteralImpl; +import org.openrdf.model.impl.NumericLiteralImpl; +import org.semanticweb.yars.nx.NumericLiteral; + +import com.sun.org.apache.xerces.internal.jaxp.datatype.XMLGregorianCalendarImpl; + +/** + * The BasicSkin provides standard type wrappers to help with conversions + * @author Martyn Cutcher + * + */ +public class BasicSkin implements IGenericSkin { + + final IGPO m_gpo; + final ValueFactory m_vf; + + public BasicSkin(final IGPO gpo) { + m_gpo = gpo; + m_vf = gpo.getObjectManager().getValueFactory(); + } + + public void setValue(final URI property, final Value value) { + m_gpo.setValue(property, value); + } + + public void setValue(final URI property, final String value) { + m_gpo.setValue(property, m_vf.createLiteral(value)); + } + + public void setValue(final URI property, final int value) { + m_gpo.setValue(property, m_vf.createLiteral(value)); + } + + public void setValue(final URI property, final double value) { + m_gpo.setValue(property, m_vf.createLiteral(value)); + } + + public void setValue(final URI property, final XMLGregorianCalendar value) { + m_gpo.setValue(property, m_vf.createLiteral(value)); + } + + public void setValue(final String property, final Value value) { + m_gpo.setValue(m_vf.createURI(property), value); + } + + public void setValue(final String property, final String value) { + m_gpo.setValue(m_vf.createURI(property), m_vf.createLiteral(value)); + } + + public void setValue(final String property, final int value) { + m_gpo.setValue(m_vf.createURI(property), m_vf.createLiteral(value)); + } + + public void setValue(final String property, final double value) { + m_gpo.setValue(m_vf.createURI(property), m_vf.createLiteral(value)); + } + + public void setValue(final String property, final XMLGregorianCalendar value) { + m_gpo.setValue(m_vf.createURI(property), m_vf.createLiteral(value)); + } + + @Override + public IGPO asGeneric() { + return m_gpo; + } + + public int getIntValue(final URI key) { + final Value v = m_gpo.getValue(key); + + if (v instanceof Literal) { + return ((Literal) v).intValue(); + } else { + return 0; + } + } + + public double getDoubleValue(final URI key) { + final Value v = m_gpo.getValue(key); + + if (v instanceof Literal) { + return ((Literal) v).doubleValue(); + } else { + return 0; + } + } + + public boolean getBooleanValue(final URI key) { + final Value v = m_gpo.getValue(key); + + if (v 
instanceof Literal) { + return ((Literal) v).booleanValue(); + } else { + return false; + } + } + + public String getStringValue(final URI key) { + final Value v = m_gpo.getValue(key); + + if (v instanceof Literal) { + return ((Literal) v).stringValue(); + } else { + return null; + } + } + + public int getInt(final String key) { + return getIntValue(m_vf.createURI(key)); + } + + public double getDouble(final String key) { + return getDoubleValue(m_vf.createURI(key)); + } + + public boolean getBoolean(final String key) { + return getBooleanValue(m_vf.createURI(key)); + } + + public String getString(final String key) { + return getStringValue(m_vf.createURI(key)); + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/BasicSkin.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/GPO.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/GPO.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/GPO.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,651 @@ +package com.bigdata.gom.gpo; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.TreeSet; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.log4j.Logger; +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.query.BindingSet; +import org.openrdf.repository.RepositoryException; + +import com.bigdata.gom.om.IObjectManager; +import com.bigdata.gom.om.ObjectMgrModel; +import com.bigdata.gom.skin.GenericSkinRegistry; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataResource; +import com.bigdata.rdf.model.BigdataStatement; +import com.bigdata.rdf.model.BigdataStatementImpl; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactoryImpl; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.striterator.ICloseableIterator; + +/** + * A TripleStore backed GPO yields a number of challenges. + * + * <ol> + * <li> We need a property mechanism to track the statement assertion + * and retractions required for efficient update. The GPOEntry class + * provides support for this with its linked lists of GPOValues, also + * supporting multiple values per predicate.</li> + * + * <li>A strategy for lazy materialization. This turns out to be fairly + * straightforward, with the GPO created unmaterialized and requesting + * as necessary</li> + * + * <li>Most importantly we have a consistency/referential integrity + * issue since the GPO state is not kept in sync with the underlying + * triple store. The simple solution is to ensure that any update + * is immediately pushed through to the underlying store, but this + * loses the advantage of the localised cache. Although it could be + * argued that a general query is made against committed data the + * same case cannot be made when building GPO models. 
It seems we + * need a mechanism to lazily flush updates relevant to a specific + * property.</li> + * + * <li>An alternate solution is to view the ObjectManager transactions + * as transactional updates to the GPO structures and that when these + * are committed then the model is consistent. Separating the + * GPO commit from any underlying TripleStore commit. So the GPO processing + * inside the GPO transaction is restricted and referential integrity + * is not maintained. This is similar to the read-committed query + * semantics of most databases, which is not to commend it in any way. I am + * keen to find a solution to this.</li> + * + * <li>One option is to force GPO related + * updates through to the underlying store, not necessarily a problem since + * a rollback already performs a TripleStore abort anyhow.</li> + * </ol> + * + * @author Martyn Cutcher + * + */ +public class GPO implements IGPO { + + private static final Logger log = Logger.getLogger(GPO.class); + + final IObjectManager m_om; + final Resource m_id; + + boolean m_materialized = false; + + boolean m_clean = true; + + ArrayList<IGenericSkin> m_skins = null; + + static class LinkValue { + final Value m_value; + LinkValue m_next; + + LinkValue(Value value) { + m_value = value; + } + } + /** + * The GPOEntry retains the state necessary for providing delta updates to the underlying + * triple data. It supports multi-values against the same property and records values + * removed and added. + */ + static class GPOEntry { + final URI m_key; + GPOEntry m_next; + + LinkValue m_values; + LinkValue m_addedValues; + LinkValue m_removedValues; + + GPOEntry(final URI key) { + assert key != null; + + m_key = key; + } + + + /** + * initValue is called by ObjectManager materialize and resets the GPO + * to its read state. Therefore the value is added to the m_values + * list and not m_addedValues. + */ + public void initValue(Value value) { + final LinkValue newValue = new LinkValue(value); + newValue.m_next = m_values; + m_values = newValue; + + } + + public void set(final Value value) { + m_addedValues = new LinkValue(value); + + // move m_values to m_removedValues + LinkValue nxt = m_values; + while (nxt != null) { + LinkValue rem = nxt; + nxt = nxt.m_next; + + rem.m_next = m_removedValues; + m_removedValues = rem; + } + // and clear any current values + m_values = null; + } + + public Iterator<Value> values() { + return new Iterator<Value>() { + LinkValue m_cur = m_values; + LinkValue m_added = m_addedValues; + @Override + public boolean hasNext() { + return m_cur != null || m_added != null; + } + + @Override + public Value next() { + final LinkValue ret = m_cur != null ? 
m_cur : m_added; + if (ret == null) { + throw new NoSuchElementException(); + } + if (m_cur != null) { + m_cur = m_cur.m_next; + } else { + m_added = m_added.m_next; + } + + return ret.m_value; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }; + } + + static class ValueIterator implements Iterator<Value> { + LinkValue m_cur; + + ValueIterator(final LinkValue cur) { + m_cur = cur; + } + @Override + public boolean hasNext() { + return m_cur != null; + } + + @Override + public Value next() { + final LinkValue ret = m_cur; + m_cur = m_cur.m_next; + + return ret.m_value; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + public Iterator<Value> removes() { + Iterator<Value> ret = new ValueIterator(m_removedValues); + return ret; + } + + public Iterator<Value> additions() { + Iterator<Value> ret = new ValueIterator(m_addedValues); + return ret; + } + + public Value getValue() { + if (m_values != null) { + return m_values.m_value; + } else if (m_addedValues != null) { + return m_addedValues.m_value; + } else { + return null; + } + } + + + /** + * Committing the entry checks for any added values and + * moves this to the values chain. + */ + public void commit() { + if (m_addedValues != null) { + if (m_values == null) { + m_values = m_addedValues; + } else { + LinkValue tail = m_values; + while (tail.m_next != null) + tail = tail.m_next; + tail.m_next = m_addedValues; + } + m_addedValues = null; + } + m_removedValues = null; + } + + /** + * A new value is only added if it does not already exist, ensuring the GPO maintains + * semantics with the underlying TripleStore. + * + * @return true if value was added + */ + public boolean add(final Value value) { + final Iterator<Value> values = values(); + while (values.hasNext()) { + if (values.next().equals(value)) { + return false; + } + } + + final LinkValue nv = new LinkValue(value); + nv.m_next = m_addedValues; + m_addedValues = nv; + + return true; + } + } + GPOEntry m_headEntry = null; + GPOEntry m_tailEntry = null; + + GPOEntry establishEntry(final URI key) { + final URI fkey = m_om.internKey(key); + if (m_headEntry == null) { + m_headEntry = m_tailEntry = new GPOEntry(fkey); + } else { + GPOEntry entry = m_headEntry; + while (entry != null) { + if (entry.m_key == fkey) { + return entry; + } + entry = entry.m_next; + } + m_tailEntry = m_tailEntry.m_next = new GPOEntry(fkey); + } + + return m_tailEntry; + } + + /** + * The entry is interned to provide a unique Object and allow + * '==' testing. + * + * @param key + * @return the found entry, if any + */ + GPOEntry getEntry(final URI key) { + final URI fkey = m_om.internKey(key); + GPOEntry entry = m_headEntry; + while (entry != null) { + if (entry.m_key == fkey) { + return entry; + } + entry = entry.m_next; + } + return null; + } + + public void reset() { + m_headEntry = m_tailEntry = null; + } + + public GPO(IObjectManager om, Resource id) { + m_om = om; + m_id = id; + } + + @Override + public IGenericSkin asClass(Class theClassOrInterface) { + // TODO Auto-generated method stub + return null; + } + + @Override + public Resource getId() { + return m_id; + } + + /** + * getLinksIn simply filters the values for Resources and returns a + * wrapper. + */ + @Override + public Set<IGPO> getLinksIn() { + // Does not require full materialization! 
+ + final String query = "SELECT ?x WHERE {?x ?p <" + getId().toString() + ">}"; + System.out.println("Query: " + query); + final ICloseableIterator<BindingSet> res = m_om.evaluate(query); + + final HashSet<IGPO> ret = new HashSet<IGPO>(); + while (res.hasNext()) { + final BindingSet bs = res.next(); + ret.add(m_om.getGPO((Resource) bs.getBinding("x").getValue())); + } + + return ret; + } + + /** All ?y where (?y,p,self). */ + @Override + public ILinkSet getLinksIn(URI property) { + return new LinkSet(this, property, true); + } + + /** All ?y where (self,?,?y). */ + @Override + public Set<IGPO> getLinksOut() { + materialize(); + + final HashSet<IGPO> ret = new HashSet<IGPO>(); + GPOEntry entry = m_headEntry; + while (entry != null) { + Iterator<Value> values = entry.values(); + while (values.hasNext()) { + final Value value = values.next(); + if (value instanceof Resource) { + ret.add(m_om.getGPO((Resource) value)); + } + } + entry = entry.m_next; + } + + return ret; + } + + @Override + public ILinkSet getLinksOut(URI property) { + materialize(); + + return new LinkSet(this, property, false); + } + + @Override + public IObjectManager getObjectManager() { + return m_om; + } + + @Override + public Map<URI, Long> getReverseLinkProperties() { + materialize(); + + // TODO Auto-generated method stub + return null; + } + + @Override + public Set<Statement> getStatements() { + materialize(); + + final HashSet<Statement> out = new HashSet<Statement>(); + + GPOEntry entry = m_headEntry; + while (entry != null) { + Iterator<Value> values = entry.values(); + while (values.hasNext()) + out.add(makeStatement(m_id, entry.m_key, values.next())); + + entry = entry.m_next; + } + + return out; + } + + private Statement makeStatement(final Resource id, final URI key, final Value value) { + return m_om.getValueFactory().createStatement(id, key, value); + } + + @Override + public Value getValue(final URI property) { + materialize(); + + final GPOEntry entry = getEntry(property); + return entry != null ? 
entry.getValue() : null; + } + + @Override + public Set<Value> getValues(URI property) { + materialize(); + + return null; + } + + @Override + public boolean isBound(URI property) { + materialize(); + + return getValue(property) != null; + } + + @Override + public boolean isMemberOf(ILinkSet linkSet) { + // TODO Auto-generated method stub + return false; + } + + @Override + public void remove() { + m_om.remove(this); + } + + public void dematerialize() { + m_materialized = false; + m_clean = true; + m_headEntry = m_tailEntry = null; + } + + public void initValue(URI predicate, Value object) { + assert !m_materialized; + + final GPOEntry entry = establishEntry(predicate); + entry.initValue(object); + } + + @Override + public void setValue( final URI property, final Value newValue) { + materialize(); + + final GPOEntry entry = establishEntry(property); + entry.set(newValue); + + if (false && newValue instanceof Resource) { + try { + update(entry); + } catch (RepositoryException e) { + throw new RuntimeException("Unable to update", e); + } + } else { + dirty(); + } + } + + private void dirty() { + if (m_clean) { + m_clean = false; + ((ObjectMgrModel) m_om).addToDirtyList(this); + } + } + + @Override + public IGPO asGeneric() { + return this; + } + + @Override + public String pp() { + materialize(); + + final StringBuilder out = new StringBuilder("ID: " + m_id.stringValue() + "\n"); + + GPOEntry entry = m_headEntry; + while (entry != null) { + Iterator<Value> values = entry.values(); + while (values.hasNext()) + out.append(entry.m_key.toString() + ": " + values.next().toString() + "\n"); + + entry = entry.m_next; + } + + return out.toString(); + } + + @Override + public IGPO getType() { + materialize(); + + final URI tid = (URI) getValue(new URIImpl("attr:/type")); + if (tid != null) { + return m_om.getGPO(tid); + } else { + return null; + } + } + + /** + * Called by the ObjectManager when flushing dirty objects. This can occur + * incrementally or on ObjectManager commit. + * + * The object is marked as clean once written. 
+     *
+     * @throws RepositoryException
+     */
+    public void update() throws RepositoryException {
+        assert m_materialized;
+
+        GPOEntry entry = m_headEntry;
+        while (entry != null) {
+            update(entry);
+
+            entry = entry.m_next;
+        }
+
+        m_clean = true;
+    }
+
+    private void update(final GPOEntry entry) throws RepositoryException {
+        assert m_materialized;
+
+        final Iterator<Value> removes = entry.removes();
+        while (removes.hasNext()) {
+            m_om.retract(m_id, entry.m_key, removes.next());
+        }
+
+        final Iterator<Value> inserts = entry.additions();
+        while (inserts.hasNext()) {
+            m_om.insert(m_id, entry.m_key, inserts.next());
+        }
+
+        entry.commit();
+    }
+
+    /**
+     * Basis for lazy materialization: checks the materialized state and, if
+     * false, requests materialization from the ObjectManager.
+     */
+    public void materialize() {
+        if (!m_materialized) {
+            synchronized (this) {
+                if (!m_materialized) {
+                    m_om.materialize(this);
+                    m_materialized = true;
+                }
+            }
+        }
+    }
+
+    @Override
+    public void addValue(final URI property, final Value value) {
+        materialize();
+
+        final GPOEntry entry = establishEntry(property);
+        if (entry.add(value)) {
+            dirty();
+        }
+    }
+
+    public void setMaterialized(boolean b) {
+        // record the caller-supplied materialization state
+        m_materialized = b;
+    }
+
+    public void prepareBatchTerms() {
+        final ObjectMgrModel oom = (ObjectMgrModel) m_om;
+        GPOEntry entry = m_headEntry;
+        while (entry != null) {
+            final Iterator<Value> inserts = entry.additions();
+            while (inserts.hasNext()) {
+                final Value v = inserts.next();
+                oom.checkValue(v);
+            }
+
+            entry = entry.m_next;
+        }
+    }
+
+    /**
+     * Adds statements for batch update.
+     */
+    public void prepareBatchUpdate() {
+        final ObjectMgrModel oom = (ObjectMgrModel) m_om;
+        GPOEntry entry = m_headEntry;
+        while (entry != null) {
+            final Iterator<Value> inserts = entry.additions();
+            while (inserts.hasNext()) {
+                final Value v = inserts.next();
+                oom.insertBatch(m_id, entry.m_key, v);
+            }
+            final Iterator<Value> removes = entry.removes();
+            while (removes.hasNext()) {
+                final Value v = removes.next();
+                oom.removeBatch(m_id, entry.m_key, v);
+            }
+
+            entry = entry.m_next;
+        }
+    }
+
+    /**
+     * The getSkin method is inspired somewhat by the Microsoft Win32
+     * getInterface that allowed an object to return multiple interfaces.
+     * The difference with the GPO skin is that the skin should be able to
+     * interact with any underlying GPO object.
+     *
+     * <p>It may be worthwhile performance-wise to cache a skin. Of more
+     * importance, I believe, is to preserve identity - even of the
+     * interface/skin object.
+ * + * @param skin interface required + * @return a skin if available + */ + public IGenericSkin getSkin(final Class intf) { + IGenericSkin ret = null; + if (m_skins != null) { + for (int i = 0; i < m_skins.size(); i++) { + if (intf.isInstance(m_skins.get(i))) { + return m_skins.get(i); + } + } + } else { + m_skins = new ArrayList<IGenericSkin>(2); + } + + ret = GenericSkinRegistry.asClass(this, intf); + m_skins.add(ret); + + return ret; + } +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/GPO.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/IGPO.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/IGPO.java 2012-05-16 08:38:12 UTC (rev 6321) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/IGPO.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -111,6 +111,9 @@ /** Replace (self,p,?x) with (self,p,newValue). */ void setValue(URI property, Value newValue); + /** Assert (self,p,newValue). */ + void addValue(URI property, Value newValue); + /** All (self,?p,?o). */ Set<Statement> getStatements(); @@ -189,4 +192,15 @@ */ IGenericSkin asClass(Class theClassOrInterface); + /** + * @return a pretty printed representation of the GPO + */ + String pp(); + + /** + * FIXME: this method will be moved to an as yet unnamed derived class + * that will become the superclass for alchemist generated subclasses. + */ + IGPO getType(); + } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/ILinkSet.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/ILinkSet.java 2012-05-16 08:38:12 UTC (rev 6321) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/ILinkSet.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -32,6 +32,15 @@ /** * A collection of links into (edges in) or links out of (edges out) of an * {@link IGPO}. + * + * <p>Note that the links out are only intended to be used to represent + * many-many associations. Standard one-many associations should be represented + * by linksIn (many links pointing to one resource rather than many links from + * one resource pointing to many other resources). + * + * <p>The LinksOut as the low-cardinality part of a many-many association reflect + * common asymmetry of such associations. Experience suggests that if the + * association is more symmetric the cardinalities tend not to be large. 
*/ public interface ILinkSet extends Set<IGPO> { Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/LinkSet.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/LinkSet.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/LinkSet.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,229 @@ +package com.bigdata.gom.gpo; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.query.BindingSet; + +import com.bigdata.gom.om.IObjectManager; +import com.bigdata.striterator.ICloseableIterator; + +import cutthecrap.utils.striterators.EmptyIterator; + +public class LinkSet implements ILinkSet { + final IGPO m_owner; + final URI m_linkProperty; + final boolean m_linksIn; + + public LinkSet(final IGPO owner, final URI linkProperty, final boolean linksIn) { + m_owner = owner; + m_linkProperty = linkProperty; + m_linksIn = linksIn; + } + + @Override + public URI getLinkProperty() { + return m_linkProperty; + } + + @Override + public IGPO getOwner() { + return m_owner; + } + + @Override + public boolean isLinkSetIn() { + return m_linksIn; + } + + @Override + public <C> Iterator<C> iterator(Class<C> theClassOrInterface) { + // TODO Auto-generated method stub + return null; + } + + @Override + public int size() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long sizeLong() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean add(IGPO arg0) { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean addAll(Collection<? 
extends IGPO> arg0) {
+        // TODO Auto-generated method stub
+        return false;
+    }
+
+    @Override
+    public void clear() {
+        // TODO Auto-generated method stub
+
+    }
+
+    @Override
+    public boolean contains(final Object arg) {
+        if (!(arg instanceof IGPO)) {
+            throw new IllegalArgumentException("IGPO required");
+        }
+        final IGPO gpo = (IGPO) arg;
+        // Member iff the candidate links to the owner via the link property.
+        return m_owner.getId().equals(gpo.getValue(m_linkProperty));
+    }
+
+    @Override
+    public boolean containsAll(Collection<?> arg0) {
+        // TODO Auto-generated method stub
+        return false;
+    }
+
+    @Override
+    public boolean isEmpty() {
+        // TODO Auto-generated method stub
+        return false;
+    }
+
+    @Override
+    public Iterator<IGPO> iterator() {
+        if (m_linksIn) {
+            final IObjectManager om = m_owner.getObjectManager();
+
+            final String query = "SELECT ?x WHERE {?x <" + m_linkProperty.toString() + "> <" + m_owner.getId().toString() + ">}";
+            final ICloseableIterator<BindingSet> res = om.evaluate(query);
+
+            return new Iterator<IGPO>() {
+
+                @Override
+                public boolean hasNext() {
+                    return res != null && res.hasNext();
+                }
+
+                @Override
+                public IGPO next() {
+                    final BindingSet bs = res.next();
+
+                    return om.getGPO((Resource) bs.getBinding("x").getValue());
+                }
+
+                @Override
+                public void remove() {
+                    throw new UnsupportedOperationException();
+                }
+
+            };
+        } else {
+            final GPO.GPOEntry entry = ((GPO) m_owner).getEntry(m_linkProperty);
+            if (entry == null) {
+                return new EmptyIterator<IGPO>();
+            }
+
+            return new Iterator<IGPO>() {
+                Iterator<Value> m_values = entry.values();
+                IGPO nextGPO = nextGPO();
+
+                private IGPO nextGPO() {
+                    while (m_values.hasNext()) {
+                        final Value val = m_values.next();
+                        if (val instanceof Resource) {
+                            return m_owner.getObjectManager().getGPO((Resource) val);
+                        }
+                    }
+                    return null;
+                }
+
+                @Override
+                public boolean hasNext() {
+                    return nextGPO != null;
+                }
+
+                @Override
+                public IGPO next() {
+                    if (nextGPO == null)
+                        throw new NoSuchElementException();
+
+                    final IGPO ret = nextGPO;
+                    nextGPO = nextGPO();
+
+                    return ret;
+                }
+
+                @Override
+                public void remove() {
+                    throw new UnsupportedOperationException();
+                }
+
+            };
+        }
+    }
+
+    @Override
+    public boolean remove(final Object obj) {
+        if (!(obj instanceof IGPO)) {
+            throw new IllegalArgumentException("Expected an instance of IGPO");
+        }
+
+        final IGPO gpo = (IGPO) obj;
+
+        final boolean ret;
+        if (m_linksIn) {
+            // The link exists iff the member points at the owner.
+            ret = m_owner.getId().equals(gpo.getValue(m_linkProperty));
+            if (ret) {
+                gpo.setValue(m_linkProperty, null);
+            }
+        } else { // FIXME: implement linksOut
+            throw new UnsupportedOperationException();
+        }
+
+        return ret;
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> arg0) {
+        // TODO Auto-generated method stub
+        return false;
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> arg0) {
+        // TODO Auto-generated method stub
+        return false;
+    }
+
+    /**
+     * Eagerly streams materialized objects into an array.
+     */
+    @Override
+    public Object[] toArray() {
+        final ArrayList<Object> out = new ArrayList<Object>();
+
+        final Iterator<IGPO> gpos = iterator();
+        while (gpos.hasNext()) {
+            out.add(gpos.next());
+        }
+
+        return out.toArray();
+    }
+
+    @Override
+    public <T> T[] toArray(T[] arg0) {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+}

Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/gpo/LinkSet.java
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + Id Date Revision Author HeadURL

Modified:
branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/IObjectManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/IObjectManager.java 2012-05-16 08:38:12 UTC (rev 6321) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/IObjectManager.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -28,7 +28,12 @@ package com.bigdata.gom.om; import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; import org.openrdf.query.BindingSet; +import org.openrdf.repository.RepositoryException; import com.bigdata.gom.gpo.IGPO; import com.bigdata.striterator.ICloseableIterator; @@ -92,4 +97,39 @@ */ void execute(String updateStr); + URI internKey(URI key); + + void retract(Resource id, URI key, Value value) throws RepositoryException; + + void insert(Resource id, URI key, Value value) throws RepositoryException; + + /** + * The ObjectManager is able to assign automatic ids for a new object. These + * will be of the form "gpo:#[genid]" + * + * @return a new GPO + */ + IGPO createGPO(); + + /** + * Simple save/recall interface that the ObjectManager provides to simplify + * other pattern implementations. Internally it uses a NameManager GPO + */ + void save(URI key, Value value); + + /** + * Simple save/recall interface that the ObjectManager provides to simplify + * other pattern implementations. Internally it uses a NameManager GPO + */ + Value recall(URI key); + + IGPO recallAsGPO(URI key); + + void remove(IGPO gpo); + + ICloseableIterator<Statement> evaluateGraph(String query); + + ValueFactory getValueFactory(); + + void checkValue(Value newValue); } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/NanoSparqlObjectManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/NanoSparqlObjectManager.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/NanoSparqlObjectManager.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,248 @@ +package com.bigdata.gom.om; + +import java.util.ArrayList; +import java.util.Iterator; + +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.query.BindingSet; +import org.openrdf.query.GraphQuery; +import org.openrdf.query.GraphQueryResult; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.repository.RepositoryException; + +import com.bigdata.gom.gpo.GPO; +import com.bigdata.gom.gpo.IGPO; +import com.bigdata.rdf.model.BigdataResource; +import com.bigdata.rdf.model.BigdataStatementImpl; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactoryImpl; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.rdf.sail.webapp.client.IPreparedGraphQuery; +import com.bigdata.rdf.sail.webapp.client.IPreparedTupleQuery; +import com.bigdata.rdf.sail.webapp.client.IRemoteRepository; +import com.bigdata.rdf.sail.webapp.client.RemoteRepository; +import 
com.bigdata.rdf.sail.webapp.client.RemoteRepository.AddOp; +import com.bigdata.rdf.sail.webapp.client.RemoteRepository.RemoveOp; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.striterator.CloseableIteratorWrapper; +import com.bigdata.striterator.ICloseableIterator; + +public class NanoSparqlObjectManager extends ObjectMgrModel { + final RemoteRepository m_repo; + + public NanoSparqlObjectManager(final RemoteRepository repo, final String namespace) { + super(BigdataValueFactoryImpl.getInstance(namespace)); + + m_repo = repo; + } + + @Override + public void close() { + // m_repo.close(); + } + + @Override + public ICloseableIterator<BindingSet> evaluate(String query) { + try { + final IPreparedTupleQuery q = m_repo.prepareTupleQuery(query); + final TupleQueryResult res = q.evaluate(); + return new CloseableIteratorWrapper<BindingSet>(new Iterator<BindingSet>() { + + @Override + public boolean hasNext() { + try { + return res.hasNext(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public BindingSet next() { + try { + return res.next(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }); + } catch (RepositoryException e1) { + e1.printStackTrace(); + } catch (MalformedQueryException e1) { + e1.printStackTrace(); + } catch (QueryEvaluationException e) { + e.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } + + return null; + } + + @Override + public void execute(String updateStr) { + // TODO Auto-generated method stub + + } + + @Override + public boolean isPersistent() { + return true; + } + + @Override + public void materialize(IGPO gpo) { + if (gpo == null || gpo.getId() == null) + throw new IllegalArgumentException("Materialization requires an identity"); + + if (log.isTraceEnabled()) + log.trace("Materializing: " + gpo.getId()); + + ((GPO) gpo).reset(); + + // At present the DESCRIBE query will simply return a set of + // statements equivalent to a TupleQuery <id, ?, ?> + final String query = "DESCRIBE <" + gpo.getId().toString() + ">"; + final ICloseableIterator<Statement> stmts = evaluateGraph(query); + + while (stmts.hasNext()) { + final Statement stmt = stmts.next(); + ((GPO) gpo).initValue(stmt.getPredicate(), stmt.getObject()); + } + } + + @Override + public void insert(final Resource id, final URI key, final Value val) { + if (log.isTraceEnabled()) + log.trace("Inserting statement: " + id.stringValue() + " " + key.stringValue() + " " + val.stringValue()); + + final Statement statement = m_valueFactory.createStatement(id, key, val); + final ArrayList<Statement> batch = new ArrayList<Statement>(1); + batch.add(statement); + try { + m_repo.add(new AddOp(batch)); + } catch (Exception e) { + throw new RuntimeException("Unable to insert statement", e); + } + } + + @Override + public void retract(final Resource id, final URI key, final Value val) { + if (false && log.isTraceEnabled()) + log.trace("Removing statement: " + id.stringValue() + " " + key.stringValue() + " " + val.stringValue()); + + try { + m_repo.remove(new RemoveOp((URI) id, key, val, null)); + } catch (Exception e) { + throw new RuntimeException("Unable to remove statement", e); + } + } + + @Override + void doCommit() { + // FIXME: The current NanoSparqlServer commits each update. 
This + // needs to change to associate with an IsolatedTransaction with + // an additional commit/rollback protocol + } + + @Override + void doRollback() { + // FIXME: see comment above for doCommit() + } + + @Override + public ICloseableIterator<Statement> evaluateGraph(String query) { + try { + final IPreparedGraphQuery q = m_repo.prepareGraphQuery(query); + final GraphQueryResult res = q.evaluate(); + return new CloseableIteratorWrapper<Statement>(new Iterator<Statement>() { + + @Override + public boolean hasNext() { + try { + return res.hasNext(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public Statement next() { + try { + return res.next(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }); + } catch (RepositoryException e1) { + e1.printStackTrace(); + } catch (MalformedQueryException e1) { + e1.printStackTrace(); + } catch (QueryEvaluationException e) { + e.printStackTrace(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + return null; + } + + @Override + public void remove(IGPO gpo) { + // TODO Auto-generated method stub + + } + + @Override + void flushTerms() { + // TODO Auto-generated method stub + + } + + @Override + void flushStatements() { + // handle batch removes + try { + final RemoveOp rop = m_removes.size() > 0 ? new RemoveOp(m_removes) : null; + final AddOp iop = m_inserts.size() > 0 ? new AddOp(m_inserts) : null; + + if (rop != null && iop != null) { + m_repo.update(rop, iop); + } else if (iop != null) { + m_repo.add(iop); + } else if (rop != null) { + m_repo.remove(rop); + } + + m_inserts.clear(); + m_removes.clear(); + } catch (Exception e) { + throw new RuntimeException("Unable to flush statements", e); + } + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/NanoSparqlObjectManager.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectManager.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectManager.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectManager.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,268 @@ +package com.bigdata.gom.om; + +import java.util.Iterator; +import java.util.List; + +import org.apache.log4j.Logger; +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.query.BindingSet; +import org.openrdf.query.GraphQuery; +import org.openrdf.query.GraphQueryResult; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.repository.RepositoryException; + +import com.bigdata.gom.gpo.GPO; +import com.bigdata.gom.gpo.IGPO; +import com.bigdata.rdf.model.BigdataResource; +import com.bigdata.rdf.model.BigdataStatement; +import com.bigdata.rdf.model.BigdataStatementImpl; +import com.bigdata.rdf.model.BigdataURI; +import 
com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.rdf.sail.BigdataSailRepositoryConnection; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.striterator.CloseableIteratorWrapper; +import com.bigdata.striterator.ICloseableIterator; + +public class ObjectManager extends ObjectMgrModel { + private static final Logger log = Logger.getLogger(IObjectManager.class); + + final BigdataSailRepositoryConnection m_cxn; + + public ObjectManager(final BigdataSailRepositoryConnection cxn) { + super(cxn.getTripleStore().getValueFactory()); + m_cxn = cxn; + } + + @Override + public void close() { + try { + m_cxn.close(); + } catch (RepositoryException e) { + log.warn("Problem with close", e); + } + m_dict.clear(); + } + + @Override + public ICloseableIterator<BindingSet> evaluate(final String query) { + try { + final TupleQuery q = m_cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + final TupleQueryResult res = q.evaluate(); + return new CloseableIteratorWrapper<BindingSet>(new Iterator<BindingSet>() { + + @Override + public boolean hasNext() { + try { + return res.hasNext(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public BindingSet next() { + try { + return res.next(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }); + } catch (RepositoryException e1) { + e1.printStackTrace(); + } catch (MalformedQueryException e1) { + e1.printStackTrace(); + } catch (QueryEvaluationException e) { + e.printStackTrace(); + } + + return null; + } + + public ICloseableIterator<Statement> evaluateGraph(final String query) { + try { + final GraphQuery q = m_cxn.prepareGraphQuery(QueryLanguage.SPARQL, query); + final GraphQueryResult res = q.evaluate(); + return new CloseableIteratorWrapper<Statement>(new Iterator<Statement>() { + + @Override + public boolean hasNext() { + try { + return res.hasNext(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public Statement next() { + try { + return res.next(); + } catch (QueryEvaluationException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + }); + } catch (RepositoryException e1) { + e1.printStackTrace(); + } catch (MalformedQueryException e1) { + e1.printStackTrace(); + } catch (QueryEvaluationException e) { + e.printStackTrace(); + } + + return null; + } + @Override + public void execute(String updateStr) { + // TODO Auto-generated method stub + + } + + @Override + public boolean isPersistent() { + return true; // + } + + @Override + public void materialize(IGPO gpo) { + if (false && log.isTraceEnabled()) + log.trace("Materializing: " + gpo.getId()); + + ((GPO) gpo).reset(); + + // At present the DESCRIBE query will simply return a set of + // statements equivalent to a TupleQuery <id, ?, ?> + final String query = "DESCRIBE <" + gpo.getId().toString() + ">"; + final ICloseableIterator<Statement> stmts = evaluateGraph(query); + + while (stmts.hasNext()) { + final Statement stmt = stmts.next(); + ((GPO) gpo).initValue(stmt.getPredicate(), stmt.getObject()); + } + } + + @Override + public void insert(Resource id, URI key, Value val) throws RepositoryException { + if (false && log.isTraceEnabled()) + log.trace("Inserting statement: " + id.stringValue() + " " + 
key.stringValue() + " " + val.stringValue()); + + // experiment with adding using batch syntax + if (false) { + // m_cxn.getTripleStore().addStatement(id, key, val); + m_cxn.add(id, key, val); + } else { + final ISPO spo = new BigdataStatementImpl((BigdataResource) id, + (BigdataURI) key, + (BigdataValue) val, null, StatementEnum.Explicit, false); + + m_cxn.getTripleStore().addStatements(new ISPO[] {spo}, 1); + } + } + + @Override + public void retract(Resource id, URI key, Value val) throws RepositoryException { + if (log.isTraceEnabled()) + log.trace("Removing statement: " + id.stringValue() + " " + key.stringValue() + " " + val.stringValue()); + m_cxn.remove(id, key, val); + } + + @Override + void doCommit() { + m_cxn.getTripleStore().commit(); + } + + /** + * doRollback handles the "partial" updates written to maintain referential integrity and also + * incremental updates of "dirty" objects. + */ + @Override + void doRollback() { + m_cxn.getTripleStore().abort(); + } + + @Override + public void remove(IGPO gpo) { + try { + // Removes all references + m_cxn.remove(gpo.getId(), null, null); + m_cxn.remove((Resource) null, null, gpo.getId()); + } catch (RepositoryException e) { + throw new RuntimeException("Unable to remove object", e); + } + } + + @Override + public ValueFactory getValueFactory() { + return m_valueFactory; + } + + @Override + void flushTerms() { + if (m_terms.size() > 0) { + final BigdataValue[] terms = new BigdataValue[m_terms.size()]; + m_terms.toArray(terms); + m_terms.clear(); + final long start = System.currentTimeMillis(); + m_cxn.getTripleStore().addTerms(terms); + if (log.isTraceEnabled()) + log.trace("Added " + terms.length + " terms: " + (System.currentTimeMillis()-start) + "ms"); + } + } + + @Override + void flushStatements() { + // handle batch removes + if (m_removes.size() > 0) { + final ISPO[] spos = statementsToSPO(m_removes); + m_removes.clear(); + m_cxn.getTripleStore().removeStatements(spos, spos.length); + } + + // handle batch inserts + if (m_inserts.size() > 0) { + final ISPO[] spos = statementsToSPO(m_inserts); + m_inserts.clear(); + m_cxn.getTripleStore().addStatements(spos, spos.length); + + } + } + + SPO[] statementsToSPO(List<Statement> statements) { + final int size = statements.size(); + SPO[] ret = new SPO[size]; + + for (int i = 0; i < size; i++) { + final BigdataStatement s = (BigdataStatement) statements.get(i); + ret[i] = new SPO(s.getSubject().getIV(), s.getPredicate().getIV(), s.getObject().getIV(), StatementEnum.Explicit); + if (ret[i].s == null || ret[i].p == null || ret[i].o == null) { + throw new IllegalStateException("Values must be bound"); + } + } + + return ret; + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectManager.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectMgrModel.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectMgrModel.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectMgrModel.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,278 @@ +package com.bigdata.gom.om; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentHashMap; + +import 
org.apache.log4j.Logger; +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.repository.RepositoryException; + +import com.bigdata.gom.gpo.BasicSkin; +import com.bigdata.gom.gpo.GPO; +import com.bigdata.gom.gpo.IGPO; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataResource; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.rdf.spo.SPO; + +public abstract class ObjectMgrModel implements IObjectManager { + + protected static final Logger log = Logger.getLogger(IObjectManager.class); + + final WeakHashMap<Resource, IGPO> m_dict = new WeakHashMap<Resource, IGPO>(); + + final ConcurrentHashMap<URI, URI> m_internedKeys = new ConcurrentHashMap<URI, URI>(); + + final ValueFactory m_valueFactory; + + // new terms cache to enable batch term registration on update/flush + final ArrayList<BigdataValue> m_terms = new ArrayList<BigdataValue>(); + final ArrayList<Statement> m_inserts = new ArrayList<Statement>(); + final ArrayList<Statement> m_removes = new ArrayList<Statement>(); + + // Object Creation and ID Management patterns + final URI s_idMgr; + final URI s_idMgrNextId; + final URI s_nmeMgr; + + int m_transactionCounter = 0; + + ObjectMgrModel(final ValueFactory valueFactory) { + m_valueFactory = valueFactory; + s_idMgr = m_valueFactory.createURI("gpo:idMgr"); + s_idMgrNextId = m_valueFactory.createURI("gpo:idMgr#nextId"); + s_nmeMgr = m_valueFactory.createURI("gpo:nmeMgr"); + + addNewTerm((BigdataValue) s_idMgr); + addNewTerm((BigdataValue) s_idMgrNextId ); + addNewTerm((BigdataValue) s_nmeMgr); + } + + @Override + public URI internKey(final URI key) { + final URI old = m_internedKeys.putIfAbsent(key, key); + + + final URI uri = old != null ? old : key; + + if (old == null && (uri instanceof BigdataURI) && ((BigdataURI) uri).getIV() == null) + addNewTerm((BigdataURI) uri); + + return uri; + } + + final ArrayList<GPO> m_dirtyGPOs = new ArrayList<GPO>(); + + final int m_maxDirtyListSize = 1000; // 5000; // FIXME: Init from property file + + /** + * GPOs are added to the dirty list when initially modified. + * + * <p>The list cannot be allowed to grow unbounded since it retains a + * concrete reference to the GPO and OutOfMemory will occur. The + * solution is to incrementally flush the dirty list.</p> + */ + public void addToDirtyList(GPO gpo) { + m_dirtyGPOs.add(gpo); + if (m_dirtyGPOs.size() > m_maxDirtyListSize) { + if (log.isTraceEnabled()) + log.trace("Incremental flush of dirty objects"); + + flushDirtyObjects(); + } + } + + abstract void flushTerms(); + + private void flushDirtyObjects() { + // prepare values + Iterator<GPO> newValues = m_dirtyGPOs.iterator(); + while (newValues.hasNext()) { + final GPO gpo = newValues.next(); + gpo.prepareBatchTerms(); + } + + // flush terms + flushTerms(); + + final long start = System.currentTimeMillis(); + final long count = m_dirtyGPOs.size(); + + if (true) { + Iterator<GPO> updates = m_dirtyGPOs.iterator(); + while (updates.hasNext()) { + updates.next().prepareBatchUpdate(); + } + + flushStatements(); + } else { + // update dirty objects - is it worth while batching SPO[]? 
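+            // Non-batched path, for comparison: each dirty GPO issues its
+            // inserts and retractions individually via update(), rather than
+            // gathering them into the batch lists flushed by flushStatements().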
+ Iterator<GPO> updates = m_dirtyGPOs.iterator(); + while (updates.hasNext()) { + try { + updates.next().update(); + } catch (RepositoryException e) { + throw new RuntimeException("Unexpected update exception", e); + } + } + } + m_dirtyGPOs.clear(); + if (log.isTraceEnabled()) + log.trace("Flush took " + (System.currentTimeMillis()-start) + "ms for " + count + " objects"); + } + + abstract void flushStatements(); + + @Override + public IGPO getGPO(final Resource id) { + IGPO ret = m_dict.get(id); + + if (ret == null) { + ret = new GPO(this, id); + // materialize(ret); // JFDI? + m_dict.put(id, ret); + } + + return ret; + } + + @Override + public synchronized int beginNativeTransaction() { + return m_transactionCounter++; + } + + @Override + public int commitNativeTransaction(final int expectedCounter) { + final int ret = --m_transactionCounter; + if (ret != expectedCounter) { + throw new IllegalArgumentException("Unexpected transaction counter"); + } + + if (ret == 0) { + flushDirtyObjects(); + } + + doCommit(); + + return ret; + } + + abstract void doCommit(); + + @Override + public int getNativeTransactionCounter() { + return m_transactionCounter; + } + + @Override + public void rollbackNativeTransaction() { + // just clear the cache for now + m_dict.clear(); + m_dirtyGPOs.clear(); + m_transactionCounter = 0; + m_idMgr = null; + + doRollback(); + } + + abstract void doRollback(); + + @Override + public IGPO createGPO() { + BasicSkin idMgr = getIdMgr(); + + int nxtId = idMgr.getIntValue(s_idMgrNextId)+1; + idMgr.setValue(s_idMgrNextId, nxtId); + + final Resource uri = getValueFactory().createURI("gpo:#" + nxtId); + addNewTerm((BigdataValue) uri); + final GPO ret = (GPO) getGPO(uri); + + ret.setMaterialized(true); + + return ret; + } + + protected void addNewTerm(final BigdataValue uri) { + if (uri.isRealIV()) + throw new IllegalArgumentException("IV already available: " + uri.stringValue()); + + m_terms.add(uri); + } + + BasicSkin m_idMgr = null; + protected BasicSkin getIdMgr() { + if (m_idMgr == null) { + if (log.isTraceEnabled()) + log.trace("retrieving ID Manager"); + + IGPO idMgr = getGPO(s_idMgr); + + m_idMgr = new BasicSkin(idMgr); + } + + return m_idMgr; + } + + /** + * Simple save/recall interface that the ObjectManager provides to simplify + * other pattern implementations. Internally it uses a NameManager GPO + */ + public void save(final URI key, Value value) { + getGPO(s_nmeMgr).setValue(key, value); + } + + /** + * Simple save/recall interface that the ObjectManager provides to simplify + * other pattern implementations. 
Internally it uses a NameManager GPO + */ + public Value recall(final URI key) { + return getGPO(s_nmeMgr).getValue(key); + } + + public IGPO recallAsGPO(final URI key) { + Value val = recall(key); + + if (val instanceof Resource) { + return getGPO((Resource) val); + } else { + return null; + } + } + + public void checkValue(Value newValue) { + final BigdataValue v = (BigdataValue) newValue; + if (!v.isRealIV()) { + addNewTerm(v); + } + } + + public void clearCache() { + m_dict.clear(); + m_dirtyGPOs.clear(); + } + + public void insertBatch(final Resource m_id, final URI bigdataURI, final Value v) { + m_inserts.add(m_valueFactory.createStatement(m_id, bigdataURI, v)); + } + + public void removeBatch(final Resource m_id, final URI bigdataURI, final Value v) { + m_removes.add(m_valueFactory.createStatement(m_id, bigdataURI, v)); + } + + @Override + public ValueFactory getValueFactory() { + return m_valueFactory; + } + +} Property changes on: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/om/ObjectMgrModel.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/skin/GenericSkinRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/skin/GenericSkinRegistry.java 2012-05-16 08:38:12 UTC (rev 6321) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/java/com/bigdata/gom/skin/GenericSkinRegistry.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -436,7 +436,7 @@ { Constructor c = implClass.getDeclaredConstructor - ( new Class[] { IGenericSkin.class } + ( new Class[] { IGPO.class } ); return (IGenericSkin) c.newInstance Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/TestGOM.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/TestGOM.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-gom/src/test/com/bigdata/gom/TestGOM.java 2012-05-31 07:48:48 UTC (rev 6322) @@ -0,0 +1,382 @@ +package com.bigdata.gom; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.net.URL; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ConcurrentHashMap; + +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.LiteralImpl; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.repository.Repository; +import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.RepositoryException; +import org.openrdf.rio.RDFFormat; +import org.openrdf.rio.RDFParseException; + +import com.bigdata.btree.IndexMetadata; +import com.bigdata.gom.gpo.BasicSkin; +import com.bigdata.gom.gpo.GPO; +import com.bigdata.gom.gpo.IGPO; +import com.bigdata.gom.gpo.ILinkSet; +import com.bigdata.gom.om.IObjectManager; +import com.bigdata.gom.om.ObjectManager; +import com.bigdata.gom.skin.GenericSkinRegistry; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.Journal.Options; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.sail.BigdataSailRepository; +import com.bigdata.rdf.sail.BigdataSailRepositoryConnection; 
+import com.bigdata.rdf.store.AbstractTrip... [truncated message content] |
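For orientation, the ObjectManager API introduced above composes roughly as follows. This is a minimal sketch rather than code from the commit: the GomSketch/run names and the ex: URIs are invented for illustration, and it assumes a BigdataSailRepositoryConnection obtained as in TestGOM.

import org.openrdf.model.URI;
import org.openrdf.model.ValueFactory;

import com.bigdata.gom.gpo.IGPO;
import com.bigdata.gom.om.IObjectManager;
import com.bigdata.gom.om.ObjectManager;
import com.bigdata.rdf.sail.BigdataSailRepositoryConnection;

public class GomSketch {

    public static void run(final BigdataSailRepositoryConnection cxn) {

        final IObjectManager om = new ObjectManager(cxn);
        final ValueFactory vf = om.getValueFactory();
        final URI worksFor = vf.createURI("ex:worksFor"); // assumed predicate

        final int tx = om.beginNativeTransaction();
        try {

            // Auto-assigned identities of the form "gpo:#<n>".
            final IGPO dept = om.createGPO();
            final IGPO emp = om.createGPO();

            // Assert (emp, worksFor, dept): marks [emp] dirty.
            emp.addValue(worksFor, dept.getId());

            // Name-manager save; recall(...) returns the value later.
            om.save(vf.createURI("ex:theDept"), dept.getId());

            // The outermost commit flushes terms and statements, then commits.
            om.commitNativeTransaction(tx);

        } catch (Throwable t) {
            om.rollbackNativeTransaction();
            throw new RuntimeException(t);
        }
    }

}

Note the counter-based protocol: commitNativeTransaction must be handed the value returned by the matching beginNativeTransaction, and only the outermost commit flushes the dirty list before delegating to doCommit().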
From: <tho...@us...> - 2012-05-31 15:43:24
|
Revision: 6323
http://bigdata.svn.sourceforge.net/bigdata/?rev=6323&view=rev
Author: thompsonbry
Date: 2012-05-31 15:43:17 +0000 (Thu, 31 May 2012)

Log Message:
-----------
Partial resolution of https://sourceforge.net/apps/trac/bigdata/ticket/558 (SPARQL INSERT not working in same request after INSERT DATA). This fixes the problem when the updates are performed using the unisolated connection. There is still a problem when they are performed in a full read/write tx. I will look into that next.

This commit includes a unit test which replicates the problem (against both the unisolated connection and a read/write tx), a new AST optimizer to handle resolution of unknown terms before evaluating an update, a test suite for that optimizer, and a patch to AST2BOpUpdate to correctly (a) flush the sail assertion and retraction buffers; and (b) perform additional term resolution steps.

Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestAll.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/Bigdata2ASTSPARQLParser.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLUpdateTest.java

Added Paths:
-----------
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTBatchResolveTermsOptimizer.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTBatchResolveTermsOptimizer.java

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java	2012-05-31 07:48:48 UTC (rev 6322)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUpdate.java	2012-05-31 15:43:17 UTC (rev 6323)
@@ -115,6 +115,7 @@
 import com.bigdata.rdf.sparql.ast.UpdateRoot;
 import com.bigdata.rdf.sparql.ast.UpdateType;
 import com.bigdata.rdf.sparql.ast.VarNode;
+import com.bigdata.rdf.sparql.ast.optimizers.ASTBatchResolveTermsOptimizer;
 import com.bigdata.rdf.spo.ISPO;
 import com.bigdata.rdf.store.AbstractTripleStore;
 import com.bigdata.rdf.store.BD;
@@ -216,8 +217,10 @@
 
         final ASTContainer astContainer = context.astContainer;
 
-        // * Note: Change this to the optimized AST if we start doing AST
-        // optimizations for UPDATE. */
+        /*
+         * Note: Change this to the optimized AST if we start doing AST
+         * optimizations for UPDATE.
+         */
         final UpdateRoot updateRoot = astContainer.getOriginalUpdateAST();
 
         // Set as annotation on the ASTContainer.
@@ -227,10 +230,45 @@
          * Evaluate each update operation in the optimized UPDATE AST in turn.
          */
         PipelineOp left = null;
-        for (Update op : updateRoot) {
+        int updateIndex = 0;
+        for (Update op : updateRoot) {
 
-            left = convertUpdateSwitch(left, op, context);
+            if (updateIndex > 0) {
+                /*
+                 * There is more than one update operation in this request.
+ */ + + /* + * Note: We need to flush the assertion / retraction buffers if + * the Sail is local since some of the code paths supporting + * UPDATEs do not go through the BigdataSail and would otherwise + * not have their updates flushed until the commit (which does + * go through the BigdataSail). + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/558 + */ + context.conn.flush(); + + // log.error("\nafter op=" + op + "\n" + context.db.dumpStore()); + + /* + * We need to re-resolve any RDF Values appearing in this UPDATE + * operation which have a 0L term identifier in case they have + * become defined through the previous update(s). + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/558 + */ + op = (Update) new ASTBatchResolveTermsOptimizer().optimize(context, + op/* queryNode */, null/* bindingSets */); + + } + + // convert/run the update operation. + left = convertUpdateSwitch(left, op, context); + + updateIndex++; + } /* Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTBatchResolveTermsOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTBatchResolveTermsOptimizer.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTBatchResolveTermsOptimizer.java 2012-05-31 15:43:17 UTC (rev 6323) @@ -0,0 +1,182 @@ +package com.bigdata.rdf.sparql.ast.optimizers; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.sail.sparql.Bigdata2ASTSPARQLParser; +import com.bigdata.rdf.sparql.ast.ConstantNode; +import com.bigdata.rdf.sparql.ast.IQueryNode; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; + +/** + * Optimizer attempts to resolve any {@link BigdataValue}s in the AST which are + * associated with a mock IV. This resolution step is normally performed by the + * parser. However, when there is a sequence of UPDATE operations in the same + * request, previous UPDATE operations in that sequence MIGHT have caused terms + * to become declared in the lexicon so we must attempt to re-resolve any which + * are mock IVs. + * + * TODO We could have this always run and not do resolution in + * {@link Bigdata2ASTSPARQLParser}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: BatchRDFValueResolver.java 6160 2012-03-18 19:57:37Z + * thompsonbry $ + */ +public class ASTBatchResolveTermsOptimizer implements IASTOptimizer { + + private static final transient Logger log = Logger + .getLogger(ASTBatchResolveTermsOptimizer.class); + + @Override + public IQueryNode optimize(final AST2BOpContext context, + final IQueryNode queryNode, final IBindingSet[] bindingSets) { + + /* + * Look for unknown terms and attempt to resolve them now. + */ + + /* + * The list of the constants with mock IVs and null unless we find + * something. 
+ * + * Note: This is a list and not a set because we need to swap in the + * resolve IValueExpression for each ConstantNode in the query (there + * can be distinct ConstantNodes for the same unknown value and we would + * miss the duplicates unless this is a simple list). + */ + List<ConstantNode> unknown = null; + + // Look for unknown terms. + { + + final Iterator<ConstantNode> itr = BOpUtility.visitAll( + (BOp) queryNode, ConstantNode.class); + + while (itr.hasNext()) { + + final ConstantNode cnode = itr.next(); + + final IConstant<?> c = cnode.getValueExpression(); + + final IV<?, ?> iv = (IV<?, ?>) c.get(); + + if (iv.isNullIV()) { + + if (unknown == null) { + + // Lazy allocation. + unknown = new LinkedList<ConstantNode>(); + + } + + unknown.add(cnode); + + } + + } + + } + + if (unknown != null) { + + /* + * Batch resolve any terms which are currently unknown. + */ + + final BigdataValue[] values = new BigdataValue[unknown.size()]; + { + + /* + * First, build up the set of unknown terms. + */ + int i = 0; + + for (ConstantNode cnode : unknown) { + + final IConstant<?> c = cnode.getValueExpression(); + + final IV<?, ?> iv = (IV<?, ?>) c.get(); + + final BigdataValue v = iv.getValue(); + + v.clearInternalValue(); + + values[i++] = v; + + } + + /* + * Batch resolution. + */ + + if (log.isDebugEnabled()) + log.debug("UNKNOWNS: " + Arrays.toString(values)); + + context.getAbstractTripleStore().getLexiconRelation() + .addTerms(values, values.length, true/* readOnly */); + + } + + /* + * Replace the value expression with one which uses the resolved IV. + */ + + { + int i = 0; + for (ConstantNode cnode : unknown) { + + final BigdataValue v = values[i++]; + + if (v.isRealIV()) { + + if (log.isInfoEnabled()) + log.info("RESOLVED: " + v + " => " + v.getIV()); + + /* + * Note: If the constant is an effective constant + * because it was given in the binding sets then we also + * need to capture the variable name associated with + * that constant. + */ + + final IV<?, ?> iv = v.getIV(); + + final Constant<?> oldc = (Constant<?>) cnode + .getValueExpression(); + + final IVariable<?> var = (IVariable<?>) oldc + .getProperty(Constant.Annotations.VAR); + + @SuppressWarnings({ "unchecked", "rawtypes" }) + final Constant<?> newc = (var == null ? new Constant(iv) + : new Constant(var, iv)); + + // cnode.setValueExpression(newc); + cnode.setArg(0, newc); + + } + + } + + } + + } + + return queryNode; + + } + +} Added: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTBatchResolveTermsOptimizer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTBatchResolveTermsOptimizer.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestASTBatchResolveTermsOptimizer.java 2012-05-31 15:43:17 UTC (rev 6323) @@ -0,0 +1,315 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 29, 2011 + */ + +package com.bigdata.rdf.sparql.ast.optimizers; + +import org.openrdf.query.algebra.StatementPattern.Scope; + +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IValueExpression; +import com.bigdata.bop.Var; +import com.bigdata.bop.bindingSet.ListBindingSet; +import com.bigdata.rdf.internal.VTE; +import com.bigdata.rdf.internal.impl.TermId; +import com.bigdata.rdf.model.BigdataURI; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; +import com.bigdata.rdf.sparql.ast.ASTContainer; +import com.bigdata.rdf.sparql.ast.AbstractASTEvaluationTestCase; +import com.bigdata.rdf.sparql.ast.ConstantNode; +import com.bigdata.rdf.sparql.ast.IQueryNode; +import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.ProjectionNode; +import com.bigdata.rdf.sparql.ast.QueryRoot; +import com.bigdata.rdf.sparql.ast.QueryType; +import com.bigdata.rdf.sparql.ast.StatementPatternNode; +import com.bigdata.rdf.sparql.ast.VarNode; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; + +/** + * Test suite for {@link ASTBatchResolveTermsOptimizer}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: TestASTServiceNodeOptimizer.java 6080 2012-03-07 18:38:55Z thompsonbry $ + */ +public class TestASTBatchResolveTermsOptimizer extends AbstractASTEvaluationTestCase { + + /** + * + */ + public TestASTBatchResolveTermsOptimizer() { + } + + /** + * @param name + */ + public TestASTBatchResolveTermsOptimizer(String name) { + super(name); + } + + /** + * Given + * + * <pre> + * SELECT VarNode(s) VarNode(p) VarNode(v) + * JoinGroupNode { + * JoinGroupNode { + * JoinGroupNode [context=ConstantNode(TermId(0L)[http://example/out])] { + * StatementPatternNode(VarNode(s), VarNode(p), VarNode(v), ConstantNode(TermId(0L)[http://example/out])) [scope=NAMED_CONTEXTS] + * } + * } + * } + * </pre> + * + * where the unknown term is <code>http://example/out</code> and IS in fact + * in the lexicon, the {@link IValueExpression} for the {@link ConstantNode} + * associated with that mock IV is rewritten to the resolved IV. + */ + public void test_batchResolveTerms_01() { + + /* + * Note: DO NOT share structures in this test!!!! + * + * Note: This test depends on having multiple BigdataURIs for the + * unknown term. In one case the IV is known and in the other case it is + * not known. + */ + + final BigdataValueFactory f = store.getValueFactory(); + + // A version where a mock IV is associated with the term. + final BigdataURI unknown1 = f.createURI("http://example/out"); + unknown1.setIV(TermId.mockIV(VTE.URI)); + assertFalse(unknown1.isRealIV()); + unknown1.getIV().setValue(unknown1); + + // A version where a real IV is associated with the term. + final BigdataURI known1 = f.createURI("http://example/out"); + store.addTerms(new BigdataValue[]{known1}); + assertTrue(known1.isRealIV()); + + final IBindingSet[] bsets = new IBindingSet[] { // + new ListBindingSet() + }; + + /** + * The source AST. 
+ */ + final QueryRoot given = new QueryRoot(QueryType.SELECT); + { + + final ProjectionNode projection = new ProjectionNode(); + given.setProjection(projection); + + projection.addProjectionVar(new VarNode("s")); + projection.addProjectionVar(new VarNode("p")); + projection.addProjectionVar(new VarNode("v")); + + final JoinGroupNode whereClause = new JoinGroupNode(); + given.setWhereClause(whereClause); + + { + + final JoinGroupNode graphPattern = new JoinGroupNode(); + graphPattern.setContext(new ConstantNode(new Constant(unknown1 + .getIV()))); + whereClause.addChild(graphPattern); + + final JoinGroupNode innerGroup = new JoinGroupNode(); + graphPattern.addChild(innerGroup); + + innerGroup.addChild(new StatementPatternNode(new VarNode("s"), + new VarNode("p"), new VarNode("v"), new ConstantNode( + new Constant(unknown1.getIV()))/* c */, + Scope.NAMED_CONTEXTS)); + + } + + } + + /** + * The expected AST after the rewrite. + */ + final QueryRoot expected = new QueryRoot(QueryType.SELECT); + { + + final ProjectionNode projection = new ProjectionNode(); + expected.setProjection(projection); + + projection.addProjectionVar(new VarNode("s")); + projection.addProjectionVar(new VarNode("p")); + projection.addProjectionVar(new VarNode("v")); + + final JoinGroupNode whereClause = new JoinGroupNode(); + expected.setWhereClause(whereClause); + + { + + final JoinGroupNode graphPattern = new JoinGroupNode(); + graphPattern.setContext(new ConstantNode(new Constant(known1 + .getIV()))); + whereClause.addChild(graphPattern); + + final JoinGroupNode innerGroup = new JoinGroupNode(); + graphPattern.addChild(innerGroup); + + innerGroup.addChild(new StatementPatternNode(new VarNode("s"), + new VarNode("p"), new VarNode("v"), new ConstantNode( + new Constant(known1.getIV()))/* c */, + Scope.NAMED_CONTEXTS)); + + } + + } + + final IASTOptimizer rewriter = new ASTBatchResolveTermsOptimizer(); + + final AST2BOpContext context = new AST2BOpContext(new ASTContainer( + given), store); + + final IQueryNode actual = rewriter.optimize(context, + given/* queryNode */, bsets); + + assertSameAST(expected, actual); + + } + + /** + * A variant of the test above where the Constant/2 constructor was used and + * we need to propagate the variable associated with that constant. + */ + public void test_batchResolveTerms_02() { + + /* + * Note: DO NOT share structures in this test!!!! + * + * Note: This test depends on having multiple BigdataURIs for the + * unknown term. In one case the IV is known and in the other case it is + * not known. + */ + + final BigdataValueFactory f = store.getValueFactory(); + + // A version where a mock IV is associated with the term. + final BigdataURI unknown1 = f.createURI("http://example/out"); + unknown1.setIV(TermId.mockIV(VTE.URI)); + assertFalse(unknown1.isRealIV()); + unknown1.getIV().setValue(unknown1); + + // A version where a real IV is associated with the term. + final BigdataURI known1 = f.createURI("http://example/out"); + store.addTerms(new BigdataValue[]{known1}); + assertTrue(known1.isRealIV()); + + final IBindingSet[] bsets = new IBindingSet[] { // + new ListBindingSet() + }; + + /** + * The source AST. 
+ */ + final QueryRoot given = new QueryRoot(QueryType.SELECT); + { + + final ProjectionNode projection = new ProjectionNode(); + given.setProjection(projection); + + projection.addProjectionVar(new VarNode("s")); + projection.addProjectionVar(new VarNode("p")); + projection.addProjectionVar(new VarNode("v")); + + final JoinGroupNode whereClause = new JoinGroupNode(); + given.setWhereClause(whereClause); + + { + + final JoinGroupNode graphPattern = new JoinGroupNode(); + graphPattern.setContext(new ConstantNode(new Constant(Var + .var("x"), unknown1.getIV()))); + whereClause.addChild(graphPattern); + + final JoinGroupNode innerGroup = new JoinGroupNode(); + graphPattern.addChild(innerGroup); + + innerGroup.addChild(new StatementPatternNode(new VarNode("s"), + new VarNode("p"), new VarNode("v"), + new ConstantNode(new Constant(Var.var("x"), unknown1 + .getIV()))/* c */, Scope.NAMED_CONTEXTS)); + + } + + } + + /** + * The expected AST after the rewrite. + */ + final QueryRoot expected = new QueryRoot(QueryType.SELECT); + { + + final ProjectionNode projection = new ProjectionNode(); + expected.setProjection(projection); + + projection.addProjectionVar(new VarNode("s")); + projection.addProjectionVar(new VarNode("p")); + projection.addProjectionVar(new VarNode("v")); + + final JoinGroupNode whereClause = new JoinGroupNode(); + expected.setWhereClause(whereClause); + + { + + final JoinGroupNode graphPattern = new JoinGroupNode(); + graphPattern.setContext(new ConstantNode(new Constant(Var + .var("x"), known1.getIV()))); + whereClause.addChild(graphPattern); + + final JoinGroupNode innerGroup = new JoinGroupNode(); + graphPattern.addChild(innerGroup); + + innerGroup.addChild(new StatementPatternNode(new VarNode("s"), + new VarNode("p"), new VarNode("v"), + new ConstantNode(new Constant(Var.var("x"), known1 + .getIV()))/* c */, Scope.NAMED_CONTEXTS)); + + } + + } + + final IASTOptimizer rewriter = new ASTBatchResolveTermsOptimizer(); + + final AST2BOpContext context = new AST2BOpContext(new ASTContainer( + given), store); + + final IQueryNode actual = rewriter.optimize(context, + given/* queryNode */, bsets); + + assertSameAST(expected, actual); + + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestAll.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestAll.java 2012-05-31 07:48:48 UTC (rev 6322) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/optimizers/TestAll.java 2012-05-31 15:43:17 UTC (rev 6323) @@ -136,6 +136,9 @@ // Unit tests for optimizer which attaches join filters to SPs. suite.addTestSuite(TestASTRangeOptimizer.class); + // Test suite for resolving mock IVs. + suite.addTestSuite(TestASTBatchResolveTermsOptimizer.class); + return suite; } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/Bigdata2ASTSPARQLParser.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/Bigdata2ASTSPARQLParser.java 2012-05-31 07:48:48 UTC (rev 6322) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/Bigdata2ASTSPARQLParser.java 2012-05-31 15:43:17 UTC (rev 6323) @@ -223,17 +223,19 @@ BlankNodeVarProcessor.process(uc); /* - * Batch resolve ASTRDFValue to BigdataValues with their - * associated IVs. 
- * - * TODO IV resolution might need to proceed separately for each - * UPDATE operation in a sequence since some operations can - * cause new IVs to be declared in the lexicon. Resolution - * before those IVs have been declared would produce a different - * result than resolution afterward (it will be a null IV before - * the Value is added to the lexicon and a TermId or BlobIV - * afterward). - */ + * Batch resolve ASTRDFValue to BigdataValues with their + * associated IVs. + * + * Note: IV resolution must proceed separately (or be + * re-attempted) for each UPDATE operation in a sequence since + * some operations can cause new IVs to be declared in the + * lexicon. Resolution before those IVs have been declared would + * produce a different result than resolution afterward (it will + * be a null IV before the Value is added to the lexicon and a + * TermId or BlobIV afterward). + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/558 + */ new BatchRDFValueResolver(context, true/* readOnly */) .process(uc); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest.java 2012-05-31 07:48:48 UTC (rev 6322) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/com/bigdata/rdf/sail/tck/BigdataSPARQLUpdateTest.java 2012-05-31 15:43:17 UTC (rev 6323) @@ -32,6 +32,12 @@ import java.util.Properties; import org.apache.log4j.Logger; +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.UpdateExecutionException; import org.openrdf.query.parser.sparql.SPARQLUpdateTest; import org.openrdf.repository.Repository; import org.openrdf.repository.RepositoryException; @@ -40,6 +46,7 @@ import com.bigdata.journal.BufferMode; import com.bigdata.journal.IIndexManager; +import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.BigdataSail.Options; import com.bigdata.rdf.sail.BigdataSailRepository; @@ -207,5 +214,86 @@ } logger.debug("dataset loaded."); } - + + /** + * Unit test for isolation semantics for a sequences of updates. + * + * @throws UpdateExecutionException + * @throws MalformedQueryException + * @throws RepositoryException + * @throws QueryEvaluationException + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/558 + */ + public void test_ticket538() throws UpdateExecutionException, + RepositoryException, MalformedQueryException, QueryEvaluationException { + + // the [in] and [out] graphs. + final URI gin = f.createURI("http://example/in"); + final URI gout = f.createURI("http://example/out"); + +// ((BigdataSailRepository) con.getRepository()).getDatabase().addTerms( +// new BigdataValue[] { (BigdataValue) gin, (BigdataValue) gout }); + +// // Make sure the target graphs are empty. +// { +// final String s = "# Update 1\n" +// + "DROP SILENT GRAPH <http://example/in>;\n" +// + "DROP SILENT GRAPH <http://example/out> ;\n"; +// +// con.prepareUpdate(QueryLanguage.SPARQL, s).execute(); +// } + + con.prepareUpdate(QueryLanguage.SPARQL, "DROP SILENT ALL").execute(); + // Make sure the graphs are empty. 
+        assertFalse(con.hasStatement(null,null,null, true, (Resource)gin));
+        assertFalse(con.hasStatement(null,null,null, true, (Resource)gout));
+
+        /*
+         * The mutation.
+         *
+         * A statement is inserted into the [in] graph in one operation. Then
+         * all statements in the [in] graph are copied into the [out] graph.
+         */
+        final String s = "# Update 2\n"//
+                + "PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n"//
+                + "INSERT DATA {\n"//
+                + " GRAPH <http://example/in> {\n"//
+                + " <http://example/president25> foaf:givenName \"William\" .\n"//
+                + " }\n"//
+                + "};\n"//
+                + "INSERT {\n"//
+                + " GRAPH <http://example/out> {\n"//
+                + " ?s ?p ?v .\n"//
+                + " }\n"//
+                + " }\n"//
+                + "WHERE {\n"//
+                + " GRAPH <http://example/in> {\n"//
+                + " ?s ?p ?v .\n"//
+                + " }\n"//
+                + " }\n"//
+                + ";";
+
+//        // The query - how many statements are in the [out] graph.
+//        final String q = "# Query\n"//
+//                + "SELECT (COUNT(*) as ?cnt) {\n"//
+//                + " GRAPH <http://example/out> {\n"//
+//                + " ?s ?p ?v .\n"//
+//                + " }\n"//
+//                + "} LIMIT 10";//
+
+        // run update once.
+        con.prepareUpdate(QueryLanguage.SPARQL, s).execute();
+
+        // Both graphs should now have some data.
+        assertTrue(con.hasStatement(null,null,null, true, (Resource)gin));
+
+        // Note: Succeeds if you do this a 2nd time.
+        if (false)
+            con.prepareUpdate(QueryLanguage.SPARQL, s).execute();
+
+        assertTrue(con.hasStatement(null,null,null, true, (Resource)gout));
+
+    }
+
+}

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLUpdateTest.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLUpdateTest.java	2012-05-31 07:48:48 UTC (rev 6322)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-sails/src/test/org/openrdf/query/parser/sparql/SPARQLUpdateTest.java	2012-05-31 15:43:17 UTC (rev 6323)
@@ -49,7 +49,7 @@
 
     protected RepositoryConnection con;
 
-    private ValueFactory f;
+    protected ValueFactory f;
 
     private URI bob;
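The essential move of the new optimizer - re-resolving a Value whose IV is still a mock against the lexicon - reduces to a few lines. A minimal sketch under stated assumptions: the ResolveSketch/isNowKnown names are invented, while the addTerms(..., readOnly) and isRealIV() calls mirror the ones used in the commit above.

import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.store.AbstractTripleStore;

public class ResolveSketch {

    /**
     * Return true iff the given URI is now declared in the lexicon, e.g.
     * because a previous UPDATE operation in the same request added it.
     */
    public static boolean isNowKnown(final AbstractTripleStore db,
            final String uri) {

        final BigdataValueFactory vf = db.getValueFactory();
        final BigdataValue v = vf.createURI(uri);

        // readOnly: resolve against the lexicon without declaring new terms.
        db.getLexiconRelation().addTerms(new BigdataValue[] { v },
                1/* numTerms */, true/* readOnly */);

        // A real (non-mock) IV was assigned iff the term was found.
        return v.isRealIV();

    }

}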
From: <tho...@us...> - 2012-06-12 14:59:04
Revision: 6338 http://bigdata.svn.sourceforge.net/bigdata/?rev=6338&view=rev
Author: thompsonbry
Date: 2012-06-12 14:58:53 +0000 (Tue, 12 Jun 2012)

Log Message:
-----------
Per email thread with MikeP, modified the CUTOFF_LIMIT interpretation to pass the effective cutoffLimit into the IAccessPath#solutions() method. This works since the default is MAX_LONG, which has the same semantics as ZERO (no cutoff is imposed).

Modified AccessPath#solutions() to (a) remove the old implementation and (b) modify the new implementation to NOT pass the [limit] as the capacity into AccessPath#iterator(). The latter method has logic to make decent choices about the capacity of the iterator which was being overridden when the CUTOFF_LIMIT was specified.

Removed the old solutions() method on IBindingSetAccessPath. Everything uses the new method which accepts the CUTOFF_LIMIT.

Removed FIXMEs in TestJoinGraphOnBSBMData since MikeP had removed the range variable for range bops from the API.

Reconciled the SIDs changes with the range bop changes in AST2BOpUtility.

Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java
branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java
branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java
branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java
branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java 2012-06-12 14:34:22 UTC (rev 6337)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinOp.java 2012-06-12 14:58:53 UTC (rev 6338)
@@ -390,17 +390,14 @@
final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>(
op.getChunkCapacity(), sink);
- final long cutoffLimit = pred.getProperty(
- IPredicate.Annotations.CUTOFF_LIMIT,
- IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
-
- // Obtain the iterator for the current join dimension.
- final ICloseableIterator<IBindingSet> itr;
- if (cutoffLimit == Long.MAX_VALUE)
- itr = ((IBindingSetAccessPath<?>) accessPath).solutions(stats);
- else
- itr = ((IBindingSetAccessPath<?>) accessPath).solutions(cutoffLimit, stats);
-
+ final long cutoffLimit = pred.getProperty(
+ IPredicate.Annotations.CUTOFF_LIMIT,
+ IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT);
+
+ // Obtain the iterator for the current join dimension.
+ final ICloseableIterator<IBindingSet> itr = ((IBindingSetAccessPath<?>) accessPath) + .solutions(cutoffLimit, stats); + state.hashJoin( itr,// left unsyncBuffer// out Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java 2012-06-12 14:34:22 UTC (rev 6337) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/JVMHashJoinOp.java 2012-06-12 14:58:53 UTC (rev 6338) @@ -326,17 +326,14 @@ final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>( op.getChunkCapacity(), sink); - final long cutoffLimit = pred.getProperty( - IPredicate.Annotations.CUTOFF_LIMIT, - IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); - + final long cutoffLimit = pred.getProperty( + IPredicate.Annotations.CUTOFF_LIMIT, + IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); + // Obtain the iterator for the current join dimension. - final ICloseableIterator<IBindingSet> itr; - if (cutoffLimit == Long.MAX_VALUE) - itr = ((IBindingSetAccessPath<?>) accessPath).solutions(stats); - else - itr = ((IBindingSetAccessPath<?>) accessPath).solutions(cutoffLimit, stats); - + final ICloseableIterator<IBindingSet> itr = ((IBindingSetAccessPath<?>) accessPath) + .solutions(cutoffLimit, stats); + state.hashJoin( itr,// left unsyncBuffer // where to write the solutions which join. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2012-06-12 14:34:22 UTC (rev 6337) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2012-06-12 14:58:53 UTC (rev 6338) @@ -1527,16 +1527,13 @@ */ protected void handleJoin() { - final long cutoffLimit = predicate.getProperty( - IPredicate.Annotations.CUTOFF_LIMIT, - IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); - + final long cutoffLimit = predicate.getProperty( + IPredicate.Annotations.CUTOFF_LIMIT, + IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); + // Obtain the iterator for the current join dimension. - final IChunkedOrderedIterator<?> itr; - if (cutoffLimit == Long.MAX_VALUE) - itr = accessPath.iterator(); - else - itr = accessPath.iterator(0, limit, (int) limit); + final IChunkedOrderedIterator<?> itr = accessPath.iterator(0, + cutoffLimit, 0/* capacity */); try { @@ -1646,16 +1643,13 @@ */ protected void handleJoin2() { - final long cutoffLimit = predicate.getProperty( + final long cutoffLimit = predicate.getProperty( IPredicate.Annotations.CUTOFF_LIMIT, IPredicate.Annotations.DEFAULT_CUTOFF_LIMIT); // Obtain the iterator for the current join dimension. 
- final ICloseableIterator<IBindingSet> itr; - if (cutoffLimit == Long.MAX_VALUE) - itr = ((IBindingSetAccessPath<?>) accessPath).solutions(stats); - else - itr = ((IBindingSetAccessPath<?>) accessPath).solutions(cutoffLimit, stats); + final ICloseableIterator<IBindingSet> itr = ((IBindingSetAccessPath<?>) accessPath) + .solutions(cutoffLimit, stats); try { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2012-06-12 14:34:22 UTC (rev 6337) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2012-06-12 14:58:53 UTC (rev 6338) @@ -717,20 +717,20 @@ } - /** - * {@inheritDoc} - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/209 (Access path - * should visit solutions for high level query). - */ - public ICloseableIterator<IBindingSet> solutions(final BaseJoinStats stats) { - -// final IVariable<?>[] vars = BOpUtility -// .getDistinctArgumentVariables(predicate); - - return BOpContext.solutions(iterator(), predicate, /*vars,*/ stats); - - } +// /** +// * {@inheritDoc} +// * +// * @see https://sourceforge.net/apps/trac/bigdata/ticket/209 (Access path +// * should visit solutions for high level query). +// */ +// public ICloseableIterator<IBindingSet> solutions(final BaseJoinStats stats) { +// +//// final IVariable<?>[] vars = BOpUtility +//// .getDistinctArgumentVariables(predicate); +// +// return BOpContext.solutions(iterator(), predicate, /*vars,*/ stats); +// +// } /** * {@inheritDoc} @@ -744,7 +744,8 @@ // final IVariable<?>[] vars = BOpUtility // .getDistinctArgumentVariables(predicate); - return BOpContext.solutions(iterator(0, limit, (int) limit), predicate, /*vars,*/ stats); + return BOpContext.solutions(iterator(0, limit, 0/* (int) limit */), + predicate, /* vars, */stats); } @@ -1195,7 +1196,6 @@ * buffer. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ */ static private class ChunkConsumerTask<R> implements Callable<Void> { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java 2012-06-12 14:34:22 UTC (rev 6337) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/accesspath/IBindingSetAccessPath.java 2012-06-12 14:58:53 UTC (rev 6338) @@ -45,17 +45,6 @@ * @version $Id$ */ public interface IBindingSetAccessPath<R> extends IAbstractAccessPath<R> { - - /** - * Return an iterator which will visit the solutions drawn from the access - * path. - * - * TODO Should this be visiting IBindingSet[]s? - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/209 (Access path - * should visit solutions for high level query). 
- */
- ICloseableIterator<IBindingSet> solutions(BaseJoinStats stats);

/**
* Return an iterator which will visit the solutions drawn from the access

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-06-12 14:34:22 UTC (rev 6337)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2012-06-12 14:58:53 UTC (rev 6338)
@@ -3803,12 +3803,26 @@
anns.add(new NV(IPredicate.Annotations.OPTIONAL, Boolean.TRUE));
}
+ /*
+ * Statements about statements.
+ *
+ * <a href="https://sourceforge.net/apps/trac/bigdata/ticket/526">
+ * Reification Done Right</a>
+ */
+ final VarNode sidVar = sp.sid();
+ if (QueryHints.DEFAULT_REIFICATION_DONE_RIGHT && sidVar != null) {
+
+ anns.add(new NV(SPOPredicate.Annotations.SID, sidVar
+ .getValueExpression()));
+
+ }
+
final RangeNode range = sp.getRange();
if (range != null) {
// Add the RangeBOp
anns.add(new NV(IPredicate.Annotations.RANGE, range.getRangeBOp()));
}
-
+
final String cutoffLimit = sp.getQueryHint(QueryHints.CUTOFF_LIMIT);
if (cutoffLimit != null) {
// Add the cutoff limit

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2012-06-12 14:34:22 UTC (rev 6337)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2012-06-12 14:58:53 UTC (rev 6338)
@@ -409,7 +409,6 @@
new NV(BOp.Annotations.BOP_ID, nextId++),//
new NV(Annotations.TIMESTAMP, timestamp),//
new NV(IPredicate.Annotations.RANGE, new RangeBOp(//
-// origProperty1,// FIXME verify correct var w/ MikeP
new MathBOp(origProperty1, new Constant(
new XSDIntegerIV(BigInteger.valueOf(120))),
MathOp.MINUS,globals),//
@@ -440,7 +439,6 @@
new NV(BOp.Annotations.BOP_ID, nextId++),//
new NV(Annotations.TIMESTAMP, timestamp),//
new NV(IPredicate.Annotations.RANGE, new RangeBOp(//
-// origProperty2,// FIXME verify correct var with MikeP
new MathBOp(origProperty2, new Constant(
new XSDIntegerIV(BigInteger.valueOf(170))),
MathOp.MINUS,globals),//
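Note: the reason a single call path now suffices is the sentinel convention adopted for the limit: DEFAULT_CUTOFF_LIMIT is Long.MAX_VALUE, and a limit of MAX_VALUE is indistinguishable from "no limit" for any access path that can actually be scanned. The following self-contained sketch is not bigdata code (the class and names are illustrative); it only demonstrates why the old two-branch call sites could collapse once the sentinel was chosen this way.

    import java.util.Iterator;
    import java.util.NoSuchElementException;

    /** Illustrative cutoff wrapper where Long.MAX_VALUE means "no cutoff". */
    final class CutoffIterator<E> implements Iterator<E> {

        private final Iterator<E> src;
        private final long limit; // Long.MAX_VALUE == unlimited.
        private long visited = 0L;

        CutoffIterator(final Iterator<E> src, final long limit) {
            if (limit <= 0L)
                throw new IllegalArgumentException();
            this.src = src;
            this.limit = limit;
        }

        public boolean hasNext() {
            // [visited] can never reach MAX_VALUE in practice, so the
            // unlimited case needs no separate branch.
            return visited < limit && src.hasNext();
        }

        public E next() {
            if (!hasNext())
                throw new NoSuchElementException();
            visited++;
            return src.next();
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }

    }

The same argument applies to the PipelineJoin change above: accessPath.iterator(0, cutoffLimit, 0/* capacity */) can be invoked unconditionally because a MAX_VALUE limit degenerates to the full scan, while the ZERO capacity argument leaves the chunk-sizing heuristics inside AccessPath#iterator() intact.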
From: <tho...@us...> - 2012-06-12 15:38:12
Revision: 6339 http://bigdata.svn.sourceforge.net/bigdata/?rev=6339&view=rev
Author: thompsonbry
Date: 2012-06-12 15:38:03 +0000 (Tue, 12 Jun 2012)

Log Message:
-----------
I have added the com.bigdata.gom.TestAll suite into CI. Changes to build.xml and com.bigdata.TestAll.

https://sourceforge.net/apps/trac/bigdata/ticket/560 (Graph API/GOM)

Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/TestAll.java
branches/BIGDATA_RELEASE_1_2_0/build.xml

Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/TestAll.java
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/TestAll.java 2012-06-12 14:58:53 UTC (rev 6338)
+++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/TestAll.java 2012-06-12 15:38:03 UTC (rev 6339)
@@ -130,6 +130,12 @@
// The REST API test suite.
suite.addTest(com.bigdata.rdf.sail.webapp.TestAll.suite());
+ /*
+ * The Generic Object Model and Graph API (includes remote tests against
+ * the NanoSparqlServer layer).
+ */
+ suite.addTest(com.bigdata.gom.TestAll.suite());
+
return suite;
}

Modified: branches/BIGDATA_RELEASE_1_2_0/build.xml
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/build.xml 2012-06-12 14:58:53 UTC (rev 6338)
+++ branches/BIGDATA_RELEASE_1_2_0/build.xml 2012-06-12 15:38:03 UTC (rev 6339)
@@ -2077,6 +2077,8 @@
<test name="com.bigdata.rdf.sail.TestAll" todir="${test.results.dir}" unless="testName" />
<test name="com.bigdata.rdf.sail.webapp.TestAll" todir="${test.results.dir}" unless="testName" />
+ <test name="com.bigdata.gom.TestAll" todir="${test.results.dir}" unless="testName" />
+
<!-- All tests.
<test name="com.bigdata.TestAll" todir="${test.results.dir}" unless="testName" />
-->
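Note: bigdata CI discovers tests through nested JUnit 3 "TestAll" aggregations rather than by classpath scanning, which is why both com.bigdata.TestAll and build.xml must name the new suite explicitly. For readers unfamiliar with the pattern, a hypothetical aggregate looks like the sketch below; the child suites shown are placeholders, and the real com.bigdata.gom.TestAll is not reproduced in this diff.

    package com.bigdata.gom;

    import junit.framework.Test;
    import junit.framework.TestSuite;

    /** Hypothetical outline of a JUnit 3 aggregate test suite. */
    public class TestAll {

        public static Test suite() {

            final TestSuite suite = new TestSuite("GOM");

            // Each child contributes its own suite() in turn, so one line here
            // (plus one <test> element in build.xml) wires an entire package
            // hierarchy into CI.
            // suite.addTestSuite(TestSomething.class);       // placeholder
            // suite.addTest(some.child.pkg.TestAll.suite()); // placeholder

            return suite;

        }

    }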
From: <tho...@us...> - 2012-06-12 16:18:31
Revision: 6340 http://bigdata.svn.sourceforge.net/bigdata/?rev=6340&view=rev
Author: thompsonbry
Date: 2012-06-12 16:18:20 +0000 (Tue, 12 Jun 2012)

Log Message:
-----------
Modified the top-level build.xml file to appropriately stage the GOM files for CI. Added a bigdata-gom/src/samples directory (nothing there yet).

Modified Paths:
--------------
branches/BIGDATA_RELEASE_1_2_0/.classpath
branches/BIGDATA_RELEASE_1_2_0/build.xml

Modified: branches/BIGDATA_RELEASE_1_2_0/.classpath
===================================================================
--- branches/BIGDATA_RELEASE_1_2_0/.classpath 2012-06-12 15:38:03 UTC (rev 6339)
+++ branches/BIGDATA_RELEASE_1_2_0/.classpath 2012-06-12 16:18:20 UTC (rev 6340)
@@ -1,87 +1,88 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<classpath>
- <classpathentry kind="src" path="bigdata-rdf/src/java"/>
- <classpathentry kind="src" path="bigdata-rdf/src/samples"/>
- <classpathentry kind="src" path="dsi-utils/src/java"/>
- <classpathentry kind="src" path="bigdata/src/resources/logging"/>
- <classpathentry kind="src" path="bigdata-sails/src/samples"/>
- <classpathentry kind="src" path="bigdata-jini/src/test"/>
- <classpathentry kind="src" path="bigdata-sails/src/java"/>
- <classpathentry kind="src" path="bigdata/src/java"/>
- <classpathentry kind="src" path="bigdata-rdf/src/test"/>
- <classpathentry kind="src" path="bigdata/src/test"/>
- <classpathentry kind="src" path="bigdata-sails/src/test"/>
- <classpathentry kind="src" path="bigdata-jini/src/java"/>
- <classpathentry kind="src" path="contrib/src/problems"/>
- <classpathentry kind="src" path="bigdata/src/samples"/>
- <classpathentry kind="src" path="dsi-utils/src/test"/>
- <classpathentry kind="src" path="ctc-striterators/src/java"/>
- <classpathentry kind="src" path="ctc-striterators/src/test"/>
- <classpathentry kind="src" path="junit-ext/src/test"/>
- <classpathentry kind="src" path="junit-ext/src/java"/>
- <classpathentry kind="src" path="lgpl-utils/src/java"/>
- <classpathentry kind="src" path="lgpl-utils/src/test"/>
- <classpathentry kind="src" path="bigdata-war/src"/>
- <classpathentry kind="src" path="bigdata-ganglia/src/java"/>
- <classpathentry kind="src" path="bigdata-ganglia/src/test"/>
- <classpathentry kind="src" path="bigdata-rdf/src/resources/service-providers"/>
- <classpathentry kind="src" path="bigdata-gom/src/java"/>
- <classpathentry kind="src" path="bigdata-gom/src/test"/>
- <classpathentry kind="src" path="bigdata-gom/src/resources"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/dsi-utils-1.0.6-020610.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.6-020610.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/apache/zookeeper-3.3.3.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/nxparser-6-22-2010.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-continuation-7.2.2.v20101205.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-http-7.2.2.v20101205.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-io-7.2.2.v20101205.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-server-7.2.2.v20101205.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-util-7.2.2.v20101205.jar"/>
- <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/servlet-api-2.5.jar"/>
- <classpathentry exported="true" kind="lib"
path="bigdata/lib/jetty/jetty-servlet-7.2.2.v20101205.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-security-7.2.2.v20101205.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-webapp-7.2.2.v20101205.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-xml-7.2.2.v20101205.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/unimi/colt-1.2.0.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/junit-3.8.1.jar" sourcepath="/root/.m2/repository/junit/junit/3.8.1/junit-3.8.1-sources.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/apache/log4j-1.2.15.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/browser.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/classserver.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/fiddler.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jini-core.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jini-ext.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jsk-lib.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jsk-platform.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jsk-resources.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/mahalo.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/mercury.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/norm.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/outrigger.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/reggie.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/start.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/sun-util.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/tools.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/unimi/fastutil-5.1.5.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/lucene/lucene-analyzers-3.0.0.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/lucene/lucene-core-3.0.0.jar"/> - <classpathentry kind="lib" path="bigdata-rdf/lib/sesame-rio-testsuite-2.6.3.jar" sourcepath="/org.openrdf.sesame-2.6.1"/> - <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/high-scale-lib-v1.1.2.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/icu/icu4j-4_8.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/icu/icu4j-charset-4_8.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/junit-ext-1.1-b3-dev.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/slf4j-api-1.6.1.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/slf4j-log4j12-1.6.1.jar"/> - <classpathentry kind="lib" path="bigdata-rdf/lib/openrdf-sesame-2.6.3-onejar.jar" sourcepath="/org.openrdf.sesame-2.6.1"/> - <classpathentry kind="lib" path="bigdata-sails/lib/sesame-sparql-testsuite-2.6.3.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/sesame-store-testsuite-2.6.3.jar"/> - <classpathentry 
kind="lib" path="bigdata-sails/lib/httpcomponents/commons-codec-1.4.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-logging-1.1.1.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpclient-4.1.3.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpclient-cache-4.1.3.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpcore-4.1.4.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpmime-4.1.3.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-fileupload-1.2.2.jar"/> - <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-io-2.1.jar"/> - <classpathentry kind="output" path="bin"/> -</classpath> +<?xml version="1.0" encoding="UTF-8"?> +<classpath> + <classpathentry kind="src" path="bigdata-rdf/src/java"/> + <classpathentry kind="src" path="bigdata-rdf/src/samples"/> + <classpathentry kind="src" path="dsi-utils/src/java"/> + <classpathentry kind="src" path="bigdata/src/resources/logging"/> + <classpathentry kind="src" path="bigdata-sails/src/samples"/> + <classpathentry kind="src" path="bigdata-jini/src/test"/> + <classpathentry kind="src" path="bigdata-sails/src/java"/> + <classpathentry kind="src" path="bigdata/src/java"/> + <classpathentry kind="src" path="bigdata-rdf/src/test"/> + <classpathentry kind="src" path="bigdata/src/test"/> + <classpathentry kind="src" path="bigdata-sails/src/test"/> + <classpathentry kind="src" path="bigdata-jini/src/java"/> + <classpathentry kind="src" path="contrib/src/problems"/> + <classpathentry kind="src" path="bigdata/src/samples"/> + <classpathentry kind="src" path="dsi-utils/src/test"/> + <classpathentry kind="src" path="ctc-striterators/src/java"/> + <classpathentry kind="src" path="ctc-striterators/src/test"/> + <classpathentry kind="src" path="junit-ext/src/test"/> + <classpathentry kind="src" path="junit-ext/src/java"/> + <classpathentry kind="src" path="lgpl-utils/src/java"/> + <classpathentry kind="src" path="lgpl-utils/src/test"/> + <classpathentry kind="src" path="bigdata-war/src"/> + <classpathentry kind="src" path="bigdata-ganglia/src/java"/> + <classpathentry kind="src" path="bigdata-ganglia/src/test"/> + <classpathentry kind="src" path="bigdata-rdf/src/resources/service-providers"/> + <classpathentry kind="src" path="bigdata-gom/src/java"/> + <classpathentry kind="src" path="bigdata-gom/src/test"/> + <classpathentry kind="src" path="bigdata-gom/src/resources"/> + <classpathentry kind="src" path="bigdata-gom/src/samples"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/dsi-utils-1.0.6-020610.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.6-020610.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/apache/zookeeper-3.3.3.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/nxparser-6-22-2010.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-continuation-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-http-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-io-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-server-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-util-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" 
path="bigdata/lib/jetty/servlet-api-2.5.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-servlet-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-security-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-webapp-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-xml-7.2.2.v20101205.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/unimi/colt-1.2.0.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/junit-3.8.1.jar" sourcepath="/root/.m2/repository/junit/junit/3.8.1/junit-3.8.1-sources.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/apache/log4j-1.2.15.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/browser.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/classserver.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/fiddler.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jini-core.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jini-ext.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jsk-lib.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jsk-platform.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/jsk-resources.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/mahalo.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/mercury.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/norm.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/outrigger.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/reggie.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/start.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/sun-util.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-jini/lib/jini/lib/tools.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/unimi/fastutil-5.1.5.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/lucene/lucene-analyzers-3.0.0.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/lucene/lucene-core-3.0.0.jar"/> + <classpathentry kind="lib" path="bigdata-rdf/lib/sesame-rio-testsuite-2.6.3.jar" sourcepath="/org.openrdf.sesame-2.6.3"/> + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/high-scale-lib-v1.1.2.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/icu/icu4j-4_8.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/icu/icu4j-charset-4_8.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/junit-ext-1.1-b3-dev.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/slf4j-api-1.6.1.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata-rdf/lib/slf4j-log4j12-1.6.1.jar"/> + <classpathentry kind="lib" path="bigdata-rdf/lib/openrdf-sesame-2.6.3-onejar.jar" sourcepath="/Users/bryan/Documents/workspace/org.openrdf.sesame-2.6.3"/> + <classpathentry kind="lib" 
path="bigdata-sails/lib/sesame-sparql-testsuite-2.6.3.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/sesame-store-testsuite-2.6.3.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-codec-1.4.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-logging-1.1.1.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpclient-4.1.3.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpclient-cache-4.1.3.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpcore-4.1.4.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/httpmime-4.1.3.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-fileupload-1.2.2.jar"/> + <classpathentry kind="lib" path="bigdata-sails/lib/httpcomponents/commons-io-2.1.jar"/> + <classpathentry kind="output" path="bin"/> +</classpath> Modified: branches/BIGDATA_RELEASE_1_2_0/build.xml =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/build.xml 2012-06-12 15:38:03 UTC (rev 6339) +++ branches/BIGDATA_RELEASE_1_2_0/build.xml 2012-06-12 16:18:20 UTC (rev 6340) @@ -65,6 +65,9 @@ <fileset dir="${bigdata.dir}/bigdata-sails/lib"> <include name="**/*.jar" /> </fileset> + <fileset dir="${bigdata.dir}/bigdata-gom/lib"> + <include name="**/*.jar" /> + </fileset> <!-- <fileset dir="${bigdata.dir}/ctc-striterator/lib"> <include name="**/*.jar" /> @@ -224,8 +227,9 @@ <!-- note: must also specify -bootclasspath and -extdirs when cross-compiling --> <src path="${bigdata.dir}/bigdata/src/java" /> <src path="${bigdata.dir}/bigdata-jini/src/java" /> - <src path="${bigdata.dir}/bigdata-rdf/src/java" /> + <src path="${bigdata.dir}/bigdata-rdf/src/java" /> <src path="${bigdata.dir}/bigdata-sails/src/java" /> + <src path="${bigdata.dir}/bigdata-gom/src/java" /> <src path="${bigdata.dir}/ctc-striterators/src/java" /> <!-- Do not include the unit tests @todo conditionally include? <src path="${bigdata.dir}/bigdata/src/test"/> @@ -254,6 +258,10 @@ <exclude name="**/*.java" /> <exclude name="**/package.html" /> </fileset> + <fileset dir="${bigdata.dir}/bigdata-gom/src/java"> + <exclude name="**/*.java" /> + <exclude name="**/package.html" /> + </fileset> <!-- Note: This simple copy works so long as there is just one service provider file per interface. It will not combine (append) multiple files for the same interface. 
--> @@ -286,6 +294,8 @@ <fileset dir="${bigdata.dir}/bigdata-rdf/src/samples" /> <fileset dir="${bigdata.dir}/bigdata-sails/src/java" /> <fileset dir="${bigdata.dir}/bigdata-sails/src/samples" /> + <fileset dir="${bigdata.dir}/bigdata-gom/src/java" /> + <fileset dir="${bigdata.dir}/bigdata-gom/src/samples" /> <fileset dir="${bigdata.dir}/ctc-striterators/src/java" /> </jar> </target> @@ -304,7 +314,7 @@ <fileset dir="${bigdata.dir}/bigdata"> <include name="LEGAL/apache-license-2_0.txt"/> </fileset> - <fileset dir="${bigdata.dir}/bigdata-rdf"> + <fileset dir="${bigdata.dir}/bigdata-rdf"> <include name="LEGAL/sesame2.x-license.txt"/> </fileset> <manifest> @@ -330,8 +340,9 @@ </manifest> <fileset dir="bigdata/src/java" /> <fileset dir="bigdata-jini/src/java" /> - <fileset dir="bigdata-rdf/src/java" /> + <fileset dir="bigdata-rdf/src/java" /> <fileset dir="bigdata-sails/src/java" /> + <fileset dir="bigdata-gom/src/java" /> </jar> <bnd output="${build.dir}/bundles/com.bigata-${osgi.version}.jar" classpath="${build.dir}/classes" eclipse="false" failok="false" exceptions="true" files="${basedir}/osgi/bigdata.bnd" /> @@ -368,6 +379,8 @@ <packageset dir="${bigdata.dir}/bigdata-rdf/src/samples" /> <packageset dir="${bigdata.dir}/bigdata-sails/src/java" /> <packageset dir="${bigdata.dir}/bigdata-sails/src/samples" /> + <packageset dir="${bigdata.dir}/bigdata-gom/src/java" /> + <packageset dir="${bigdata.dir}/bigdata-gom/src/samples" /> <packageset dir="${bigdata.dir}/ctc-striterators/src/java" /> <doctitle> <![CDATA[<h1>bigdata® v${build.ver}</h1>]]></doctitle> @@ -405,6 +418,9 @@ <fileset dir="${bigdata.dir}/bigdata-sails/lib"> <include name="**/*.jar" /> </fileset> + <fileset dir="${bigdata.dir}/bigdata-gom/lib"> + <include name="**/*.jar" /> + </fileset> </copy> <!-- Do NOT flatten the jini jars. We need the to preserve the --> <!-- lib, lib-dl, and lib-ext distinctions. --> @@ -470,6 +486,7 @@ <fileset dir="${bigdata.dir}/bigdata" includes="LEGAL/*"/> <fileset dir="${bigdata.dir}/bigdata-rdf" includes="LEGAL/*"/> <fileset dir="${bigdata.dir}/bigdata-sails" includes="LEGAL/*"/> + <fileset dir="${bigdata.dir}/bigdata-gom" includes="LEGAL/*"/> <fileset dir="${bigdata.dir}/bigdata-jini" includes="LEGAL/*"/> <!-- Basic RWStore configuration. --> <file file="bigdata-war/src/resources/RWStore.properties"/> @@ -578,8 +595,9 @@ <include name="README-JINI" /> <include name="bigdata/LEGAL/*" /> <include name="bigdata-jini/LEGAL/*" /> - <include name="bigdata-rdf/LEGAL/*" /> + <include name="bigdata-rdf/LEGAL/*" /> <include name="bigdata-sails/LEGAL/*" /> + <include name="bigdata-gom/LEGAL/*" /> </fileset> </copy> <!-- install JARs. 
--> @@ -841,6 +859,7 @@
<property name="bigdata-jini.lib" location="${bigdata.dir}/bigdata-jini/lib/jini/lib" />
<property name="bigdata-rdf.lib" location="${bigdata.dir}/bigdata-rdf/lib" />
<property name="bigdata-sails.lib" location="${bigdata.dir}/bigdata-sails/lib" />
+ <property name="bigdata-gom.lib" location="${bigdata.dir}/bigdata-gom/lib" />
<property name="bigdata-jetty.lib" location="${bigdata.dir}/bigdata/lib/jetty" />
<property name="bigdata-http.lib" location="${bigdata.dir}/bigdata-sails/lib/httpcomponents" />
<property name="bigdata-zookeeper.lib" location="${bigdata.dir}/bigdata-jini/lib/apache" />
@@ -878,6 +897,9 @@
<copy file="${bigdata-rdf.lib}/slf4j-log4j12-${slf4j.version}.jar"
tofile="${dist.lib}/slf4j-log4j.jar" />
+ <!-- GOM library -->
+ <!-- Note: Nothing yet for GOM -->
+
<!-- jetty library -->
<copy file="${bigdata-jetty.lib}/jetty-continuation-${jetty.version}.jar"
tofile="${dist.lib}/jetty-continuation.jar" />
@@ -1153,6 +1175,9 @@
<copy toDir="${build.dir}/bigdata-sails/src">
<fileset dir="${bigdata.dir}/bigdata-sails/src" />
</copy>
+ <copy toDir="${build.dir}/bigdata-gom/src">
+ <fileset dir="${bigdata.dir}/bigdata-gom/src" />
+ </copy>
<copy toDir="${build.dir}/bigdata-war/src">
<fileset dir="${bigdata.dir}/bigdata-war/src" />
</copy>
@@ -1179,6 +1204,10 @@
<copy toDir="${build.dir}/bigdata-rdf/lib">
<fileset dir="${bigdata.dir}/bigdata-rdf/lib" />
</copy>
+ <mkdir dir="${build.dir}/bigdata-gom/lib" />
+ <copy toDir="${build.dir}/bigdata-gom/lib">
+ <fileset dir="${bigdata.dir}/bigdata-gom/lib" />
+ </copy>
<mkdir dir="${build.dir}/bigdata-sails/lib" />
<copy toDir="${build.dir}/bigdata-sails/lib">
<fileset dir="${bigdata.dir}/bigdata-sails/lib" />
@@ -1236,6 +1265,7 @@
<include name="bigdata-jini/src/**" />
<include name="bigdata-rdf/src/**" />
<include name="bigdata-sails/src/**" />
+ <include name="bigdata-gom/src/**" />
<include name="bigdata-war/src/**" />
<include name="ctc-striterators/src/**" />
<include name="lgpl-utils/src/**" />
@@ -1244,6 +1274,7 @@
<include name="bigdata-jini/lib/**" />
<include name="bigdata-rdf/lib/**" />
<include name="bigdata-sails/lib/**" />
+ <include name="bigdata-gom/lib/**" />
<include name="src/**" />
<exclude name="classes/**" />
<exclude name="${version}.jar" />
@@ -1301,6 +1332,7 @@
<include name="bigdata-jini/LEGAL/*" />
<include name="bigdata-rdf/LEGAL/*" />
<include name="bigdata-sails/LEGAL/*" />
+ <include name="bigdata-gom/LEGAL/*" />
</fileset>
</copy>
@@ -1553,6 +1585,7 @@
<src path="${bigdata.dir}/bigdata-jini/src/test" />
<src path="${bigdata.dir}/bigdata-rdf/src/test" />
<src path="${bigdata.dir}/bigdata-sails/src/test" />
+ <src path="${bigdata.dir}/bigdata-gom/src/test" />
<src path="${bigdata.dir}/ctc-striterators/src/test" />
<!--
<src path="${bigdata.dir}/bigdata-gom/src/test" />
@@ -1602,6 +1635,9 @@
<fileset dir="${bigdata.dir}/bigdata-sails/src/test">
<exclude name="**/*.java" />
</fileset>
+ <fileset dir="${bigdata.dir}/bigdata-gom/src/test">
+ <exclude name="**/*.java" />
+ </fileset>
<fileset dir="${bigdata.dir}/bigdata-jini/src/test">
<exclude name="**/*.java" />
</fileset>
From: <tho...@us...> - 2012-06-14 18:08:22
Revision: 6352 http://bigdata.svn.sourceforge.net/bigdata/?rev=6352&view=rev
Author: thompsonbry
Date: 2012-06-14 18:08:09 +0000 (Thu, 14 Jun 2012)

Log Message:
-----------
- Added support for the HTree on the AbstractJournal and TemporaryStore. This sets up a pattern which can be used to provide support for other persistence capable data structures.

There are some API conflicts which I was not able to resolve. The main conflict is

public IIndex IBTreeManager#registerIndex(String name, IndexMetadata indexMetadata);

That method returns an IIndex, but HTree does not implement IIndex. The method can also return an object which is not an ICommitter, such as a FusedView.

The IndexMetadata class was subclassed to create a version for the HTree, HTreeIndexMetadata. You must now use the HTree variant in order to create an HTree. This change is backwards compatible with existing BTree records (there are no durable HTree records since the HTree was historically only used with the MemoryManager). The IndexMetadata now includes the IndexTypeEnum and passes that value through to the Checkpoint class.

A single unit test was written to verify that an HTree can be registered against a Journal, recovered by its name, and dropped. More tests should be added. No tests were written for the TemporaryStore, but it also supports the HTree now.

Name2Addr is now completely written to the ICheckpointProtocol interface. It is no longer BTree-specific. AbstractJournal was changed to be generic (at the ICheckpointProtocol layer) as much as possible, but the registerIndex(name, metadata): IIndex method prevents a fully generic solution.

Much of the logic for creating a persistence capable data structure from an IndexMetadata object, and for loading one from a checkpoint record, is now isolated in static methods on the Checkpoint class.

I have run through the majority of the test suites and everything is green.

@see https://sourceforge.net/apps/trac/bigdata/ticket/531 (SPARQL UPDATE for SOLUTION SETS).
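Note: the new creation/re-load path can be seen end to end without a Journal. The sketch below runs against a raw store and follows the Checkpoint.create() and Checkpoint.loadFromCheckpoint() methods shown in the Checkpoint.java diff further down. Calls that are not visible in this diff (SimpleMemoryRawStore, HTree#insert(), writeCheckpoint()) are recalled from the surrounding bigdata API and should be verified against the source.

    import java.util.UUID;

    import com.bigdata.btree.Checkpoint;
    import com.bigdata.btree.HTreeIndexMetadata;
    import com.bigdata.btree.ICheckpointProtocol;
    import com.bigdata.htree.HTree;
    import com.bigdata.rawstore.IRawStore;
    import com.bigdata.rawstore.SimpleMemoryRawStore;

    public class HTreeCheckpointSketch {

        public static void main(final String[] args) {

            // An in-memory store stands in for the Journal's backing store.
            final IRawStore store = new SimpleMemoryRawStore();

            // The HTree variant of IndexMetadata is now required to create an HTree.
            final HTreeIndexMetadata md = new HTreeIndexMetadata(UUID.randomUUID());
            md.setAddressBits(10); // 2^10 child references per directory page.

            // Checkpoint.create() dispatches on IndexTypeEnum (BTree vs HTree).
            final HTree htree = (HTree) Checkpoint.create(store, md);

            htree.insert(new byte[] { 1 }, new byte[] { 2 });

            // Persist a checkpoint; the address identifies the checkpoint record.
            final long checkpointAddr = htree.writeCheckpoint();

            // Re-load: the IndexTypeEnum recorded in the checkpoint record
            // selects HTree.load() rather than BTree.load().
            final ICheckpointProtocol ndx = Checkpoint.loadFromCheckpoint(store,
                    checkpointAddr, true/* readOnly */);

            System.out.println(ndx.getClass().getSimpleName()); // HTree

        }

    }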
Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bfs/BigdataFileSystem.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/BTree.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/Checkpoint.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/IndexMetadata.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/htree/AbstractHTree.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/htree/HTree.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/AbstractTask.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/IBTreeManager.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/Journal.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/Name2Addr.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/journal/TemporaryStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/AbstractHTreeTestCase.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/HTreeVersusHashMapDemo.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/ShowHTreeResourceUsage.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestHTreeRecycle.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestHTreeWithMemStore.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestHTree_init.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestIncrementalWrite.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestRawRecords.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestRemovals.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestReopen.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/htree/TestTransientHTree.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/journal/TestNamedIndices.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/filter/NativeDistinctFilter.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataSubjectCentricFullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataValueCentricFullTextIndex.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/HTreeIndexMetadata.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/IndexTypeEnum.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bfs/BigdataFileSystem.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bfs/BigdataFileSystem.java 2012-06-14 14:14:07 UTC (rev 6351) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bfs/BigdataFileSystem.java 2012-06-14 18:08:09 UTC (rev 6352) @@ -26,6 +26,7 @@ import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; import 
com.bigdata.btree.IndexMetadata; +import com.bigdata.btree.IndexTypeEnum; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.btree.proc.ISimpleIndexProcedure; @@ -457,7 +458,7 @@ final String name = getNamespace()+"."+FILE_METADATA_INDEX_BASENAME; final IndexMetadata md = new IndexMetadata(indexManager, tmp, - name, UUID.randomUUID()); + name, UUID.randomUUID(), IndexTypeEnum.BTree); // Ensure that splits do not break logical rows. md.setSplitHandler(LogicalRowSplitHandler.INSTANCE); @@ -481,7 +482,7 @@ final String name = getNamespace()+"."+FILE_DATA_INDEX_BASENAME; final IndexMetadata md = new IndexMetadata(indexManager, tmp, - name, UUID.randomUUID()); + name, UUID.randomUUID(), IndexTypeEnum.BTree); /* * @todo unit tests for correct copying of blobs during overflow. Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java 2012-06-14 14:14:07 UTC (rev 6351) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/bop/join/HTreeHashJoinUtility.java 2012-06-14 18:08:09 UTC (rev 6352) @@ -50,6 +50,7 @@ import com.bigdata.btree.BytesUtil; import com.bigdata.btree.Checkpoint; import com.bigdata.btree.DefaultTupleSerializer; +import com.bigdata.btree.HTreeIndexMetadata; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.ITupleSerializer; @@ -427,9 +428,10 @@ * Setup the {@link IndexMetadata} for {@link #rightSolutions} or * {@link #joinSet}. */ - static private IndexMetadata getIndexMetadata(final PipelineOp op) { + static private HTreeIndexMetadata getIndexMetadata(final PipelineOp op) { - final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID()); + final HTreeIndexMetadata metadata = new HTreeIndexMetadata( + UUID.randomUUID()); final int addressBits = op.getProperty(HTreeAnnotations.ADDRESS_BITS, HTreeAnnotations.DEFAULT_ADDRESS_BITS); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/BTree.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/BTree.java 2012-06-14 14:14:07 UTC (rev 6351) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/BTree.java 2012-06-14 18:08:09 UTC (rev 6352) @@ -1568,6 +1568,10 @@ * You can use {@link IndexMetadata#clone()} to obtain a new * copy of the metadata object with the metadata address set * to <code>0L</code>. + * @exception IllegalStateException + * if the {@link IndexTypeEnum} in the supplied + * {@link IndexMetadata} object is not + * {@link IndexTypeEnum#BTree}. */ public static BTree create(final IRawStore store, final IndexMetadata metadata) { @@ -1577,6 +1581,13 @@ } + if (metadata.getIndexType() != IndexTypeEnum.BTree) { + + throw new IllegalStateException("Wrong index type: " + + metadata.getIndexType()); + + } + /* * Write metadata record on store. The address of that record is set as * a side-effect on the metadata object. @@ -1751,8 +1762,8 @@ + store.toString(addrCheckpoint), t); } -// if (checkpoint.getIndexType() != IndexTypeEnum.BTree) -// throw new RuntimeException("Not a BTree checkpoint: " + checkpoint); + if (checkpoint.getIndexType() != IndexTypeEnum.BTree) + throw new RuntimeException("Not a BTree checkpoint: " + checkpoint); /* * Read metadata record from store. 
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/Checkpoint.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2012-06-14 14:14:07 UTC (rev 6351) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2012-06-14 18:08:09 UTC (rev 6352) @@ -1,3 +1,26 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ package com.bigdata.btree; import java.io.Externalizable; @@ -53,43 +76,6 @@ private IndexTypeEnum indexType; /** - * Type safe enumeration of index types. - */ - public static enum IndexTypeEnum { - - /** BTree. */ - BTree((short)0), - - /** Extendable hash tree. */ - HTree((short)1); - - private IndexTypeEnum(final short code) { - - this.code = code; - - } - - private final short code; - - public short getCode() { - - return code; - - } - - static public IndexTypeEnum valueOf(final short code) { - switch (code) { - case 0: - return BTree; - case 1: - return HTree; - default: - throw new IllegalArgumentException("code=" + code); - } - } - } - - /** * The address used to read this {@link Checkpoint} record from the * store. * <p> @@ -300,7 +286,7 @@ 0L, // nentries 0L, // counter 0L, // recordVersion - IndexTypeEnum.BTree // indexType + metadata.getIndexType() // indexType ); @@ -331,7 +317,7 @@ 0L, // nentries oldCheckpoint.counter,// 0L, // recordVersion - IndexTypeEnum.BTree// + metadata.getIndexType()// ); } @@ -395,7 +381,7 @@ */ btree.counter.get(),// btree.getRecordVersion(),// - IndexTypeEnum.BTree// + IndexTypeEnum.BTree // IndexTypeEnum ); } @@ -450,7 +436,7 @@ htree.getEntryCount(),// htree.getCounter().get(),// htree.getRecordVersion(),// - IndexTypeEnum.HTree// + IndexTypeEnum.HTree // IndexTypeEnum ); } @@ -736,4 +722,88 @@ } + /** + * Utility method reads the {@link Checkpoint} record and then loads and + * returns a view of the associated read-only persistence capable data + * structure. + * <p> + * <strong>Note: The caller is responsible for marking the returned object + * as read-only or not depending on the context. This method should be used + * from trusted code such as {@link Name2Addr} and {@link AbstractJournal} + * which can make this decision.</strong> + * + * @param store + * The backing store. + * @param checkpointAddr + * The address of the checkpoint record. + * @param readOnly + * <code>true</code> if the object will be read-only. + * + * @return The persistence capable data structure loaded from that + * checkpoint. + */ + public static ICheckpointProtocol loadFromCheckpoint(final IRawStore store, + final long checkpointAddr, final boolean readOnly) { + + /* + * Read checkpoint record from store. 
+ */ + final Checkpoint checkpoint; + try { + checkpoint = Checkpoint.load(store, checkpointAddr); + } catch (Throwable t) { + throw new RuntimeException("Could not load Checkpoint: store=" + + store + ", addrCheckpoint=" + + store.toString(checkpointAddr), t); + } + + // re-load from the store. + final ICheckpointProtocol ndx; + switch (checkpoint.getIndexType()) { + case BTree: + ndx = BTree.load(store, checkpointAddr, readOnly); + break; + case HTree: + ndx = HTree.load(store, checkpointAddr, readOnly); + break; + default: + throw new AssertionError("Unknown: " + checkpoint.getIndexType()); + } + + // // set the lastCommitTime on the index. + // btree.setLastCommitTime(lastCommitTime); + + return ndx; + + } + + /** + * Create a persistence capable data structure. + * + * @param store + * The backing store. + * @param metadata + * The metadata that describes the data structure to be created. + * + * @return The persistence capable data structure. + */ + public static ICheckpointProtocol create(final IRawStore store, + final IndexMetadata metadata) { + + final ICheckpointProtocol ndx; + switch (metadata.getIndexType()) { + case BTree: + ndx = BTree.create(store, metadata); + break; + case HTree: + ndx = HTree.create(store, (HTreeIndexMetadata) metadata); + break; + default: + throw new AssertionError("Unknown: " + metadata.getIndexType()); + } + + return ndx; + + } + } Added: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/HTreeIndexMetadata.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/HTreeIndexMetadata.java (rev 0) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/HTreeIndexMetadata.java 2012-06-14 18:08:09 UTC (rev 6352) @@ -0,0 +1,312 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.btree; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Properties; +import java.util.UUID; + +import com.bigdata.btree.raba.codec.IRabaCoder; +import com.bigdata.htree.HTree; +import com.bigdata.io.LongPacker; +import com.bigdata.journal.IIndexManager; +import com.bigdata.service.IBigdataFederation; + +/** + * HTree specific implementation. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class HTreeIndexMetadata extends IndexMetadata { + + private static final long serialVersionUID = -1L; + + /** + * {@link HTree} specific options. + */ + public interface Options extends IndexMetadata.Options { + + /** + * The name of a class derived from {@link HTree} that will be used to + * re-load the index. 
+ */
+ String HTREE_CLASS_NAME = HTree.class.getName()+".className";
+
+ /**
+ * The name of an optional property whose value specifies the number of
+ * address bits for an {@link HTree} (default
+ * {@value #DEFAULT_HTREE_ADDRESS_BITS}).
+ * <p>
+ * The #of children for a directory is <code>2^addressBits</code>. For
+ * example, a value of <code>10</code> means a <code>10</code> bit
+ * address space in the directory. Such a directory would provide direct
+ * addressing for <code>1024</code> child references. Given an overhead
+ * of <code>8</code> bytes per child address, that would result in an
+ * expected page size of 8k before compression.
+ *
+ * @see #DEFAULT_HTREE_ADDRESS_BITS
+ */
+ String HTREE_ADDRESS_BITS = HTree.class.getPackage().getName()
+ + ".addressBits";
+
+ String DEFAULT_HTREE_ADDRESS_BITS = "10";
+
+ /**
+ * The name of an optional property whose value specifies the fixed byte
+ * length of the keys in the {@link HTree} -or- ZERO (0) if the key
+ * length is unconstrained, in which case variable length keys may be
+ * used (default {@value #DEFAULT_HTREE_KEY_LEN}). This may be used in
+ * combination with an appropriate {@link IRabaCoder} to optimize the
+ * search and encoding of int32 or int64 keys.
+ *
+ * @see #DEFAULT_HTREE_KEY_LEN
+ */
+ String HTREE_KEY_LEN = HTree.class.getPackage().getName()
+ + ".keyLen";
+
+ String DEFAULT_HTREE_KEY_LEN = "0";
+
+ }
+
+ /**
+ * @see Options#HTREE_CLASS_NAME
+ */
+ private String htreeClassName;
+
+ /**
+ * @see Options#HTREE_ADDRESS_BITS
+ */
+ private int addressBits;
+
+ /**
+ * @see Options#HTREE_KEY_LEN
+ */
+ private int keyLen;
+
+ /**
+ * The name of a class derived from {@link HTree} that will be used to
+ * re-load the index.
+ *
+ * @see Options#HTREE_CLASS_NAME
+ */
+ public final String getHTreeClassName() {
+
+ return htreeClassName;
+
+ }
+
+ public void setHTreeClassName(final String className) {
+
+ if (className == null)
+ throw new IllegalArgumentException();
+
+ this.htreeClassName = className;
+
+ }
+
+ public int getAddressBits() {
+
+ return addressBits;
+
+ }
+
+ public void setAddressBits(final int addressBits) {
+
+ this.addressBits = addressBits;
+
+ }
+
+ public int getKeyLen() {
+
+ return keyLen;
+
+ }
+
+ public void setKeyLen(final int keyLen) {
+
+ this.keyLen = keyLen;
+
+ }
+
+ /**
+ * <strong>De-serialization constructor only</strong> - DO NOT use this ctor
+ * for creating a new instance! It will result in a thrown exception,
+ * typically from {@link #firstCheckpoint()}.
+ */
+ public HTreeIndexMetadata() {
+
+ super();
+
+ }
+
+ /**
+ * Constructor used to configure a new <em>unnamed</em> {@link HTree}. The
+ * index UUID is set to the given value and all other fields are defaulted
+ * as explained at {@link #HTreeIndexMetadata(Properties, String, UUID)}.
+ * Those defaults may be overridden using the various setter methods, but
+ * some values can not be safely overridden after the index is in use.
+ *
+ * @param indexUUID
+ * The indexUUID.
+ *
+ * @throws IllegalArgumentException
+ * if the indexUUID is <code>null</code>.
+ */
+ public HTreeIndexMetadata(final UUID indexUUID) {
+
+ this(null/* name */, indexUUID);
+
+ }
+
+ /**
+ * Constructor used to configure a new <em>named</em> {@link HTree}. The
+ * index UUID is set to the given value and all other fields are defaulted
+ * as explained at {@link #HTreeIndexMetadata(Properties, String, UUID)}.
Those
+ * defaults may be overridden using the various setter methods, but some
+ * values can not be safely overridden after the index is in use.
+ *
+ * @param name
+ * The index name. When this is a scale-out index, the same
+ * <i>name</i> is specified for each index resource. However they
+ * will be registered on the journal under different names
+ * depending on the index partition to which they belong.
+ *
+ * @param indexUUID
+ * The indexUUID. The same index UUID MUST be used for all
+ * component indices in a scale-out index.
+ *
+ * @throws IllegalArgumentException
+ * if the indexUUID is <code>null</code>.
+ */
+ public HTreeIndexMetadata(final String name, final UUID indexUUID) {
+
+ this(null/* indexManager */, System.getProperties(), name, indexUUID);
+
+ }
+
+ /**
+ * Constructor used to configure a new <em>named</em> {@link HTree}. The
+ * index UUID is set to the given value and all other fields are defaulted
+ * as explained at {@link #getProperty(Properties, String, String, String)}.
+ * Those defaults may be overridden using the various setter methods.
+ *
+ * @param indexManager
+ * Optional. When given and when the {@link IIndexManager} is a
+ * scale-out {@link IBigdataFederation}, this object will be used
+ * to interpret the {@link Options#INITIAL_DATA_SERVICE}
+ * property.
+ * @param properties
+ * Properties object used to override the default values for
+ * this {@link IndexMetadata} instance.
+ * @param namespace
+ * The index name. When this is a scale-out index, the same
+ * <i>name</i> is specified for each index resource. However they
+ * will be registered on the journal under different names
+ * depending on the index partition to which they belong.
+ * @param indexUUID
+ * The indexUUID. The same index UUID MUST be used for all
+ * component indices in a scale-out index.
+ *
+ * @throws IllegalArgumentException
+ * if <i>properties</i> is <code>null</code>.
+ * @throws IllegalArgumentException
+ * if <i>indexUUID</i> is <code>null</code>.
+ */
+ public HTreeIndexMetadata(final IIndexManager indexManager,
+ final Properties properties, final String namespace,
+ final UUID indexUUID) {
+
+ super(indexManager, properties, namespace, indexUUID,
+ IndexTypeEnum.HTree);
+
+ /*
+ * Intern'd to reduce duplication on the heap.
+ */ + this.htreeClassName = getProperty(indexManager, properties, namespace, + Options.HTREE_CLASS_NAME, HTree.class.getName()).intern(); + + this.addressBits = Integer.parseInt(getProperty(indexManager, + properties, namespace, Options.HTREE_ADDRESS_BITS, + Options.DEFAULT_HTREE_ADDRESS_BITS)); + + this.keyLen = Integer + .parseInt(getProperty(indexManager, properties, namespace, + Options.HTREE_KEY_LEN, Options.DEFAULT_HTREE_KEY_LEN)); + + } + + @Override + protected void toString(final StringBuilder sb) { + + super.toString(sb); + + // htree + sb.append(", htreeClassName=" + htreeClassName); + sb.append(", addressBits=" + addressBits); + sb.append(", keyLen=" + keyLen); + + } + + @Override + public void readExternal(final ObjectInput in) throws IOException, + ClassNotFoundException { + + super.readExternal(in); + + keyLen = LongPacker.unpackInt(in); + + addressBits = LongPacker.unpackInt(in); + + htreeClassName = in.readUTF(); + + } + + @Override + public void writeExternal(final ObjectOutput out) throws IOException { + + final int version = CURRENT_VERSION; + + super.writeExternal(out); + + LongPacker.packLong(out, keyLen); + + LongPacker.packLong(out, addressBits); + + out.writeUTF(htreeClassName); + + } + + @Override + public HTreeIndexMetadata clone() { + + return (HTreeIndexMetadata) super.clone(); + + } + +} Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/IndexMetadata.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2012-06-14 14:14:07 UTC (rev 6351) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/IndexMetadata.java 2012-06-14 18:08:09 UTC (rev 6352) @@ -38,7 +38,6 @@ import org.apache.log4j.Logger; import com.bigdata.LRUNexus; -import com.bigdata.btree.Checkpoint.IndexTypeEnum; import com.bigdata.btree.data.ILeafData; import com.bigdata.btree.data.INodeData; import com.bigdata.btree.isolation.IConflictResolver; @@ -78,7 +77,7 @@ * <p> * The persistent and mostly immutable metadata for a {@link AbstractBTree}. * This class allows you to configured several very important aspects of the - * B+Tree behavior. Read on. + * B+Tree (and other persistence capable data structures) behavior. Read on. * </p> * <p> * An instance of this class is required in order to create a {@link BTree} or @@ -222,7 +221,7 @@ private static final long serialVersionUID = 4370669592664382720L; - protected static final transient Logger log = Logger + private static final transient Logger log = Logger .getLogger(IndexMetadata.class); /** @@ -1003,50 +1002,6 @@ String DEFAULT_SCATTER_SPLIT_INDEX_PARTITION_COUNT = "0"; - /* - * htree package options. - */ - - /** - * The name of a class derived from {@link HTree} that will be used to - * re-load the index. - */ - String HTREE_CLASS_NAME = HTree.class.getName()+".className"; - - /** - * The name of an optional property whose value specifies the number of - * address bits for an {@link HTree} (default - * {@value #DEFAULT_HTREE_ADDRESS_BITS}). - * <p> - * The #of children for a directory is <code>2^addressBits</code>. For - * example, a value of <code>10</code> means a <code>10</code> bit - * address space in the directory. Such a directory would provide direct - * addressing for <code>1024</code> child references. Given an overhead - * of <code>8</code> bytes per child address, that would result in an - * expected page size of 8k before compression. 
- * - * @see #DEFAULT_HTREE_ADDRESS_BITS - */ - String HTREE_ADDRESS_BITS = HTree.class.getPackage().getName() - + ".addressBits"; - - String DEFAULT_HTREE_ADDRESS_BITS = "10"; - - /** - * The name of an optional property whose value specifies the fixed by - * length of the keys in the {@link HTree} -or- ZERO (0) if the key - * length is unconstrained, in which case variable length keys may be - * used (default {@value #DEFAULT_HTREE_KEY_LEN}). This may be used in - * combination with an appropriate {@link IRabaCoder} to optimize to - * search and encoding of int32 or int64 keys. - * - * @see #DEFAULT_HTREE_KEY_LEN - */ - String HTREE_KEY_LEN = HTree.class.getPackage().getName() - + ".keyLen"; - - String DEFAULT_HTREE_KEY_LEN = "0"; - } /** @@ -1103,6 +1058,12 @@ private UUID indexUUID; private String name; + /** + * The type of the index. + * + * @see #VERSION4 + */ + private IndexTypeEnum indexType; private int branchingFactor; private int writeRetentionQueueCapacity; private int writeRetentionQueueScan; @@ -1111,13 +1072,11 @@ private LocalPartitionMetadata pmd; private String btreeClassName; private String checkpointClassName; -// private IAddressSerializer addrSer; private IRabaCoder nodeKeysCoder; private ITupleSerializer<?, ?> tupleSer; private IRecordCompressorFactory<?> btreeRecordCompressorFactory; private IRecordCompressorFactory<?> indexSegmentRecordCompressorFactory; private IConflictResolver conflictResolver; -// private boolean childLocks; private boolean deleteMarkers; private boolean versionTimestamps; private boolean versionTimestampFilters; @@ -1125,9 +1084,7 @@ private short maxRecLen; private BloomFilterFactory bloomFilterFactory; private IOverflowHandler overflowHandler; -// private ISplitHandler splitHandler; private ISimpleSplitHandler splitHandler2; -// private Object historyPolicy; private AsynchronousIndexWriteConfiguration asynchronousIndexWriteConfiguration; private ScatterSplitConfiguration scatterSplitConfiguration; @@ -1145,36 +1102,7 @@ */ private boolean indexSegmentBufferNodes; -// /** -// * @see Options#INDEX_SEGMENT_LEAF_CACHE_CAPACITY -// */ -// private int indexSegmentLeafCacheCapacity; -// -// /** -// * @see Options#INDEX_SEGMENT_LEAF_CACHE_TIMEOUT -// */ -// private long indexSegmentLeafCacheTimeout; - - /* - * HTree - */ - /** - * @see Options#HTREE_CLASS_NAME - */ - private String htreeClassName; - - /** - * @see Options#HTREE_ADDRESS_BITS - */ - private int addressBits; - - /** - * @see Options#HTREE_KEY_LEN - */ - private int keyLen; - - /** * The unique identifier for the (scale-out) index whose data is stored in * this B+Tree data structure. * <p> @@ -1187,6 +1115,13 @@ public final UUID getIndexUUID() {return indexUUID;} /** + * The type of the associated persistence capable data structure. + */ + public final IndexTypeEnum getIndexType() { + return indexType; + } + + /** * The name associated with the index -or- <code>null</code> iff the index * is not named (internal indices are generally not named while application * indices are always named). @@ -1310,54 +1245,6 @@ } -// /** -// * Return the capacity of the LRU cache of leaves for an -// * {@link IndexSegment}. 
-// * -// * @see Options#INDEX_SEGMENT_LEAF_CACHE_CAPACITY -// */ -// public final int getIndexSegmentLeafCacheCapacity() { -// -// return indexSegmentLeafCacheCapacity; -// -// } -// -// public final void setIndexSegmentLeafCacheCapacity(final int newValue) { -// -// if (newValue <= 0) { -// -// throw new IllegalArgumentException(); -// -// } -// -// this.indexSegmentLeafCacheCapacity = newValue; -// -// } -// -// /** -// * Return the timeout in nanoseconds of the LRU cache of leaves of -// * an {@link IndexSegment}. -// * -// * @see Options#INDEX_SEGMENT_LEAF_CACHE_TIMEOUT -// */ -// public final long getIndexSegmentLeafCacheTimeout() { -// -// return indexSegmentLeafCacheTimeout; -// -// } -// -// public final void setIndexSegmentLeafCacheTimeout(final long nanos) { -// -// if (nanos <= 0) { -// -// throw new IllegalArgumentException(); -// -// } -// -// this.indexSegmentLeafCacheTimeout = nanos; -// -// } - /** * @see Options#WRITE_RETENTION_QUEUE_CAPACITY */ @@ -1463,11 +1350,6 @@ } -// /** -// * Object used to (de-)serialize the addresses of the children of a node. -// */ -// public final IAddressSerializer getAddressSerializer() {return addrSer;} - /** * Object used to code (compress) the keys in a node. * <p> @@ -1510,17 +1392,6 @@ */ public final IConflictResolver getConflictResolver() {return conflictResolver;} -// /** -// * @see Options#CHILD_LOCKS -// */ -// public final boolean getChildLocks() {return childLocks;} -// -// public final void setChildLocks(final boolean newValue) { -// -// this.childLocks = newValue; -// -// } - /** * When <code>true</code> the index will write a delete marker when an * attempt is made to delete the entry under a key. Delete markers will be @@ -1714,15 +1585,6 @@ } -// public void setAddressSerializer(final IAddressSerializer addrSer) { -// -// if (addrSer == null) -// throw new IllegalArgumentException(); -// -// this.addrSer = addrSer; -// -// } - public void setNodeKeySerializer(final IRabaCoder nodeKeysCoder) { if (nodeKeysCoder == null) @@ -1914,57 +1776,7 @@ } - /* - * HTree - */ - - /** - * The name of a class derived from {@link HTree} that will be used to - * re-load the index. - * - * @see Options#HTREE_CLASS_NAME - */ - public final String getHTreeClassName() { - - return htreeClassName; - - } - - public void setHTreeClassName(final String className) { - - if (className == null) - throw new IllegalArgumentException(); - - this.htreeClassName = className; - - } - - public int getAddressBits() { - - return addressBits; - - } - - public void setAddressBits(final int addressBits) { - - this.addressBits = addressBits; - - } - - public int getKeyLen() { - - return keyLen; - - } - - public void setKeyLen(final int keyLen) { - - this.keyLen = keyLen; - - } - - /** * Create an instance of a class known to implement the specified interface * from a class name. * @@ -2037,73 +1849,82 @@ */ public IndexMetadata(final UUID indexUUID) { - this(null, indexUUID); - + this(null/* name */, indexUUID); + } - /** - * Constructor used to configure a new <em>named</em> B+Tree. The index - * UUID is set to the given value and all other fields are defaulted as - * explained at {@link #IndexMetadata(Properties, String, UUID)}. Those - * defaults may be overridden using the various setter methods, but some - * values can not be safely overridden after the index is in use. - * - * @param name - * The index name. When this is a scale-out index, the same - * <i>name</i> is specified for each index resource. 
However - they will be registered on the journal under different names - depending on the index partition to which they belong. - * - * @param indexUUID - * The indexUUID. The same index UUID MUST be used for all - * component indices in a scale-out index. - * - * @throws IllegalArgumentException - * if the indexUUID is <code>null</code>. - */ - public IndexMetadata(final String name, final UUID indexUUID) { + /** + * Constructor used to configure a new <em>named</em> {@link BTree}. The + * index UUID is set to the given value and all other fields are defaulted + * as explained at {@link #IndexMetadata(Properties, String, UUID)}. Those + * defaults may be overridden using the various setter methods, but some + * values can not be safely overridden after the index is in use. + * + * @param name + * The index name. When this is a scale-out index, the same + * <i>name</i> is specified for each index resource. However they + * will be registered on the journal under different names + * depending on the index partition to which they belong. + * + * @param indexUUID + * The indexUUID. The same index UUID MUST be used for all + * component indices in a scale-out index. + * + * @throws IllegalArgumentException + * if the indexUUID is <code>null</code>. + */ + public IndexMetadata(final String name, final UUID indexUUID) { - this(null, System.getProperties(), name, indexUUID); - - } + this(null/* indexManager */, System.getProperties(), name, indexUUID, + IndexTypeEnum.BTree); + } + /** - * Constructor used to configure a new <em>named</em> B+Tree. The index - * UUID is set to the given value and all other fields are defaulted as - * explained at {@link #getProperty(Properties, String, String, String)}. - * Those defaults may be overridden using the various setter methods. - * - * @param indexManager - * Optional. When given and when the {@link IIndexManager} is a - * scale-out {@link IBigdataFederation}, this object will be - * used to interpret the {@link Options#INITIAL_DATA_SERVICE} - * property. - * @param properties - * Properties object used to overridden the default values for - * this {@link IndexMetadata} instance. - * @param namespace - * The index name. When this is a scale-out index, the same - * <i>name</i> is specified for each index resource. However - * they will be registered on the journal under different names - * depending on the index partition to which they belong. - * @param indexUUID - * The indexUUID. The same index UUID MUST be used for all - * component indices in a scale-out index. - * - * @throws IllegalArgumentException - * if <i>properties</i> is <code>null</code>. - * @throws IllegalArgumentException - * if <i>indexUUID</i> is <code>null</code>. - */ - public IndexMetadata(final IIndexManager indexManager, - final Properties properties, final String namespace, - final UUID indexUUID) { + * Constructor used to configure a new <em>named</em> B+Tree. The index UUID + * is set to the given value and all other fields are defaulted as explained + * at {@link #getProperty(Properties, String, String, String)}. Those + * defaults may be overridden using the various setter methods. + * + * @param indexManager + * Optional. When given and when the {@link IIndexManager} is a + * scale-out {@link IBigdataFederation}, this object will be used + * to interpret the {@link Options#INITIAL_DATA_SERVICE} + * property. + * @param properties + * Properties object used to override the default values for + * this {@link IndexMetadata} instance. + * @param namespace + * The index name.
When this is a scale-out index, the same + * <i>name</i> is specified for each index resource. However they + * will be registered on the journal under different names + * depending on the index partition to which they belong. + * @param indexUUID + * The indexUUID. The same index UUID MUST be used for all + * component indices in a scale-out index. + * @param indexType + * Type-safe enumeration specifying the type of the persistence + * class data structure (historically, this was always a B+Tree). + * + * @throws IllegalArgumentException + * if <i>properties</i> is <code>null</code>. + * @throws IllegalArgumentException + * if <i>indexUUID</i> is <code>null</code>. + */ + public IndexMetadata(final IIndexManager indexManager, + final Properties properties, final String namespace, + final UUID indexUUID, final IndexTypeEnum indexType) { if (indexUUID == null) throw new IllegalArgumentException(); - + + if (indexType == null) + throw new IllegalArgumentException(); + this.name = namespace; + this.indexType = indexType; + this.indexUUID = indexUUID; { @@ -2484,24 +2305,6 @@ } - /* - * HTree - */ - - /* Intern'd to reduce duplication on the heap. Will be com.bigdata.btree.BTree or - * com.bigdata.btree.IndexSegment and occasionally a class derived from BTree. - */ - this.htreeClassName = getProperty(indexManager, properties, namespace, - Options.HTREE_CLASS_NAME, HTree.class.getName()).intern(); - - this.addressBits = Integer.parseInt(getProperty(indexManager, - properties, namespace, Options.HTREE_ADDRESS_BITS, - Options.DEFAULT_HTREE_ADDRESS_BITS)); - - this.keyLen = Integer.parseInt(getProperty(indexManager, - properties, namespace, Options.HTREE_KEY_LEN, - Options.DEFAULT_HTREE_KEY_LEN)); - if (log.isInfoEnabled()) log.info(toString()); @@ -2580,6 +2383,7 @@ // persistent sb.append(", name=" + (name == null ? "N/A" : name)); + sb.append(", indexType=" + indexType); sb.append(", indexUUID=" + indexUUID); if (initialDataServiceUUID != null) { sb.append(", initialDataServiceUUID=" + initialDataServiceUUID); @@ -2619,19 +2423,28 @@ : indexSegmentRecordCompressorFactory)); sb.append(", asynchronousIndexWriteConfiguration=" + asynchronousIndexWriteConfiguration); sb.append(", scatterSplitConfiguration=" + scatterSplitConfiguration); - // htree - sb.append(", htreeClassName=" + htreeClassName); - sb.append(", addressBits=" + addressBits); - sb.append(", keyLen=" + keyLen); + toString(sb); // extension hook return sb.toString(); } + /** + * Extension hook for {@link #toString()}. + * + * @param sb + * Where to write additional metadata. + */ + protected void toString(final StringBuilder sb) { + + // NOP + + } + /** * The initial version. */ - private static transient final int VERSION0 = 0x0; + protected static transient final int VERSION0 = 0x0; /** * This version adds support for {@link ILeafData#getRawRecord(int)} and @@ -2639,101 +2452,40 @@ * earlier versions and {@link IndexMetadata#getMaxRecLen()} will report * {@link Options#DEFAULT_MAX_REC_LEN}. */ - private static transient final int VERSION1 = 0x1; + protected static transient final int VERSION1 = 0x1; /** * This version adds support for {@link HTree}. This includes * {@link #addressBits} and {@link #htreeClassName}. */ - private static transient final int VERSION2 = 0x2; + protected static transient final int VERSION2 = 0x2; /** * This version adds support for a fixed length key option for the * {@link HTree} using {@link #keyLen}. 
*/ - private static transient final int VERSION3 = 0x3; + protected static transient final int VERSION3 = 0x3; -// /** -// * This version introduced the {@link #asynchronousIndexWriteConfiguration}. -// * Reads of an earlier version create a instance of that field based on a -// * default configuration. -// */ -// private static transient final int VERSION1 = 0x1; -// -// /** -// * This version introduced the {@link #scatterSplitConfiguration}. Reads of -// * an earlier version create a instance of that field based on a default -// * configuration. -// */ -// private static transient final int VERSION2 = 0x2; -// -// /** -// * This version introduced {@link #indexSegmentLeafCacheTimeout}. Reads of -// * an earlier version use the -// * {@link Options#DEFAULT_INDEX_SEGMENT_LEAF_CACHE_TIMEOUT} for this field. -// */ -// private static transient final int VERSION3 = 0x3; -// -// /** -// * This version introduced {@link #btreeRecordCompressorFactory} and -// * {@link #indexSegmentRecordCompressorFactory}. Both of these fields are -// * optional, which implies no compression provider. Reads of prior versions -// * set these fields to <code>null</code>. -// * -// * @see Options#BTREE_RECORD_COMPRESSOR_FACTORY -// * @see Options#INDEX_SEGMENT_RECORD_COMPRESSOR_FACTORY -// */ -// private static transient final int VERSION4 = 0x04; -// -// /** -// * This version introduced {@link #childLocks}. Reads of prior versions set -// * this field to <code>true</code>. -// * -// * @see Options#CHILD_LOCKS -// */ -// private static transient final int VERSION5 = 0x05; -// -// /** -// * This version introduced {@link #versionTimestampFilters}. Reads of prior -// * versions set this field to <code>false</code>. -// */ -// private static transient final int VERSION6 = 0x06; -// -// /** -// * This version gets rid of the read-retention queue capacity and nscan -// * properties and the index segment leaf cache capacity and timeout -// * properties. -// */ -// private static transient final int VERSION7 = 0x07; -// -// /** -// * This version gets rid of the IAddressSerializer interface used by the -// * older {@link NodeSerializer} class to (de-)serialize the child addresses -// * for a {@link Node}. -// */ -// private static transient final int VERSION8 = 0x08; -// -// /** -// * The childLocks feature was dropped in this version. -// */ -// private static transient final int VERSION9 = 0x09; -// -// /** -// * The split handler was changed from an implementation based on the #of -// * tuples to one based on the size on disk of an index segment after a -// * compacting merge. The old split handlers are replaced by a -// * <code>null</code> reference when they are de-serialized. -// * -// * @see ISplitHandler -// * @see ISimpleSplitHandler -// */ -// private static transient final int VERSION10 = 0x10; + /** + * This version moves the {@link HTree} specific metadata into a derived + * class. Prior to this version, the {@link HTree} was not used in a durable + * context. Thus, there is no need to recover HTree specific index metadata + * records before {@link #VERSION4}. This version also introduces the + * {@link #indexType} field. This field defaults to + * {@link IndexTypeEnum#BTree} for all prior versions. + */ + protected static transient final int VERSION4 = 0x4; /** * The version that will be serialized by this class. 
*/ - private static transient final int CURRENT_VERSION = VERSION3; -// private static transient final int CURRENT_VERSION = VERSION10; + protected static transient final int CURRENT_VERSION = VERSION4; + + /** + * The actual version as set by {@link #readExternal(ObjectInput)} and + * {@link #writeExternal(ObjectOutput)}. + */ + protected transient int version; /** * @todo review generated record for compactness. @@ -2741,14 +2493,14 @@ public void readExternal(final ObjectInput in) throws IOException, ClassNotFoundException { - final int version = (int) LongPacker.unpackLong(in); + final int version = this.version = (int) LongPacker.unpackLong(in); switch (version) { case VERSION0: case VERSION1: case VERSION2: case VERSION3: -// case VERSION4: + case VERSION4: // case VERSION5: // case VERSION6: // case VERSION7: @@ -2767,15 +2519,25 @@ name = in.readUTF(); } + + if (version >= VERSION4) { + + indexType = IndexTypeEnum.valueOf(in.readShort()); + + } else { + + indexType = IndexTypeEnum.BTree; + + } indexUUID = new UUID(in.readLong()/* MSB */, in.readLong()/* LSB */); - branchingFactor = (int)LongPacker.unpackLong(in); + branchingFactor = (int) LongPacker.unpackLong(in); - writeRetentionQueueCapacity = (int)LongPacker.unpackLong(in); - - writeRetentionQueueScan = (int)LongPacker.unpackLong(in); - + writeRetentionQueueCapacity = (int) LongPacker.unpackLong(in); + + writeRetentionQueueScan = (int) LongPacker.unpackLong(in); + // if (version < VERSION7) { // // /* btreeReadRetentionQueueCapacity = (int) */LongPacker @@ -2785,47 +2547,21 @@ // // } - pmd = (LocalPartitionMetadata)in.readObject(); - + pmd = (LocalPartitionMetadata) in.readObject(); + btreeClassName = in.readUTF(); - + checkpointClassName = in.readUTF(); -// if (version < VERSION8) { -// -// // Read and discard the IAddressSerializer object. -// in.readObject(); -// -// } - nodeKeysCoder = (IRabaCoder) in.readObject(); - tupleSer = (ITupleSerializer) in.readObject(); - -// if (version < VERSION4) { -// -// btreeRecordCompressorFactory = null; -// -// } else { + tupleSer = (ITupleSerializer<?,?>) in.readObject(); - btreeRecordCompressorFactory = (IRecordCompressorFactory) in - .readObject(); + btreeRecordCompressorFactory = (IRecordCompressorFactory<?>) in + .readObject(); -// } - - conflictResolver = (IConflictResolver)in.readObject(); + conflictResolver = (IConflictResolver) in.readObject(); -// if (version < VERSION5 || version >= VERSION9) { -// -//// childLocks = true; -// -// } else { -// -//// childLocks = -// in.readBoolean(); -// -// } - deleteMarkers = in.readBoolean(); if (version >= VERSION1) { @@ -2838,191 +2574,59 @@ versionTimestamps = in.readBoolean(); -// if (version < VERSION6) { -// -// versionTimestampFilters = false; -// -// } else { + versionTimestampFilters = in.readBoolean(); - versionTimestampFilters = in.readBoolean(); -// -// } - bloomFilterFactory = (BloomFilterFactory) in.readObject(); - overflowHandler = (IOverflowHandler)in.readObject(); + overflowHandler = (IOverflowHandler) in.readObject(); -// if (version < VERSION10) { -// -// /* -// * The old style of split handler is discarded. The default behavior -// * for the new style of split handler covers all known uses of the -// * old style split handler. While some indices (the sparse row store -// * for example) will have to register a new split handler for -// * safety, those indices were not safe for splits historically. -// */ -// -// // read and discard the old split handler. 
-// in.readObject(); -// -// splitHandler2 = null; -// -// } else { + splitHandler2 = (ISimpleSplitHandler) in.readObject(); - splitHandler2 = (ISimpleSplitHandler) in.readObject(); - -// } - /* * IndexSegment. */ indexSegmentBranchingFactor = (int) LongPacker.unpackLong(in); -// if (version < VERSION7) { -// -// /* indexSegmentLeafCacheCapacity = (int) */LongPacker -// .unpackLong(in); -// -// if (version < VERSION3) { -// -// /* -// * indexSegmentLeafCacheTimeout = Long -// * .parseLong(Options.DEFAULT_INDEX_SEGMENT_LEAF_CACHE_TIMEOUT); -// */ -// -// } else { -// -// /* indexSegmentLeafCacheTimeout = (long) */LongPacker -// .unpackLong(in); -// -// } -// -// } - indexSegmentBufferNodes = in.readBoolean(); -// if (version < VERSION4) { -// -// indexSegmentRecordCompressorFactory = null; -// -// } else { + indexSegmentRecordCompressorFactory = (IRecordCompressorFactory<?>) in + .readObject(); - indexSegmentRecordCompressorFactory = (IRecordCompressorFactory) in - .readObject(); + asynchronousIndexWriteConfiguration = (AsynchronousIndexWriteConfiguration) in + .readObject(); -// } - -// if (version < VERSION1) { -// -// /* -// * Use the default configuration since not present in the serialized -// * form before VERSION1. -// */ -// -// final int masterQueueCapacity = Integer -// .parseInt(Options.DEFAULT_MASTER_QUEUE_CAPACITY); -// -// final int masterChunkSize = Integer -// .parseInt(Options.DEFAULT_MASTER_CHUNK_SIZE); -// -// final long masterChunkTimeoutNanos = Long -// .parseLong(Options.DEFAULT_MASTER_CHUNK_TIMEOUT_NANOS); -// -// final long sinkIdleTimeoutNanos = Long -// .parseLong(Options.DEFAULT_SINK_IDLE_TIMEOUT_NANOS); -// -// final long sinkPollTimeoutNanos = Long -// .parseLong(Options.DEFAULT_SINK_POLL_TIMEOUT_NANOS); -// -// final int sinkQueueCapacity = Integer -// .parseInt(Options.DEFAULT_SINK_QUEUE_CAPACITY); -// -// final int sinkChunkSize = Integer -// .parseInt(Options.DEFAULT_SINK_CHUNK_SIZE); -// -// final long sinkChunkTimeoutNanos = Long -// .parseLong(Options.DEFAULT_SINK_CHUNK_TIMEOUT_NANOS); -// -// asynchronousIndexWriteConfiguration = new AsynchronousIndexWriteConfiguration( -// masterQueueCapacity,// -// masterChunkSize,// -// masterChunkTimeoutNanos,// -// sinkIdleTimeoutNanos,// -// sinkPollTimeoutNanos,// -// sinkQueueCapacity,// -// sinkChunkSize,// -// sinkChunkTimeoutNanos// -// ); -// -// } else { - - asynchronousIndexWriteConfiguration = (AsynchronousIndexWriteConfiguration) in - .readObject(); - -// } + scatterSplitConfiguration = (ScatterSplitConfiguration) in.readObject(); -// if (version < VERSION2) { -// -// /* -// * Use the default configuration since not present in the serialized -// * form before VERSION2. 
-// */ -// -// final boolean scatterSplitEnabled = Boolean -// .parseBoolean(Options.DEFAULT_SCATTER_SPLIT_ENABLED); -// -// final double scatterSplitPercentOfSplitThreshold = Double -// .parseDouble(Options.DEFAULT_SCATTER_SPLIT_PERCENT_OF_SPLIT_THRESHOLD); -// -// final int scatterSplitDataServicesCount = Integer -// .parseInt(Options.DEFAULT_SCATTER_SPLIT_DATA_SERVICE_COUNT); -// -// final int scatterSplitIndexPartitionsCount = Integer -// .parseInt(Options.DEFAULT_SCATTER_SPLIT_INDEX_PARTITION_COUNT); -// -// this.scatterSplitConfiguration = new ScatterSplitConfiguration( -// scatterSplitEnabled, scatterSplitPercentOfSplitThreshold, -// scatterSplitDataServicesCount, -// scatterSplitIndexPartitionsCount); -// -// } else { + if (version >= VERSION2 && version < VERSION4) { - scatterSplitConfiguration = (ScatterSplitConfiguration) in.readObject(); - -// } - - if (version >= VERSION2) { + /* + * These data were moved into the HTreeIndexMetadata subclass + * in VERSION4. The HTree was only used against the memory + * manager before VERSION4. Therefore, we never have durable + * data for an HTree before VERSION4. + */ + + if (version >= VERSION3) { - if (version >= VERSION3) { + // keyLen + LongPacker.unpackInt(in); - keyLen = LongPacker.unpackInt(in); + } - } else { - - keyLen = 0; - - } - - addressBits = LongPacker.unpackInt(in); - - htreeClassName = in.readUTF(); + // addressBits + LongPacker.unpackInt(in); - } else { + // htreeClassName + in.readUTF(); - addressBits = 10; - - htreeClassName = HTree.class.getName(); - - keyLen = 0; - - } - + } + } public void writeExternal(final ObjectOutput out) throws IOException { - - final int version = CURRENT_VERSION; + + final int version = CURRENT_VERSION; LongPacker.packLong(out, version); @@ -3036,6 +2640,12 @@ } + if (version >= VERSION4) { + + out.writeShort(indexType.getCode()); + + } + out.writeLong(indexUUID.getMostSignificantBits()); out.writeLong(indexUUID.getLeastSignificantBits()); @@ -3051,50 +2661,32 @@ // LongPacker.packLong(out, btreeReadRetentionQueueScan); out.writeObject(pmd); - + out.writeUTF(btreeClassName); out.writeUTF(checkpointClassName); - - // Note: This field was dropped as of VERSION8. -// out.writeObject(addrSer); - + out.writeObject(nodeKeysCoder); - + out.writeObject(tupleSer); - -// if (version >= VERSION4) { - - out.writeObject(btreeRecordCompressorFactory); - -// } + out.writeObject(btreeRecordCompressorFactory); + out.writeObject(conflictResolver); -// if (version >= VERSION5 && version < VERSION9 ) { -// -//// out.writeBoolean(childLocks); -// out.writeBoolean(false/* childLocks */); -// -// } - out.writeBoolean(deleteMarkers); - - if (version >= VERSION1) { - out.writeBoolean(rawRecords); - out.writeShort(maxRecLen); - } - + + if (version >= VERSION1) { + out.writeBoolean(rawRecords); + out.writeShort(maxRecLen); + } + out.writeBoolean(versionTimestamps); - -// if (version >= VERSION6) { - out.writeBoolean(versionTime... [truncated message content] |
From: <tho...@us...> - 2012-06-14 14:14:17
|
Revision: 6351 http://bigdata.svn.sourceforge.net/bigdata/?rev=6351&view=rev Author: thompsonbry Date: 2012-06-14 14:14:07 +0000 (Thu, 14 Jun 2012) Log Message: ----------- updated dev logging properties and build properties (location of javacc). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/resources/logging/log4j-dev.properties branches/BIGDATA_RELEASE_1_2_0/build.properties Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/resources/logging/log4j-dev.properties =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/resources/logging/log4j-dev.properties 2012-06-14 14:11:36 UTC (rev 6350) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/resources/logging/log4j-dev.properties 2012-06-14 14:14:07 UTC (rev 6351) @@ -39,7 +39,11 @@ #log4j.logger.com.bigdata.rdf.sparql.ast=INFO #log4j.logger.com.bigdata.rdf.sparql.ast.cache=ALL #log4j.logger.com.bigdata.rdf.sail.sparql=ALL +#log4j.logger.com.bigdata.rdf.sparql.ast.optimizers.ASTBatchResolveTerms=ALL +#log4j.logger.com.bigdata.rdf.rio.ntriples.BigdataNTriplesParserTestCase=ALL +#log4j.logger.com.bigdata.rdf.rio.StatementBuffer=ALL + #log4j.logger.com.bigdata.rdf.sail.webapp=ALL # To see the masked type errors. Modified: branches/BIGDATA_RELEASE_1_2_0/build.properties =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/build.properties 2012-06-14 14:11:36 UTC (rev 6350) +++ branches/BIGDATA_RELEASE_1_2_0/build.properties 2012-06-14 14:14:07 UTC (rev 6351) @@ -29,7 +29,7 @@ javac.encoding=Cp1252 # javacc is required to compile the SPARQL grammar. -javacc.home=C:/usr/local/javacc-5.0 +javacc.home=/usr/java/javacc-5.0 # Versions to use of various dependencies. icu.version=4_8
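A usage note on the two files touched above: the new logger lines in log4j-dev.properties are added commented out; to trace, for example, the StatementBuffer during development, uncomment the corresponding line so that it reads:

log4j.logger.com.bigdata.rdf.rio.StatementBuffer=ALL

Likewise, javacc.home is an ordinary Ant property, so a developer whose javacc 5.0 lives somewhere other than /usr/java/javacc-5.0 can override it per invocation (standard Ant behavior), e.g. ant -Djavacc.home=/opt/javacc-5.0 <target>, rather than editing build.properties locally; the target name here is hypothetical - use whichever target compiles the SPARQL grammar in this build.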