[cweb-CVS] bigdata-rdf/src/java/com/bigdata/rdf KeyOrder.java, 1.1, 1.2 TripleStore.java, 1.26, 1.27

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf
In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv2483/src/java/com/bigdata/rdf

Modified Files:
	KeyOrder.java TripleStore.java 
Log Message:
Added a Sesame 1.x SAIL implementation.  This is NOT intended for production use.  It is just being done to gain a high-level query language integration for the triple store.

Index: KeyOrder.java
===================================================================
RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/KeyOrder.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** KeyOrder.java	26 Jan 2007 20:51:28 -0000	1.1
--- KeyOrder.java	12 Apr 2007 23:59:21 -0000	1.2
***************
*** 69,71 ****
--- 69,124 ----
      }
      
+     private static final long NULL = TripleStore.NULL;
+     
+     /**
+      * Return the access path that should be used for the triple pattern.
+      * 
+      * @param s
+      *            The optional subject identifier or {@link TripleStore#NULL}.
+      * @param p
+      *            The optional predicate identifier or {@link TripleStore#NULL}.
+      * @param o
+      *            The optional object identifier or {@link TripleStore#NULL}.
+      * 
+      * @return The KeyOrder that identifies the index to use for that triple
+      *         pattern.
+      */
+     public static KeyOrder getKeyOrder(long s, long p, long o) {
+ 
+         if (s != NULL && p != NULL && o != NULL) {
+ 
+             return SPO;
+ 
+         } else if (s != NULL && p != NULL) {
+ 
+             return SPO;
+ 
+         } else if (s != NULL && o != NULL) {
+ 
+             return OSP;
+ 
+         } else if (p != NULL && o != NULL) {
+ 
+             return POS;
+ 
+         } else if (s != NULL) {
+ 
+             return SPO;
+ 
+         } else if (p != NULL) {
+ 
+             return POS;
+ 
+         } else if (o != NULL) {
+ 
+             return OSP;
+ 
+         } else {
+ 
+             return SPO;
+ 
+         }
+ 
+     }
+     
  }

Index: TripleStore.java
===================================================================
RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/TripleStore.java,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** TripleStore.java	29 Mar 2007 17:01:47 -0000	1.26
--- TripleStore.java	12 Apr 2007 23:59:21 -0000	1.27
***************
*** 72,79 ****
--- 72,81 ----
  import com.bigdata.objndx.Errors;
  import com.bigdata.objndx.IBatchOp;
+ import com.bigdata.objndx.IEntryIterator;
  import com.bigdata.objndx.IIndex;
  import com.bigdata.objndx.ISimpleBTree;
  import com.bigdata.objndx.KeyBuilder;
  import com.bigdata.rawstore.Bytes;
+ import com.bigdata.rdf.inf.SPO;
  import com.bigdata.rdf.model.OptimizedValueFactory.OSPComparator;
  import com.bigdata.rdf.model.OptimizedValueFactory.POSComparator;
***************
*** 553,578 ****
      /**
       * Return true if the statement exists in the store (non-batch API).
       */
      public boolean containsStatement(Resource s, URI p, Value o) {
  
          long _s, _p, _o;
          
!         if( (_s = getTermId(s)) == 0L ) return false;
!         if( (_p = getTermId(p)) == 0L ) return false;
!         if( (_o = getTermId(o)) == 0L ) return false;
          
!         return getSPOIndex().contains(keyBuilder.statement2Key(_s, _p, _o));
          
      }
  
      /**
       * Adds the statements to each index (batch api).
!      * <p> 
       * Note: this is not sorting by the generated keys so the sort order may not
!      *       perfectly reflect the natural order of the index. however, i
!      *       suspect that it simply creates a few partitions out of the natural
!      *       index order based on the difference between signed and unsigned
!      *       interpretations of the termIds when logically combined into a
!      *       statement identifier.
       * 
       * @param stmts
--- 555,965 ----
      /**
       * Return true if the statement exists in the store (non-batch API).
+      * 
+      * @param s
+      *            Optional subject.
+      * @param p
+      *            Optional predicate.
+      * @param o
+      *            Optional object.
       */
      public boolean containsStatement(Resource s, URI p, Value o) {
  
          long _s, _p, _o;
+ 
+         _s = (s == null ? NULL : getTermId(s));
+         _p = (p == null ? NULL : getTermId(p));
+         _o = (o == null ? NULL : getTermId(o));
+ 
+         /*
+          * If a value was specified and it is not in the terms index then the
+          * statement can not exist in the KB.
+          */
+         if (_s == NULL && s != null)
+             return false;
+         if (_p == NULL && p != null)
+             return false;
+         if (_o == NULL && o != null)
+             return false;
          
!         /*
!          * if all bound, then a slight optimization.
!          */
!         if (_s != NULL && _p != NULL && _o != NULL) {
! 
!             return getSPOIndex().contains(keyBuilder.statement2Key(_s, _p, _o));
!             
!         }
          
!         /*
!          * Choose the access path and test to see if any statements would be
!          * visited for that triple pattern.
!          */
!         return rangeQuery(_s,_p,_o).hasNext();
!         
!     }
! 
!     /**
!      * Return a range query iterator that will visit the statements matching the
!      * triple pattern using the best access path given the triple pattern.
!      * 
!      * @param s
!      *            An optional term identifier for the subject role or
!      *            {@link #NULL}.
!      * @param p
!      *            An optional term identifier for the predicate role or
!      *            {@link #NULL}.
!      * @param o
!      *            An optional term identifier for the object role or
!      *            {@link #NULL}.
!      * 
!      * @return The range query iterator.
!      * 
!      * @todo write tests.
!      */
!     public IEntryIterator rangeQuery(long s, long p, long o) {
! 
!         if (s != NULL && p != NULL && o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(s, p, o);
! 
!             byte[] toKey = keyBuilder.statement2Key(s, p, o + 1);
! 
!             return getSPOIndex().rangeIterator(fromKey, toKey);
! 
!         } else if (s != NULL && p != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(s, p, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(s, p + 1, NULL);
! 
!             return getSPOIndex().rangeIterator(fromKey, toKey);
! 
!         } else if (s != NULL && o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(o, s, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(o, s + 1, NULL);
! 
!             return getOSPIndex().rangeIterator(fromKey, toKey);
! 
!         } else if (p != NULL && o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(p, o, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(p, o + 1, NULL);
! 
!             return getPOSIndex().rangeIterator(fromKey, toKey);
! 
!         } else if (s != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(s, NULL, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(s + 1, NULL, NULL);
! 
!             return getSPOIndex().rangeIterator(fromKey, toKey);
! 
!         } else if (p != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(p, NULL, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(p + 1, NULL, NULL);
! 
!             return getPOSIndex().rangeIterator(fromKey, toKey);
! 
!         } else if (o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(o, NULL, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(o + 1, NULL, NULL);
! 
!             return getOSPIndex().rangeIterator(fromKey, toKey);
! 
!         } else {
! 
!             return getSPOIndex().rangeIterator(null, null);
! 
!         }
! 
!     }
!     
!     /**
!      * Return the #of statements matching the triple pattern using the best
!      * access path given the triple pattern (the count will be approximate if
!      * partitioned indices are being used).
!      * 
!      * @param s
!      *            An optional term identifier for the subject role or
!      *            {@link #NULL}.
!      * @param p
!      *            An optional term identifier for the predicate role or
!      *            {@link #NULL}.
!      * @param o
!      *            An optional term identifier for the object role or
!      *            {@link #NULL}.
!      * 
!      * @return The range count.
!      * 
!      * @todo write tests.
!      */
!     public int rangeCount(long s, long p, long o) {
! 
!         if (s != NULL && p != NULL && o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(s, p, o);
! 
!             byte[] toKey = keyBuilder.statement2Key(s, p, o + 1);
! 
!             return getSPOIndex().rangeCount(fromKey, toKey);
! 
!         } else if (s != NULL && p != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(s, p, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(s, p + 1, NULL);
! 
!             return getSPOIndex().rangeCount(fromKey, toKey);
! 
!         } else if (s != NULL && o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(o, s, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(o, s + 1, NULL);
! 
!             return getOSPIndex().rangeCount(fromKey, toKey);
! 
!         } else if (p != NULL && o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(p, o, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(p, o + 1, NULL);
! 
!             return getPOSIndex().rangeCount(fromKey, toKey);
! 
!         } else if (s != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(s, NULL, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(s + 1, NULL, NULL);
! 
!             return getSPOIndex().rangeCount(fromKey, toKey);
! 
!         } else if (p != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(p, NULL, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(p + 1, NULL, NULL);
! 
!             return getPOSIndex().rangeCount(fromKey, toKey);
! 
!         } else if (o != NULL) {
! 
!             byte[] fromKey = keyBuilder.statement2Key(o, NULL, NULL);
! 
!             byte[] toKey = keyBuilder.statement2Key(o + 1, NULL, NULL);
! 
!             return getOSPIndex().rangeCount(fromKey, toKey);
! 
!         } else {
! 
!             return getSPOIndex().rangeCount(null, null);
! 
!         }
! 
!     }
!     
!     /**
!      * Removes statements matching the triple pattern.
!      * 
!      * @param s
!      * @param p
!      * @param o
!      * 
!      * @return The #of statements removed.
!      * 
!      * @todo write tests.
!      */
!     public int removeStatements(Resource s,URI p,Value o) {
! 
!         /*
!          * convert our object types to internal identifiers.
!          */
!         long _s, _p, _o;
! 
!         _s = (s == null ? NULL : getTermId(s));
!         _p = (p == null ? NULL : getTermId(p));
!         _o = (o == null ? NULL : getTermId(o));
! 
!         /*
!          * If a value was specified and it is not in the terms index then the
!          * statement can not exist in the KB.
!          */
!         if (_s == NULL && s != null) {
! 
!             return 0;
!             
!         }
!         
!         if (_p == NULL && p != null) {
!         
!             return 0;
!             
!         }
!         
!         if (_o == NULL && o != null) {
!             
!             return 0;
!             
!         }
!     
!         return removeStatements(_s,_p,_o);
          
      }
+     
+     /**
+      * Remove statements matching the triple pattern.
+      * <p>
+      * Since the indices do not support modification with concurrent traversal
+      * the statements are materialized before they are deleted.
+      * 
+      * @param _s
+      * @param _p
+      * @param _o
+      * 
+      * @return The #of statements removed.
+      * 
+      * @todo the {@link #keyBuilder} is shared by every call, which means that
+      *       this is NOT thread safe.
+      * 
+      * @todo this is not using the batch btree api.
+      * 
+      * @todo write tests.
+      */
+     public int removeStatements(long _s, long _p, long _o) {
+     
+         /*
+          * if all bound, then a slight optimization.
+          */
+         if (_s != NULL && _p != NULL && _o != NULL) {
+ 
+             byte[] key = keyBuilder.statement2Key(_s, _p, _o);
+ 
+             if (getSPOIndex().contains(key)) {
+ 
+                 getSPOIndex().remove(key);
+ 
+                 return 1;
+                 
+             } else {
+                 
+                 return 0;
+                 
+             }
+             
+         }
+         
+         /*
+          * Choose the access path, count the #of statements that match the
+          * triple pattern, and then materialize those statements (since
+          * traversal with concurrent modification is not supported).
+          */
+         
+         KeyOrder keyOrder = KeyOrder.getKeyOrder(_s, _p, _o);
+         
+         // #of matching statements.
+         int rangeCount = rangeCount(_s, _p, _o);
+         
+         SPO[] stmts = new SPO[rangeCount];
+ 
+         // materialize the matching statements.
+         {
+             IEntryIterator itr1 = rangeQuery(_s, _p, _o);
+ 
+             int i = 0;
+ 
+             while (itr1.hasNext()) {
+ 
+                 itr1.next();
+ 
+                 stmts[i++] = new SPO(keyOrder, keyBuilder, itr1.getKey());
+ 
+             }
+ 
+             assert i == rangeCount;
+         }
+ 
+         /*
+          * Remove the statements from each of the access paths.
+          */
+         {
+ 
+             {
+                 IIndex ndx = getSPOIndex();
+ 
+                 // Place statements in SPO order.
+                 Arrays.sort(stmts, com.bigdata.rdf.inf.SPOComparator.INSTANCE);
+ 
+                 // remove statements from SPO index.
+                 for (int i = 0; i < stmts.length; i++) {
+ 
+                     SPO spo = stmts[i];
+ 
+                     ndx.remove(keyBuilder.statement2Key(spo.s, spo.p, spo.o));
+ 
+                 }
+             }
+ 
+             {
+ 
+                 IIndex ndx = getPOSIndex();
+                 // Place statements in POS order.
+                 Arrays.sort(stmts, com.bigdata.rdf.inf.POSComparator.INSTANCE);
+ 
+                 // Remove statements from POS index.
+                 for (int i = 0; i < stmts.length; i++) {
+ 
+                     SPO spo = stmts[i];
+ 
+                     ndx.remove(keyBuilder.statement2Key(spo.p, spo.o, spo.s));
+ 
+                 }
+             }
+ 
+             {
+ 
+                 IIndex ndx = getOSPIndex();
+ 
+                 // Place statements in OSP order.
+                 Arrays.sort(stmts, com.bigdata.rdf.inf.OSPComparator.INSTANCE);
+ 
+                 // Remove statements from OSP index.
+                 for (int i = 0; i < stmts.length; i++) {
+ 
+                     SPO spo = stmts[i];
+ 
+                     ndx.remove(keyBuilder.statement2Key(spo.o, spo.s, spo.p));
+ 
+                 }
+ 
+             }
+             
+         }
+ 
+         return rangeCount;
+ 
+     }
+ 
+     /**
+      * Value used for a "NULL" term identifier.
+      */
+     public static final long NULL = 0L;
  
      /**
       * Adds the statements to each index (batch api).
!      * <p>
       * Note: this is not sorting by the generated keys so the sort order may not
!      * perfectly reflect the natural order of the index. however, i suspect that
!      * it simply creates a few partitions out of the natural index order based
!      * on the difference between signed and unsigned interpretations of the
!      * termIds when logically combined into a statement identifier.
       * 
       * @param stmts
***************
*** 1193,1196 ****
--- 1580,1604 ----
          public long commitTime;
          
+         public long triplesPerSecond() {
+             
+             return ((long)( ((double)toldTriples) / ((double)totalTime) * 1000d ));
+             
+         }
+         
+         /**
+          * Human readable representation.
+          */
+         public String toString() {
+ 
+             return toldTriples+" stmts added in " + 
+                     ((double)loadTime) / 1000d +
+                     " secs, rate= " + 
+                     triplesPerSecond()+
+                     ", commitLatency="+
+                     commitTime+"ms" 
+                     ;
+ 
+         }
+         
      }
      
***************
*** 1270,1274 ****
                      ((double)loader.getInsertTime()) / 1000d +
                      " secs, rate= " + 
!                     loader.getInsertRate() 
                      );
  
--- 1678,1682 ----
                      ((double)loader.getInsertTime()) / 1000d +
                      " secs, rate= " + 
!                     loader.getInsertRate()
                      );

[cweb-CVS] bigdata-rdf/src/java/com/bigdata/rdf KeyOrder.java, 1.1, 1.2 TripleStore.java, 1.26, 1.27

[cweb-CVS] bigdata-rdf/src/java/com/bigdata/rdf KeyOrder.java, 1.1, 1.2 TripleStore.java, 1.26, 1.27