From: Bryan T. <tho...@us...> - 2007-04-13 20:37:11
|
Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv5139/src/java/com/bigdata/rdf/inf Modified Files: InferenceEngine.java AbstractRuleRdf.java Rule.java RuleRdfs09.java AbstractRuleRdfs511.java AbstractRuleRdfs2379.java AbstractRuleRdfs68101213.java RuleRdf01.java RuleRdfs07.java Log Message: Fussed with the inference engine and got a modest performance boost. Rdfs5/11 needs to be rewritten to be smart about exploiting the buffered triples, e.g., using a binary search on the buffer to locate matches in the self-join. Index: RuleRdfs07.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/RuleRdfs07.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RuleRdfs07.java 31 Jan 2007 15:52:54 -0000 1.1 --- RuleRdfs07.java 13 Apr 2007 20:37:04 -0000 1.2 *************** *** 44,49 **** package com.bigdata.rdf.inf; ! ! public class RuleRdfs07 extends AbstractRuleRdfs2379 { --- 44,52 ---- package com.bigdata.rdf.inf; ! /** ! * <pre> ! * <a rdfs:subPropertyOf b> AND <u a y> IMPLIES <u b y> ! * </pre> ! 
*/ public class RuleRdfs07 extends AbstractRuleRdfs2379 { *************** *** 59,63 **** --- 62,68 ---- protected SPO buildStmt3( SPO stmt1, SPO stmt2 ) { + return new SPO( stmt2.s, stmt1.o, stmt2.o ); + } Index: AbstractRuleRdfs511.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs511.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** AbstractRuleRdfs511.java 12 Apr 2007 23:59:21 -0000 1.5 --- AbstractRuleRdfs511.java 13 Apr 2007 20:37:04 -0000 1.6 *************** *** 45,53 **** import java.util.Arrays; - import java.util.Vector; import com.bigdata.rdf.KeyOrder; import com.bigdata.rdf.TempTripleStore; - import com.bigdata.rdf.inf.Rule.Stats; --- 45,51 ---- *************** *** 64,72 **** } ! public Stats apply( TempTripleStore entailments ) { ! ! Stats stats = new Stats(); ! long computeStart = System.currentTimeMillis(); // the predicate is fixed for all parts of the rule. --- 62,68 ---- } ! public Stats apply( final Stats stats, SPO[] buffer, TempTripleStore tmpStore ) { ! final long computeStart = System.currentTimeMillis(); // the predicate is fixed for all parts of the rule. *************** *** 91,121 **** // in POS order. ! SPO[] stmts1 = store.getStatements(store.getPOSIndex(), KeyOrder.POS, pkey, pkey1); // in SPO order. Arrays.sort(stmts1,SPOComparator.INSTANCE); // a clone of the answer set ! SPO[] stmts2 = stmts1.clone(); - Vector<SPO> stmts3 = new Vector<SPO>(BUFFER_SIZE); // the simplest n^2 algorithm ! for( int i = 0; i < stmts1.length; i++ ) { // printStatement(stmts1[i]); ! for ( int j = 0; j < stmts2.length; j++ ) { ! if ( stmts1[i].o == stmts2[j].s ) { ! if (stmts3.size() == BUFFER_SIZE) { ! dumpBuffer ! ( stmts3.toArray( new SPO[stmts3.size()] ), ! entailments ! ); ! stmts3.clear(); } ! stmts3.add( new SPO(stmts1[i].s, p, stmts2[j].o) ); stats.numComputed++; } } } ! 
if(debug)dumpBuffer( stmts3.toArray( new SPO[stmts3.size()] ), entailments ); ! stats.computeTime = System.currentTimeMillis() - computeStart; return stats; --- 87,134 ---- // in POS order. ! final SPO[] stmts1 = store.getStatements(store.getPOSIndex(), ! KeyOrder.POS, pkey, pkey1); ! ! stats.stmts1 += stmts1.length; ! // in SPO order. Arrays.sort(stmts1,SPOComparator.INSTANCE); + // a clone of the answer set ! // SPO[] stmts2 = stmts1.clone(); ! final SPO[] stmts2 = stmts1; ! ! stats.stmts2 += stmts2.length; ! ! int n = 0; // the simplest n^2 algorithm ! for (int i = 0; i < stmts1.length; i++) { ! // printStatement(stmts1[i]); ! ! for (int j = 0; j < stmts2.length; j++) { ! ! if (stmts1[i].o == stmts2[j].s) { ! ! buffer[n++] = new SPO(stmts1[i].s, p, stmts2[j].o); ! ! if (n == buffer.length) { ! ! insertStatements(buffer, n, tmpStore); ! ! n = 0; } ! stats.numComputed++; } + } + } ! ! insertStatements( buffer, n, tmpStore ); ! stats.computeTime += System.currentTimeMillis() - computeStart; return stats; Index: AbstractRuleRdfs2379.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs2379.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** AbstractRuleRdfs2379.java 12 Apr 2007 23:59:21 -0000 1.4 --- AbstractRuleRdfs2379.java 13 Apr 2007 20:37:04 -0000 1.5 *************** *** 45,54 **** import java.util.Arrays; - import java.util.Vector; import com.bigdata.rdf.KeyOrder; import com.bigdata.rdf.TempTripleStore; - public abstract class AbstractRuleRdfs2379 extends AbstractRuleRdf { --- 45,52 ---- *************** *** 63,93 **** } ! public Stats apply( TempTripleStore entailments ) { ! ! Stats stats = new Stats(); ! long computeStart = System.currentTimeMillis(); - // create a place to hold the entailments - Vector<SPO> stmts3 = new Vector<SPO>(BUFFER_SIZE); - SPO[] stmts1 = getStmts1(); ! for ( int i = 0; i < stmts1.length; i++ ) { ! 
SPO[] stmts2 = getStmts2( stmts1[i] ); ! for ( int j = 0; j < stmts2.length; j++ ) { ! if (stmts3.size() == BUFFER_SIZE) { ! dumpBuffer ! ( stmts3.toArray( new SPO[stmts3.size()] ), ! entailments ! ); ! stmts3.clear(); } ! stmts3.add( buildStmt3( stmts1[i], stmts2[j] ) ); stats.numComputed++; } } - if(debug) dumpBuffer( stmts3.toArray( new SPO[stmts3.size()] ), entailments ); ! stats.computeTime = System.currentTimeMillis() - computeStart; return stats; --- 61,101 ---- } ! public Stats apply( final Stats stats, final SPO[] buffer, TempTripleStore tmpStore ) { ! final long computeStart = System.currentTimeMillis(); SPO[] stmts1 = getStmts1(); ! ! stats.stmts1 += stmts1.length; ! ! int n = 0; ! ! for (int i = 0; i < stmts1.length; i++) { ! ! SPO[] stmts2 = getStmts2(stmts1[i]); ! ! stats.stmts2 += stmts2.length; ! ! for (int j = 0; j < stmts2.length; j++) { ! ! buffer[n++] = buildStmt3(stmts1[i], stmts2[j]); ! ! if (n == buffer.length) { ! ! insertStatements(buffer, n, tmpStore); ! ! n = 0; ! } ! stats.numComputed++; + } + } ! insertStatements(buffer, n, tmpStore); ! ! stats.computeTime += System.currentTimeMillis() - computeStart; return stats; *************** *** 95,111 **** } ! // default behavior is to use POS index to match body[0] and then sort ! // using POSComparator since the default for body[1] is the POS index ! // again (is the sort even necessary?) protected SPO[] getStmts1() { // use the POS index to look up the matches for body[0], the more // constrained triple byte[] fromKey = store.keyBuilder.statement2Key( body[0].p.id, 0, 0 ); byte[] toKey = store.keyBuilder.statement2Key( body[0].p.id+1, 0, 0 ); ! SPO[] stmts1 = store.getStatements(store.getPOSIndex(), KeyOrder.POS, fromKey, toKey); // make sure the statements are in POS order, since we are going to be // doing lookups against the POS index in a moment Arrays.sort(stmts1,POSComparator.INSTANCE); --- 103,126 ---- } ! /** ! 
* default behavior is to use POS index to match body[0] and then sort using ! * POSComparator since the default for body[1] is the POS index again (is ! * the sort even necessary?) ! */ protected SPO[] getStmts1() { // use the POS index to look up the matches for body[0], the more // constrained triple + byte[] fromKey = store.keyBuilder.statement2Key( body[0].p.id, 0, 0 ); + byte[] toKey = store.keyBuilder.statement2Key( body[0].p.id+1, 0, 0 ); ! ! SPO[] stmts1 = store.getStatements(store.getPOSIndex(), KeyOrder.POS, ! fromKey, toKey); // make sure the statements are in POS order, since we are going to be // doing lookups against the POS index in a moment + Arrays.sort(stmts1,POSComparator.INSTANCE); *************** *** 114,124 **** } ! // default behavior is to join the subject of stmt1 with the predicate ! // of body[1] using the POS index protected SPO[] getStmts2( SPO stmt1 ) { byte[] fromKey = store.keyBuilder.statement2Key(stmt1.s, 0, 0); byte[] toKey = store.keyBuilder.statement2Key(stmt1.s+1, 0, 0); ! return store.getStatements(store.getPOSIndex(), KeyOrder.POS, fromKey, toKey); } --- 129,144 ---- } ! /** ! * default behavior is to join the subject of stmt1 with the predicate of ! * body[1] using the POS index. ! */ protected SPO[] getStmts2( SPO stmt1 ) { byte[] fromKey = store.keyBuilder.statement2Key(stmt1.s, 0, 0); + byte[] toKey = store.keyBuilder.statement2Key(stmt1.s+1, 0, 0); ! ! return store.getStatements(store.getPOSIndex(), KeyOrder.POS, fromKey, ! toKey); } *************** *** 126,128 **** protected abstract SPO buildStmt3( SPO stmt1, SPO stmt2 ); ! } \ No newline at end of file --- 146,148 ---- protected abstract SPO buildStmt3( SPO stmt1, SPO stmt2 ); ! 
} Index: RuleRdf01.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/RuleRdf01.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** RuleRdf01.java 13 Apr 2007 15:02:34 -0000 1.6 --- RuleRdf01.java 13 Apr 2007 20:37:04 -0000 1.7 *************** *** 44,49 **** package com.bigdata.rdf.inf; - import java.util.Vector; - import com.bigdata.btree.IEntryIterator; import com.bigdata.rdf.KeyOrder; --- 44,47 ---- *************** *** 62,75 **** } ! public Stats apply( TempTripleStore btree ) { ! ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! Vector<SPO> entailments = new Vector<SPO>(BUFFER_SIZE); long lastP = -1; IEntryIterator it = store.getPOSIndex().rangeIterator(null,null); --- 60,71 ---- } ! public Stats apply( final Stats stats, final SPO[] buffer, TempTripleStore btree ) { ! final long computeStart = System.currentTimeMillis(); long lastP = -1; + int n = 0; + IEntryIterator it = store.getPOSIndex().rangeIterator(null,null); *************** *** 78,97 **** it.next(); ! SPO stmt = ! new SPO(KeyOrder.POS,store.keyBuilder,it.getKey()); if ( stmt.p != lastP ) { lastP = stmt.p; ! if (entailments.size() == BUFFER_SIZE) { ! dumpBuffer ! ( entailments.toArray( new SPO[entailments.size()] ), ! btree ! ); ! entailments.clear(); } ! entailments.add ! ( new SPO(stmt.p, store.rdfType.id, store.rdfProperty.id) ); stats.numComputed++; --- 74,96 ---- it.next(); ! stats.stmts1++; ! ! SPO stmt = new SPO(KeyOrder.POS, store.keyBuilder, it.getKey()); if ( stmt.p != lastP ) { lastP = stmt.p; + + buffer[n++] = new SPO(stmt.p, store.rdfType.id, + store.rdfProperty.id); ! if (n == buffer.length) { ! ! insertStatements(buffer, n, btree); ! ! n = 0; ! } ! stats.numComputed++; *************** *** 99,105 **** } - if(debug)dumpBuffer( entailments.toArray( new SPO[entailments.size()] ), btree ); ! 
stats.computeTime = System.currentTimeMillis() - computeStart; return stats; --- 98,105 ---- } ! insertStatements( buffer, n, btree ); ! ! stats.computeTime += System.currentTimeMillis() - computeStart; return stats; Index: RuleRdfs09.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/RuleRdfs09.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RuleRdfs09.java 9 Feb 2007 20:18:56 -0000 1.3 --- RuleRdfs09.java 13 Apr 2007 20:37:04 -0000 1.4 *************** *** 47,50 **** --- 47,55 ---- + /** + * <pre> + * <u rdfs:subClassOf x> AND <v rdf:type u> IMPLIES <v rdf:type x> + * </pre> + */ public class RuleRdfs09 extends AbstractRuleRdfs2379 { *************** *** 61,74 **** protected SPO[] getStmts2( SPO stmt1 ) { ! byte[] fromKey = ! store.keyBuilder.statement2Key(store.rdfType.id,stmt1.s,0); ! byte[] toKey = ! store.keyBuilder.statement2Key(store.rdfType.id,stmt1.s+1,0); ! return store.getStatements(store.getPOSIndex(), KeyOrder.POS, fromKey, toKey); } protected SPO buildStmt3( SPO stmt1, SPO stmt2 ) { return new SPO( stmt2.s, store.rdfType.id, stmt1.o ); } --- 66,84 ---- protected SPO[] getStmts2( SPO stmt1 ) { ! byte[] fromKey = store.keyBuilder.statement2Key(store.rdfType.id, ! stmt1.s, 0); ! ! byte[] toKey = store.keyBuilder.statement2Key(store.rdfType.id, ! stmt1.s + 1, 0); ! ! return store.getStatements(store.getPOSIndex(), KeyOrder.POS, fromKey, ! 
toKey); } protected SPO buildStmt3( SPO stmt1, SPO stmt2 ) { + return new SPO( stmt2.s, store.rdfType.id, stmt1.o ); + } Index: InferenceEngine.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/InferenceEngine.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** InferenceEngine.java 13 Apr 2007 15:02:34 -0000 1.12 --- InferenceEngine.java 13 Apr 2007 20:37:04 -0000 1.13 *************** *** 54,60 **** --- 54,62 ---- import com.bigdata.journal.BufferMode; import com.bigdata.journal.Options; + import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.KeyOrder; import com.bigdata.rdf.TempTripleStore; import com.bigdata.rdf.TripleStore; + import com.bigdata.rdf.inf.Rule.Stats; import com.bigdata.rdf.inf.TestMagicSets.MagicRule; import com.bigdata.rdf.model.OptimizedValueFactory._URI; *************** *** 62,66 **** /** * Adds support for RDFS inference. ! * <p> * A fact always has the form: * --- 64,68 ---- /** * Adds support for RDFS inference. ! * <p> * A fact always has the form: * *************** *** 269,286 **** /** ! * Compute the complete forward closure of the store using a ! * set-at-a-time inference strategy. * ! * @todo refactor so that we can close a document that we are loading ! * before it is inserted into the main database. * ! * @todo can the dependency array among the rules be of use to us when ! * we are computing full foward closure as opposed to using magic ! * sets to answer a specific query? */ public void fullForwardClosure() { final Rule[] rules = this.rules; final int nrules = rules.length; --- 271,330 ---- /** ! * Compute the complete forward closure of the store using a set-at-a-time ! * inference strategy. ! * <p> ! * The general approach is a series of rounds in which each rule is applied ! * to all data in turn. The rules directly embody queries that cause only ! * the statements which can trigger the rule to be visited. 
Since most rules ! * require two antecedents, this typically means that the rules are running ! * two range queries and performing a join operation in order to identify ! * the set of rule firings. Entailments computed in each round are fed back ! * into the source against which the rules can match their preconditions, so ! * derived entailments may be computed in a succession of rounds. The ! * process halts when no new entailments are computed in a given round. ! * <p> * ! * @todo Rules can be computed in parallel using a pool of worker threads. ! * The round ends when the queue of rules to process is empty and all ! * workers are done. * ! * @todo The entailments computed in each round are inserted back into the ! * primary triple store at the end of the round. The purpose of this ! * is to read from a fused view of the triples already in the primary ! * store and those that have been computed by the last application of ! * the rules. This is necessary in order for derived entailments to be ! * computed. However, an alternative approach would be to explicitly ! * read from a fused view of the indices in the temporary store and ! * those in the primary store (or simply combining their iterators in ! * the rules). This could have several advantages and an approach like ! * this is necessary in order to compute entailments at query time ! * that are not to be inserted back into the kb. ! * ! * @todo can the dependency array among the rules be of use to us when we ! * are computing full foward closure as opposed to using magic sets to ! * answer a specific query? ! * ! * @todo The SPO[] buffer might be better off as a more interesting ! * structure that only accepted the distinct statements. This was a ! * big win for the batch oriented parser and would probably eliminate ! * even more duplicates in the context of the inference engine. */ public void fullForwardClosure() { + /* + * @todo configuration paramater. 
+ * + * There is a factor of 2 performance difference for a sample data set + * from a buffer size of one (unordered inserts) to a buffer size of + * 10k. + */ + final int BUFFER_SIZE = 100 * Bytes.kilobyte32; + final Rule[] rules = this.rules; + final long[] timePerRule = new long[rules.length]; + + final int[] entailmentsPerRule = new int[rules.length]; + final int nrules = rules.length; *************** *** 294,320 **** int round = 0; TempTripleStore entailments = new TempTripleStore(); while (true) { ! int numComputed = 0; ! ! long computeTime = 0; ! ! int numEntailmentsBefore = entailments.getStatementCount(); for (int i = 0; i < nrules; i++) { Rule rule = rules[i]; ! Rule.Stats stats = rule.apply( entailments ); ! numComputed += stats.numComputed; ! computeTime += stats.computeTime; } ! int numEntailmentsAfter = entailments.getStatementCount(); if ( numEntailmentsBefore == numEntailmentsAfter ) { --- 338,426 ---- int round = 0; + /* + * This is a buffer that is used to hold entailments so that we can + * insert them into the indices using ordered insert operations (much + * faster than random inserts). The buffer is reused by each rule. The + * rule assumes that the buffer is empty and just keeps a local counter + * of the #of entailments that it has inserted into the buffer. When the + * buffer overflows, those entailments are transfered enmass into the + * tmp store. + */ + final SPO[] buffer = new SPO[BUFFER_SIZE]; + + /* + * The temporary store used to accumulate the entailments. + */ TempTripleStore entailments = new TempTripleStore(); + Stats totalStats = new Stats(); + while (true) { ! final int numEntailmentsBefore = entailments.getStatementCount(); for (int i = 0; i < nrules; i++) { + Stats ruleStats = new Stats(); + Rule rule = rules[i]; ! int nbefore = ruleStats.numComputed; ! rule.apply( ruleStats, buffer, entailments ); ! ! int nnew = ruleStats.numComputed - nbefore; ! // #of statements examined by the rule. ! 
int nstmts = ruleStats.stmts1 + ruleStats.stmts2; ! ! long elapsed = ruleStats.computeTime; ! ! timePerRule[i] += elapsed; ! ! entailmentsPerRule[i] = ruleStats.numComputed; // Note: already a running sum. ! ! long stmtsPerSec = (nstmts == 0 || elapsed == 0L ? 0 ! : ((long) (((double) nstmts) / ((double) elapsed) * 1000d))); ! ! if (DEBUG||true) { ! log.debug("round# " + round + ", " ! + rule.getClass().getSimpleName() ! + ", entailments=" + nnew + ", #stmts1=" ! + ruleStats.stmts1 + ", #stmts2=" ! + ruleStats.stmts2 + ", #stmtsExaminedPerSec=" ! + stmtsPerSec ! ); ! } ! ! totalStats.numComputed += ruleStats.numComputed; ! ! totalStats.computeTime += ruleStats.computeTime; } + + if(true) { ! /* ! * Show times for each rule so far. ! */ ! System.err.println("rule \tms\t#entms\tentms/ms"); ! ! for(int i=0; i<timePerRule.length; i++) { ! ! System.err.println(rules[i].getClass().getSimpleName() ! + "\t" ! + timePerRule[i] ! + "\t" ! + entailmentsPerRule[i] ! + "\t" ! + (timePerRule[i] == 0 ? 0 : entailmentsPerRule[i] ! / timePerRule[i])); ! ! } ! ! } ! ! final int numEntailmentsAfter = entailments.getStatementCount(); if ( numEntailmentsBefore == numEntailmentsAfter ) { *************** *** 325,339 **** } ! long insertStart = System.currentTimeMillis(); ! ! int numInserted = transferBTrees( entailments ); ! ! long insertTime = System.currentTimeMillis() - insertStart; ! ! if(DEBUG){ StringBuilder debug = new StringBuilder(); debug.append( "round #" ).append( round++ ).append( ": " ); ! debug.append( numComputed ).append( " computed in " ); ! debug.append( computeTime ).append( " millis, " ); debug.append( numInserted ).append( " inserted in " ); debug.append( insertTime ).append( " millis " ); --- 431,449 ---- } ! /* ! * Transfer the entailments into the primary store so that derived ! * entailments may be computed. ! */ ! final long insertStart = System.currentTimeMillis(); ! ! final int numInserted = transferBTrees(entailments); ! ! 
final long insertTime = System.currentTimeMillis() - insertStart; ! ! if (DEBUG) { StringBuilder debug = new StringBuilder(); debug.append( "round #" ).append( round++ ).append( ": " ); ! debug.append( totalStats.numComputed ).append( " computed in " ); ! debug.append( totalStats.computeTime ).append( " millis, " ); debug.append( numInserted ).append( " inserted in " ); debug.append( insertTime ).append( " millis " ); *************** *** 347,354 **** final int lastStatementCount = getStatementCount(); ! if(INFO) { ! log.info("Closed store in " + elapsed + "ms yeilding " ! + lastStatementCount + " statements total, " + ! (lastStatementCount - firstStatementCount) + " inferences"); } --- 457,475 ---- final int lastStatementCount = getStatementCount(); ! if (INFO) { ! ! final int inferenceCount = lastStatementCount - firstStatementCount; ! ! log.info("Computed closure of store in " ! + elapsed ! + "ms yeilding " ! + lastStatementCount ! + " statements total, " ! + (inferenceCount) ! + " inferences" ! + ", entailmentsPerSec=" ! + ((long) (((double) inferenceCount) ! / ((double) elapsed) * 1000d))); ! } *************** *** 358,370 **** * Copies the entailments from the temporary store into the main store. * ! * @param entailments * * @return The #of entailments inserted into the main store. */ ! private int transferBTrees( TempTripleStore entailments ) { int numInserted = 0; ! IEntryIterator it = entailments.getSPOIndex().rangeIterator(null, null); while (it.hasNext()) { it.next(); --- 479,491 ---- * Copies the entailments from the temporary store into the main store. * ! * @param tmpStore * * @return The #of entailments inserted into the main store. */ ! private int transferBTrees( TempTripleStore tmpStore ) { int numInserted = 0; ! IEntryIterator it = tmpStore.getSPOIndex().rangeIterator(null, null); while (it.hasNext()) { it.next(); *************** *** 376,380 **** } ! 
it = entailments.getPOSIndex().rangeIterator(null, null); while (it.hasNext()) { it.next(); --- 497,501 ---- } ! it = tmpStore.getPOSIndex().rangeIterator(null, null); while (it.hasNext()) { it.next(); *************** *** 385,389 **** } ! it = entailments.getOSPIndex().rangeIterator(null, null); while (it.hasNext()) { it.next(); --- 506,510 ---- } ! it = tmpStore.getOSPIndex().rangeIterator(null, null); while (it.hasNext()) { it.next(); Index: Rule.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/Rule.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Rule.java 12 Apr 2007 23:59:21 -0000 1.4 --- Rule.java 13 Apr 2007 20:37:04 -0000 1.5 *************** *** 69,74 **** abstract public class Rule { - static protected final boolean debug = false; - /** * The inference engine. --- 69,72 ---- *************** *** 102,118 **** /** ! * Apply the rule to the statement in the store. * ! * @param entailments ! * The temporary triple store used to hold entailments. * ! * @return Statistics related to what the rule did. * ! * @todo support conditional insert in the btree so that we do not have ! * to do a lookup/insert combination. * ! * @todo we could store proofs in the value for a statement or in a ! * proofs index. doing that efficiently would require a ! * concatenation operation variant for insert. * * @todo the btree class is NOT safe for concurrent modification under --- 100,122 ---- /** ! * Apply the rule, creating entailments that are inserted into the temporary ! * store. * ! * @param stats ! * Returns statistics on the rule application as a side effect. ! * @param buffer ! * Used to buffer entailments so that we can perform batch btree ! * operations. ! * @param tmpStore ! * The temporary store into which the entailments are placed. * ! * @return The statistics object. * ! * @todo support conditional insert in the btree so that we do not have to ! 
* do a lookup/insert combination. * ! * @todo we could store proofs in the value for a statement or in a proofs ! * index. doing that efficiently would require a concatenation ! * operation variant for insert. * * @todo the btree class is NOT safe for concurrent modification under *************** *** 120,125 **** * statements that they will insert. */ ! abstract public Stats apply( TempTripleStore entailments ); ! /** --- 124,128 ---- * statements that they will insert. */ ! public abstract Stats apply( final Stats stats, final SPO[] buffer, TempTripleStore tmpStore ); /** *************** *** 130,138 **** public static class Stats { public int numComputed; long computeTime; } ! } \ No newline at end of file --- 133,164 ---- public static class Stats { + /** + * #of matches for the triple pattern for the first antecedent of the rule. + */ + public int stmts1; + + /** + * #of matches for the triple pattern for the second antecedent of the + * rule (if there are two). + */ + public int stmts2; + + /** + * #of statements considered. + */ + public int numConsidered; + + /** + * #of entailments computed. + */ public int numComputed; + /** + * Time to compute the entailments and store them within the + * {@link TempTripleStore} in milliseconds. + */ long computeTime; } ! 
} Index: AbstractRuleRdf.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdf.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** AbstractRuleRdf.java 13 Apr 2007 15:02:34 -0000 1.6 --- AbstractRuleRdf.java 13 Apr 2007 20:37:04 -0000 1.7 *************** *** 48,59 **** import org.openrdf.model.URI; - import com.bigdata.btree.IEntryIterator; import com.bigdata.btree.IIndex; - import com.bigdata.rdf.KeyOrder; import com.bigdata.rdf.TempTripleStore; public abstract class AbstractRuleRdf extends Rule { - - protected final int BUFFER_SIZE = 10*1024*1024; public AbstractRuleRdf(InferenceEngine store, Triple head, Pred[] body) { --- 48,55 ---- *************** *** 63,122 **** } ! public abstract Stats apply( TempTripleStore entailments ); ! ! protected void dumpBuffer( SPO[] stmts, TempTripleStore btree ) { ! ! // deal with the SPO index ! IIndex spo = btree.getSPOIndex(); ! Arrays.sort(stmts,SPOComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].s, stmts[i].p, stmts[i].o ! ); ! if ( !spo.contains(key) ) { ! spo.insert(key, null); ! } ! } ! ! // deal with the POS index ! IIndex pos = btree.getPOSIndex(); ! Arrays.sort(stmts,POSComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].p, stmts[i].o, stmts[i].s ! ); ! if ( !pos.contains(key) ) { ! pos.insert(key, null); ! } ! } ! ! // deal with the OSP index ! IIndex osp = btree.getOSPIndex(); ! Arrays.sort(stmts,OSPComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].o, stmts[i].s, stmts[i].p ! ); ! if ( !osp.contains(key) ) { ! osp.insert(key, null); ! } ! } ! ! } ! ! protected int insertEntailments( SPO[] entailments ) { ! ! 
int numAdded = 0; // deal with the SPO index IIndex spo = store.getSPOIndex(); ! Arrays.sort(entailments,SPOComparator.INSTANCE); ! for ( int i = 0; i < entailments.length; i++ ) { byte[] key = store.keyBuilder.statement2Key ! ( entailments[i].s, entailments[i].p, entailments[i].o ); if ( !spo.contains(key) ) { spo.insert(key, null); - numAdded++; } } --- 59,85 ---- } ! /** ! * Copies the entailments from the array into the {@link TempTripleStore}. ! * ! * @param stmts ! * The source statements. ! * ! * @param n ! * The #of statements in the buffer. ! * ! * @param store ! * The target store. ! */ ! protected void insertStatements(SPO[] stmts, int n, TempTripleStore store) { // deal with the SPO index IIndex spo = store.getSPOIndex(); ! Arrays.sort(stmts,0,n,SPOComparator.INSTANCE); ! for ( int i = 0; i < n; i++ ) { byte[] key = store.keyBuilder.statement2Key ! ( stmts[i].s, stmts[i].p, stmts[i].o ); if ( !spo.contains(key) ) { spo.insert(key, null); } } *************** *** 124,131 **** // deal with the POS index IIndex pos = store.getPOSIndex(); ! Arrays.sort(entailments,POSComparator.INSTANCE); ! for ( int i = 0; i < entailments.length; i++ ) { byte[] key = store.keyBuilder.statement2Key ! ( entailments[i].p, entailments[i].o, entailments[i].s ); if ( !pos.contains(key) ) { --- 87,94 ---- // deal with the POS index IIndex pos = store.getPOSIndex(); ! Arrays.sort(stmts,0,n,POSComparator.INSTANCE); ! for ( int i = 0; i < n; i++ ) { byte[] key = store.keyBuilder.statement2Key ! ( stmts[i].p, stmts[i].o, stmts[i].s ); if ( !pos.contains(key) ) { *************** *** 136,143 **** // deal with the OSP index IIndex osp = store.getOSPIndex(); ! Arrays.sort(entailments,OSPComparator.INSTANCE); ! for ( int i = 0; i < entailments.length; i++ ) { byte[] key = store.keyBuilder.statement2Key ! ( entailments[i].o, entailments[i].s, entailments[i].p ); if ( !osp.contains(key) ) { --- 99,106 ---- // deal with the OSP index IIndex osp = store.getOSPIndex(); ! 
Arrays.sort(stmts,0,n,OSPComparator.INSTANCE); ! for ( int i = 0; i < n; i++ ) { byte[] key = store.keyBuilder.statement2Key ! ( stmts[i].o, stmts[i].s, stmts[i].p ); if ( !osp.contains(key) ) { *************** *** 145,159 **** } } - - return numAdded; } ! protected int insertEntailments2( TempTripleStore entailments ) { ! ! return insertEntailments( convert( entailments ) ); ! ! } ! protected void printStatement( SPO stmt ) { --- 108,177 ---- } } } ! // /** ! // * Copies the statements into the primary store. ! // * ! // * @todo refactor for common code and #of statements parameters with ! // * {@link #dumpBuffer(SPO[], int, TempTripleStore)}, which copies the ! // * statements into the temporary store. ! // * ! // * @param entailments ! // * The statements. ! // * ! // * @return The #of statements actually added to the store. ! // */ ! // protected int insertEntailments( SPO[] entailments ) { ! // ! // int numAdded = 0; ! // ! // // deal with the SPO index ! // IIndex spo = store.getSPOIndex(); ! // Arrays.sort(entailments,SPOComparator.INSTANCE); ! // for ( int i = 0; i < entailments.length; i++ ) { ! // byte[] key = store.keyBuilder.statement2Key ! // ( entailments[i].s, entailments[i].p, entailments[i].o ! // ); ! // if ( !spo.contains(key) ) { ! // spo.insert(key, null); ! // numAdded++; ! // } ! // } ! // ! // // deal with the POS index ! // IIndex pos = store.getPOSIndex(); ! // Arrays.sort(entailments,POSComparator.INSTANCE); ! // for ( int i = 0; i < entailments.length; i++ ) { ! // byte[] key = store.keyBuilder.statement2Key ! // ( entailments[i].p, entailments[i].o, entailments[i].s ! // ); ! // if ( !pos.contains(key) ) { ! // pos.insert(key, null); ! // } ! // } ! // ! // // deal with the OSP index ! // IIndex osp = store.getOSPIndex(); ! // Arrays.sort(entailments,OSPComparator.INSTANCE); ! // for ( int i = 0; i < entailments.length; i++ ) { ! // byte[] key = store.keyBuilder.statement2Key ! // ( entailments[i].o, entailments[i].s, entailments[i].p ! 
// ); ! // if ( !osp.contains(key) ) { ! // osp.insert(key, null); ! // } ! // } ! // ! // return numAdded; ! // ! // } ! ! // protected int insertEntailments2( TempTripleStore entailments ) { ! // ! // return insertEntailments( getStatements( entailments ) ); ! // ! // } ! protected void printStatement( SPO stmt ) { *************** *** 176,214 **** } ! protected TempTripleStore convert( SPO[] stmts ) { ! ! TempTripleStore tts = new TempTripleStore(); ! ! for ( int i = 0; i < stmts.length; i++ ) { ! ! tts.addStatement( stmts[i].s, stmts[i].p, stmts[i].o ); ! ! } ! ! return tts; ! ! } ! protected SPO[] convert( TempTripleStore tts ) { ! ! SPO[] stmts = new SPO[tts.getStatementCount()]; ! ! int i = 0; ! ! IIndex ndx_spo = tts.getSPOIndex(); ! ! IEntryIterator it = ndx_spo.rangeIterator(null, null); ! ! while ( it.hasNext() ) { ! ! it.next(); ! ! stmts[i++] = new SPO(KeyOrder.SPO, tts.keyBuilder, it.getKey()); ! ! } ! ! return stmts; ! ! } } \ No newline at end of file --- 194,240 ---- } ! // protected TempTripleStore convert( SPO[] stmts ) { ! // ! // TempTripleStore tts = new TempTripleStore(); ! // ! // for ( int i = 0; i < stmts.length; i++ ) { ! // ! // tts.addStatement( stmts[i].s, stmts[i].p, stmts[i].o ); ! // ! // } ! // ! // return tts; ! // ! // } ! // /** ! // * Extracts all statements in the store into an {@link SPO}[]. ! // * ! // * @param store ! // * The store. ! // * ! // * @return The array of statements. ! // */ ! // protected SPO[] getStatements( TempTripleStore store ) { ! // ! // SPO[] stmts = new SPO[store.getStatementCount()]; ! // ! // int i = 0; ! // ! // IIndex ndx_spo = store.getSPOIndex(); ! // ! // IEntryIterator it = ndx_spo.rangeIterator(null, null); ! // ! // while ( it.hasNext() ) { ! // ! // it.next(); ! // ! // stmts[i++] = new SPO(KeyOrder.SPO, store.keyBuilder, it.getKey()); ! // ! // } ! // ! // return stmts; ! // ! 
// } } \ No newline at end of file Index: AbstractRuleRdfs68101213.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs68101213.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** AbstractRuleRdfs68101213.java 13 Apr 2007 15:02:34 -0000 1.6 --- AbstractRuleRdfs68101213.java 13 Apr 2007 20:37:04 -0000 1.7 *************** *** 44,49 **** package com.bigdata.rdf.inf; - import java.util.Vector; - import com.bigdata.btree.IEntryIterator; import com.bigdata.rdf.KeyOrder; --- 44,47 ---- *************** *** 63,73 **** } ! public Stats apply( TempTripleStore btree ) { ! ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! Vector<SPO> entailments = new Vector<SPO>(BUFFER_SIZE); byte[] startKey = store.keyBuilder.statement2Key --- 61,67 ---- } ! public Stats apply( final Stats stats, final SPO[] buffer, TempTripleStore tmpStore ) { ! final long computeStart = System.currentTimeMillis(); byte[] startKey = store.keyBuilder.statement2Key *************** *** 79,108 **** ); ! IEntryIterator it = store.getPOSIndex().rangeIterator(startKey,endKey); while ( it.hasNext() ) { it.next(); - SPO stmt = - new SPO(KeyOrder.POS,store.keyBuilder,it.getKey()); long _s = head.s.isVar() ? stmt.s : head.s.id; long _p = head.p.isVar() ? stmt.s : head.p.id; long _o = head.o.isVar() ? stmt.s : head.o.id; ! if (entailments.size() == BUFFER_SIZE) { ! dumpBuffer ! ( entailments.toArray( new SPO[entailments.size()] ), ! btree ! ); ! entailments.clear(); } ! entailments.add( new SPO(_s, _p, _o) ); stats.numComputed++; } - if(debug)dumpBuffer( entailments.toArray( new SPO[entailments.size()] ), btree ); ! stats.computeTime = System.currentTimeMillis() - computeStart; return stats; --- 73,110 ---- ); ! IEntryIterator it = store.getPOSIndex().rangeIterator(startKey, endKey); ! ! 
int n = 0; while ( it.hasNext() ) { it.next(); + stats.stmts1++; + + SPO stmt = new SPO(KeyOrder.POS, store.keyBuilder, it.getKey()); + + // @todo review -- should this be substituting stmt.s in each case? long _s = head.s.isVar() ? stmt.s : head.s.id; long _p = head.p.isVar() ? stmt.s : head.p.id; long _o = head.o.isVar() ? stmt.s : head.o.id; ! buffer[n++] = new SPO(_s, _p, _o); ! ! if (n == buffer.length) { ! ! insertStatements(buffer, n, tmpStore); ! ! n = 0; ! } ! stats.numComputed++; } ! insertStatements( buffer, n, tmpStore ); ! ! stats.computeTime += System.currentTimeMillis() - computeStart; return stats; |