From: Mike P. <mrp...@us...> - 2007-02-17 03:08:10
|
Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24378/bigdata-rdf/src/java/com/bigdata/rdf/inf Modified Files: AbstractRuleRdfs511.java Rule.java InferenceEngine.java AbstractRuleRdfs2379.java RuleRdf01.java AbstractRuleRdf.java AbstractRuleRdfs68101213.java Log Message: Converted entailment collection arrays to btrees. Index: InferenceEngine.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/InferenceEngine.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** InferenceEngine.java 9 Feb 2007 21:19:26 -0000 1.8 --- InferenceEngine.java 17 Feb 2007 03:07:59 -0000 1.9 *************** *** 55,58 **** --- 55,59 ---- import com.bigdata.objndx.IIndex; import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; import com.bigdata.rdf.TripleStore; import com.bigdata.rdf.inf.TestMagicSets.MagicRule; *************** *** 284,321 **** final int nrules = rules.length; ! int firstStatementCount = getStatementCount(); ! ! int lastStatementCount = firstStatementCount; final long begin = System.currentTimeMillis(); ! System.err.println("Closing kb with " + lastStatementCount + " statements"); ! int nadded = 0; while (true) { for (int i = 0; i < nrules; i++) { Rule rule = rules[i]; ! nadded += rule.apply(); } ! ! int statementCount = getStatementCount(); ! ! // testing the #of statement is less prone to error. ! if (lastStatementCount == statementCount) { ! ! // if( nadded == 0 ) { // should also work. ! // This is the fixed point. break; ! } ! lastStatementCount = statementCount; } --- 285,341 ---- final int nrules = rules.length; ! final int firstStatementCount = getStatementCount(); final long begin = System.currentTimeMillis(); ! log.debug("Closing kb with " + firstStatementCount + " statements"); ! int round = 0; ! ! TempTripleStore entailments = new TempTripleStore(); while (true) { + int numComputed = 0; + + long computeTime = 0; + + int numEntailmentsBefore = entailments.getStatementCount(); + for (int i = 0; i < nrules; i++) { Rule rule = rules[i]; ! Rule.Stats stats = rule.apply( entailments ); ! ! numComputed += stats.numComputed; + computeTime += stats.computeTime; + } ! ! int numEntailmentsAfter = entailments.getStatementCount(); ! ! if ( numEntailmentsBefore == numEntailmentsAfter ) { ! // This is the fixed point. break; ! } ! long insertStart = System.currentTimeMillis(); ! ! int numInserted = transferBTrees( entailments ); ! ! long insertTime = System.currentTimeMillis() - insertStart; ! ! StringBuilder debug = new StringBuilder(); ! debug.append( "round #" ).append( round++ ).append( ": " ); ! debug.append( numComputed ).append( " computed in " ); ! debug.append( computeTime ).append( " millis, " ); ! debug.append( numInserted ).append( " inserted in " ); ! debug.append( insertTime ).append( " millis " ); ! log.debug( debug.toString() ); } *************** *** 323,331 **** final long elapsed = System.currentTimeMillis() - begin; ! System.err.println("Closed store in " + elapsed + "ms yeilding " + lastStatementCount + " statements total, " + (lastStatementCount - firstStatementCount) + " inferences"); } /** --- 343,389 ---- final long elapsed = System.currentTimeMillis() - begin; ! final int lastStatementCount = getStatementCount(); ! ! log.debug("Closed store in " + elapsed + "ms yeilding " + lastStatementCount + " statements total, " + (lastStatementCount - firstStatementCount) + " inferences"); } + + private int transferBTrees( TempTripleStore entailments ) { + + int numInserted = 0; + + IEntryIterator it = entailments.getSPOIndex().rangeIterator(null, null); + while (it.hasNext()) { + it.next(); + byte[] key = it.getKey(); + if (!getSPOIndex().contains(key)) { + numInserted++; + getSPOIndex().insert(key, null); + } + } + + it = entailments.getPOSIndex().rangeIterator(null, null); + while (it.hasNext()) { + it.next(); + byte[] key = it.getKey(); + if (!getPOSIndex().contains(key)) { + getPOSIndex().insert(key, null); + } + } + + it = entailments.getOSPIndex().rangeIterator(null, null); + while (it.hasNext()) { + it.next(); + byte[] key = it.getKey(); + if (!getOSPIndex().contains(key)) { + getOSPIndex().insert(key, null); + } + } + + return numInserted; + + } /** *************** *** 441,445 **** Rule rule = rules[i]; ! nadded += rule.apply(); } --- 499,504 ---- Rule rule = rules[i]; ! // nadded += rule.apply(); ! // rule.apply(); } Index: AbstractRuleRdfs511.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs511.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractRuleRdfs511.java 9 Feb 2007 20:18:56 -0000 1.3 --- AbstractRuleRdfs511.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,51 **** --- 48,53 ---- import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; + import com.bigdata.rdf.inf.Rule.Stats; *************** *** 62,68 **** } ! protected SPO[] collectEntailments() { ! // the predicate is fixed for all parts of the rule. final long p = head.p.id; --- 64,74 ---- } ! public Stats apply( TempTripleStore entailments ) { ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! ! // the predicate is fixed for all parts of the rule. final long p = head.p.id; *************** *** 91,95 **** SPO[] stmts2 = stmts1.clone(); ! Vector<SPO> v = new Vector<SPO>(); // the simplest n^2 algorithm for( int i = 0; i < stmts1.length; i++ ) { --- 97,101 ---- SPO[] stmts2 = stmts1.clone(); ! Vector<SPO> stmts3 = new Vector<SPO>(BUFFER_SIZE); // the simplest n^2 algorithm for( int i = 0; i < stmts1.length; i++ ) { *************** *** 97,106 **** for ( int j = 0; j < stmts2.length; j++ ) { if ( stmts1[i].o == stmts2[j].s ) { ! v.add( new SPO(stmts1[i].s, p, stmts2[j].o) ); } } } ! return v.toArray( new SPO[v.size()] ); } --- 103,123 ---- for ( int j = 0; j < stmts2.length; j++ ) { if ( stmts1[i].o == stmts2[j].s ) { ! if (stmts3.size() == BUFFER_SIZE) { ! dumpBuffer ! ( stmts3.toArray( new SPO[stmts3.size()] ), ! entailments ! ); ! stmts3.clear(); ! } ! stmts3.add( new SPO(stmts1[i].s, p, stmts2[j].o) ); ! stats.numComputed++; } } } + dumpBuffer( stmts3.toArray( new SPO[stmts3.size()] ), entailments ); ! stats.computeTime = System.currentTimeMillis() - computeStart; ! ! return stats; } Index: AbstractRuleRdfs2379.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs2379.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** AbstractRuleRdfs2379.java 9 Feb 2007 20:18:56 -0000 1.2 --- AbstractRuleRdfs2379.java 17 Feb 2007 03:07:59 -0000 1.3 *************** *** 48,51 **** --- 48,52 ---- import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; *************** *** 62,69 **** } ! protected SPO[] collectEntailments() { // create a place to hold the entailments ! Vector<SPO> stmts3 = new Vector<SPO>(); SPO[] stmts1 = getStmts1(); --- 63,74 ---- } ! public Stats apply( TempTripleStore entailments ) { + Stats stats = new Stats(); + + long computeStart = System.currentTimeMillis(); + // create a place to hold the entailments ! Vector<SPO> stmts3 = new Vector<SPO>(BUFFER_SIZE); SPO[] stmts1 = getStmts1(); *************** *** 71,79 **** SPO[] stmts2 = getStmts2( stmts1[i] ); for ( int j = 0; j < stmts2.length; j++ ) { stmts3.add( buildStmt3( stmts1[i], stmts2[j] ) ); } } ! return stmts3.toArray( new SPO[stmts3.size()] ); } --- 76,95 ---- SPO[] stmts2 = getStmts2( stmts1[i] ); for ( int j = 0; j < stmts2.length; j++ ) { + if (stmts3.size() == BUFFER_SIZE) { + dumpBuffer + ( stmts3.toArray( new SPO[stmts3.size()] ), + entailments + ); + stmts3.clear(); + } stmts3.add( buildStmt3( stmts1[i], stmts2[j] ) ); + stats.numComputed++; } } + dumpBuffer( stmts3.toArray( new SPO[stmts3.size()] ), entailments ); + + stats.computeTime = System.currentTimeMillis() - computeStart; ! return stats; } Index: RuleRdf01.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/RuleRdf01.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RuleRdf01.java 9 Feb 2007 20:18:56 -0000 1.3 --- RuleRdf01.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,51 **** --- 48,52 ---- import com.bigdata.objndx.IEntryIterator; import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; *************** *** 61,67 **** } ! protected SPO[] collectEntailments() { ! Vector<SPO> entailments = new Vector<SPO>(); long lastP = -1; --- 62,72 ---- } ! public Stats apply( TempTripleStore btree ) { ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! ! Vector<SPO> entailments = new Vector<SPO>(BUFFER_SIZE); long lastP = -1; *************** *** 80,91 **** lastP = stmt.p; entailments.add ( new SPO(stmt.p, store.rdfType.id, store.rdfProperty.id) ); } } ! return entailments.toArray( new SPO[entailments.size()] ); } --- 85,107 ---- lastP = stmt.p; + if (entailments.size() == BUFFER_SIZE) { + dumpBuffer + ( entailments.toArray( new SPO[entailments.size()] ), + btree + ); + entailments.clear(); + } entailments.add ( new SPO(stmt.p, store.rdfType.id, store.rdfProperty.id) ); + stats.numComputed++; } } + dumpBuffer( entailments.toArray( new SPO[entailments.size()] ), btree ); ! stats.computeTime = System.currentTimeMillis() - computeStart; ! ! return stats; } Index: Rule.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/Rule.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Rule.java 27 Jan 2007 15:58:57 -0000 1.2 --- Rule.java 17 Feb 2007 03:07:59 -0000 1.3 *************** *** 44,47 **** --- 44,49 ---- package com.bigdata.rdf.inf; + import com.bigdata.rdf.TempTripleStore; + /** *************** *** 100,107 **** * Apply the rule to the statement in the store. * ! * @param store ! * The triple store. * ! * @return The #of statements added to the store. * * @todo support conditional insert in the btree so that we do not have --- 102,109 ---- * Apply the rule to the statement in the store. * ! * @param entailments ! * The temporary triple store used to hold entailments. * ! * @return Statistics related to what the rule did. * * @todo support conditional insert in the btree so that we do not have *************** *** 116,120 **** * statements that they will insert. */ ! abstract public int apply(); } \ No newline at end of file --- 118,136 ---- * statements that they will insert. */ ! abstract public Stats apply( TempTripleStore entailments ); ! ! ! /** ! * Statistics about what the Rule did during {@link Rule#apply()}. ! * ! * @author mikep ! */ ! public static class Stats { ! ! public int numComputed; ! ! long computeTime; ! ! } } \ No newline at end of file Index: AbstractRuleRdf.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdf.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractRuleRdf.java 9 Feb 2007 20:18:56 -0000 1.3 --- AbstractRuleRdf.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,56 **** --- 48,62 ---- import org.openrdf.model.URI; + import com.bigdata.objndx.IEntryIterator; import com.bigdata.objndx.IIndex; + import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; public abstract class AbstractRuleRdf extends Rule { + protected final int BUFFER_SIZE = 10*1024*1024; + + public AbstractRuleRdf(InferenceEngine store, Triple head, Pred[] body) { *************** *** 59,102 **** } ! public int apply() { ! // long startTime = System.currentTimeMillis(); ! ! SPO[] entailments = collectEntailments(); ! /* ! long collectionTime = System.currentTimeMillis() - startTime; ! System.out.println( getClass().getName() + " collected " + ! entailments.length + " entailments in " + ! collectionTime + " millis" ); ! int numStmtsBefore = store.ndx_spo.getEntryCount(); ! ! System.out.println( getClass().getName() + ! " number of statements before: " + ! numStmtsBefore); ! ! startTime = System.currentTimeMillis(); ! */ ! int numAdded = insertEntailments( entailments ); ! /* ! long insertionTime = System.currentTimeMillis() - startTime; ! ! int numStmtsAfter = store.ndx_spo.getEntryCount(); ! ! System.out.println( getClass().getName() + ! " number of statements after: " + ! numStmtsAfter); ! ! System.out.println( getClass().getName() + ! " inserted " + ( numStmtsAfter - numStmtsBefore ) + ! " statements in " + insertionTime + " millis"); ! */ ! return numAdded; ! ! } ! protected abstract SPO[] collectEntailments(); protected int insertEntailments( SPO[] entailments ) { --- 65,110 ---- } ! public abstract Stats apply( TempTripleStore entailments ); ! protected void dumpBuffer( SPO[] stmts, TempTripleStore btree ) { ! // deal with the SPO index ! IIndex spo = btree.getSPOIndex(); ! Arrays.sort(stmts,SPOComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].s, stmts[i].p, stmts[i].o ! ); ! if ( !spo.contains(key) ) { ! spo.insert(key, null); ! } ! } ! // deal with the POS index ! IIndex pos = btree.getPOSIndex(); ! Arrays.sort(stmts,POSComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].p, stmts[i].o, stmts[i].s ! ); ! if ( !pos.contains(key) ) { ! pos.insert(key, null); ! } ! } ! // deal with the OSP index ! IIndex osp = btree.getOSPIndex(); ! Arrays.sort(stmts,OSPComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].o, stmts[i].s, stmts[i].p ! ); ! if ( !osp.contains(key) ) { ! osp.insert(key, null); ! } ! } + } + protected int insertEntailments( SPO[] entailments ) { *************** *** 144,147 **** --- 152,161 ---- } + protected int insertEntailments2( TempTripleStore entailments ) { + + return insertEntailments( convert( entailments ) ); + + } + protected void printStatement( SPO stmt ) { *************** *** 164,166 **** --- 178,216 ---- } + protected TempTripleStore convert( SPO[] stmts ) { + + TempTripleStore tts = new TempTripleStore(); + + for ( int i = 0; i < stmts.length; i++ ) { + + tts.addStatement( stmts[i].s, stmts[i].p, stmts[i].o ); + + } + + return tts; + + } + + protected SPO[] convert( TempTripleStore tts ) { + + SPO[] stmts = new SPO[tts.getStatementCount()]; + + int i = 0; + + IIndex ndx_spo = tts.getSPOIndex(); + + IEntryIterator it = ndx_spo.rangeIterator(null, null); + + while ( it.hasNext() ) { + + it.next(); + + stmts[i++] = new SPO(KeyOrder.SPO, tts.keyBuilder, it.getKey()); + + } + + return stmts; + + } + } \ No newline at end of file Index: AbstractRuleRdfs68101213.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs68101213.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractRuleRdfs68101213.java 9 Feb 2007 20:18:56 -0000 1.3 --- AbstractRuleRdfs68101213.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,51 **** --- 48,52 ---- import com.bigdata.objndx.IEntryIterator; import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; *************** *** 62,68 **** } ! protected SPO[] collectEntailments() { ! Vector<SPO> entailments = new Vector<SPO>(); byte[] startKey = store.keyBuilder.statement2Key --- 63,73 ---- } ! public Stats apply( TempTripleStore btree ) { ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! ! Vector<SPO> entailments = new Vector<SPO>(BUFFER_SIZE); byte[] startKey = store.keyBuilder.statement2Key *************** *** 85,94 **** long _p = head.p.isVar() ? stmt.s : head.p.id; long _o = head.o.isVar() ? stmt.s : head.o.id; ! entailments.add( new SPO(_s, _p, _o) ); } ! return entailments.toArray( new SPO[entailments.size()] ); } --- 90,110 ---- long _p = head.p.isVar() ? stmt.s : head.p.id; long _o = head.o.isVar() ? stmt.s : head.o.id; ! ! if (entailments.size() == BUFFER_SIZE) { ! dumpBuffer ! ( entailments.toArray( new SPO[entailments.size()] ), ! btree ! ); ! entailments.clear(); ! } entailments.add( new SPO(_s, _p, _o) ); + stats.numComputed++; } + dumpBuffer( entailments.toArray( new SPO[entailments.size()] ), btree ); ! stats.computeTime = System.currentTimeMillis() - computeStart; ! ! return stats; } |