From: Bryan T. <tho...@us...> - 2007-02-17 21:34:26
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv12401/src/java/com/bigdata/objndx Modified Files: BTreeMetadata.java BTree.java FusedView.java Added Files: ReadOnlyIndex.java Log Message: Working through transactional isolation. Index: BTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/BTree.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** BTree.java 15 Feb 2007 14:23:49 -0000 1.33 --- BTree.java 17 Feb 2007 21:34:21 -0000 1.34 *************** *** 438,442 **** * The hard reference queue for {@link Leaf}s. * ! * @see BTreeMetadata#read(IRawStore, long) */ public BTree(IRawStore store, BTreeMetadata metadata, --- 438,447 ---- * The hard reference queue for {@link Leaf}s. * ! * @see BTreeMetadata#load(IRawStore, long), which will re-load a ! * {@link BTree} or derived class from its {@link BTreeMetadata} ! * record. ! * ! * @see #newMetadata(), which must be overriden if you subclass ! * {@link BTreeMetadata} */ public BTree(IRawStore store, BTreeMetadata metadata, Index: BTreeMetadata.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/BTreeMetadata.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** BTreeMetadata.java 13 Feb 2007 23:01:02 -0000 1.9 --- BTreeMetadata.java 17 Feb 2007 21:34:18 -0000 1.10 *************** *** 3,6 **** --- 3,7 ---- import java.io.Externalizable; import java.io.Serializable; + import java.lang.reflect.Constructor; import java.nio.ByteBuffer; *************** *** 47,50 **** --- 48,56 ---- public final IValueSerializer valueSer; + /** + * The name of the class that will be used to re-load the index. + */ + public final String className; + public final RecordCompressor recordCompressor; *************** *** 95,105 **** this.nentries = btree.nentries; ! this.valueSer = btree.nodeSer.valueSerializer; this.recordCompressor = btree.nodeSer.recordCompressor; this.useChecksum = btree.nodeSer.useChecksum; ! /* * Note: This can not be invoked here since a derived class will not --- 101,113 ---- this.nentries = btree.nentries; ! this.valueSer = btree.nodeSer.valueSerializer; + this.className = btree.getClass().getName(); + this.recordCompressor = btree.nodeSer.recordCompressor; this.useChecksum = btree.nodeSer.useChecksum; ! /* * Note: This can not be invoked here since a derived class will not *************** *** 130,135 **** * @param addr * the address of the metadata record. ! * * @return the metadata record. */ public static BTreeMetadata read(IRawStore store, long addr) { --- 138,149 ---- * @param addr * the address of the metadata record. ! * * @return the metadata record. + * + * @see #load(IRawStore, long), which will load the btree not just the + * {@link BTreeMetadata}. + * + * @todo review remaining uses of this method vs + * {@link #load(IRawStore, long)}. */ public static BTreeMetadata read(IRawStore store, long addr) { *************** *** 139,142 **** --- 153,201 ---- } + + /** + * Re-load the {@link BTree} or derived class from the store. The + * {@link BTree} or derived class MUST provide a public construct with the + * following signature: <code> + * + * <i>className</i>(IRawStore store, BTreeMetadata metadata) + * + * </code> + * + * @param store + * The store. + * @param addr + * The address of the {@link BTreeMetadata} record for that + * class. 
+ * + * @return The {@link BTree} or derived class loaded from that + * {@link BTreeMetadata} record. + * + * @see BTree#newMetadata(), which MUST be overloaded if you subclass extend + * {@link BTreeMetadata}. + */ + public static BTree load(IRawStore store, long addr) { + + BTreeMetadata metadata = read(store, addr); + + try { + + Class cl = Class.forName(metadata.className); + + Constructor ctor = cl.getConstructor(new Class[] { + IRawStore.class, BTreeMetadata.class }); + + BTree btree = (BTree) ctor.newInstance(new Object[] { store, + metadata }); + + return btree; + + } catch(Exception ex) { + + throw new RuntimeException(ex); + + } + + } // /** Index: FusedView.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/FusedView.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** FusedView.java 13 Feb 2007 23:01:02 -0000 1.4 --- FusedView.java 17 Feb 2007 21:34:21 -0000 1.5 *************** *** 51,56 **** import java.util.NoSuchElementException; - import com.bigdata.isolation.Value; - /** * <p> --- 51,54 ---- *************** *** 64,70 **** * @todo support N sources for a {@link FusedView} by chaining together multiple * {@link FusedView} instances if not in a more efficient manner. - * - * @todo Do a variant that uses {@link Value}s and supports a merged view - * showing only the most recent data version under any given key.gb */ public class FusedView implements IIndex { --- 62,65 ---- *************** *** 251,254 **** --- 246,255 ---- } + /** + * Helper class merges entries from the sources in the view. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ protected static class FusedEntryIterator implements IEntryIterator { --- NEW FILE: ReadOnlyIndex.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Feb 16, 2007 */ package com.bigdata.objndx; /** * A fly-weight wrapper that does not permit write operations and reads * through onto an underlying {@link IIndex}. 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public class ReadOnlyIndex implements IIndex, IRangeQuery { private final IIndex src; public ReadOnlyIndex(IIndex src) { if(src==null) throw new IllegalArgumentException(); this.src = src; } public boolean contains(byte[] key) { return src.contains(key); } public Object insert(Object key, Object value) { throw new UnsupportedOperationException(); } public Object lookup(Object key) { return src.lookup(key); } public Object remove(Object key) { throw new UnsupportedOperationException(); } public int rangeCount(byte[] fromKey, byte[] toKey) { return src.rangeCount(fromKey, toKey); } public IEntryIterator rangeIterator(byte[] fromKey, byte[] toKey) { return src.rangeIterator(fromKey, toKey); } public void contains(BatchContains op) { src.contains(op); } public void insert(BatchInsert op) { throw new UnsupportedOperationException(); } public void lookup(BatchLookup op) { src.lookup(op); } public void remove(BatchRemove op) { throw new UnsupportedOperationException(); } } |
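The new BTreeMetadata.load(IRawStore, long) resolves the concrete index class from the recorded className and instantiates it reflectively, so callers no longer need to know which subclass constructor to invoke. A minimal usage sketch, following the round-trip pattern in the updated test cases (UnisolatedBTree is just the example subclass exercised there):

    // Persist the index; write() returns the address of its BTreeMetadata record.
    final long addr = btree.write();

    // Re-load it later: load() reads the metadata record, resolves the recorded
    // className, and invokes the public (IRawStore, BTreeMetadata) constructor,
    // so the result can be cast back to the subclass that was written.
    UnisolatedBTree reloaded = (UnisolatedBTree) BTreeMetadata.load(store, addr);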
From: Bryan T. <tho...@us...> - 2007-02-17 21:34:25
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv12401/src/java/com/bigdata/scaleup Modified Files: PartitionedJournal.java Name2MetadataAddr.java SlaveJournal.java Log Message: Working through transactional isolation. Index: Name2MetadataAddr.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup/Name2MetadataAddr.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Name2MetadataAddr.java 15 Feb 2007 14:23:50 -0000 1.2 --- Name2MetadataAddr.java 17 Feb 2007 21:34:21 -0000 1.3 *************** *** 73,77 **** protected IIndex loadBTree(IRawStore store, String name, long addr) { ! return new MetadataIndex(this.store, BTreeMetadata.read(this.store, addr)); } --- 73,77 ---- protected IIndex loadBTree(IRawStore store, String name, long addr) { ! return (MetadataIndex)BTreeMetadata.load(this.store, addr); } Index: SlaveJournal.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup/SlaveJournal.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** SlaveJournal.java 10 Feb 2007 15:37:44 -0000 1.1 --- SlaveJournal.java 17 Feb 2007 21:34:21 -0000 1.2 *************** *** 71,75 **** * for the store. */ ! public static transient final int ROOT_NAME_2_METADATA_ADDR = 1; /** --- 71,75 ---- * for the store. */ ! public static transient final int ROOT_NAME_2_METADATA_ADDR = 2; /** *************** *** 129,133 **** // the root address of the btree. ! long addr = getAddr(ROOT_NAME_2_METADATA_ADDR); if (addr == 0L) { --- 129,133 ---- // the root address of the btree. ! long addr = getRootAddr(ROOT_NAME_2_METADATA_ADDR); if (addr == 0L) { *************** *** 148,153 **** */ ! name2MetadataAddr = new Name2MetadataAddr(this, BTreeMetadata ! .read(this, addr)); } --- 148,153 ---- */ ! name2MetadataAddr = (Name2MetadataAddr)BTreeMetadata ! .load(this, addr); } Index: PartitionedJournal.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup/PartitionedJournal.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** PartitionedJournal.java 15 Feb 2007 22:01:19 -0000 1.7 --- PartitionedJournal.java 17 Feb 2007 21:34:21 -0000 1.8 *************** *** 54,57 **** --- 54,58 ---- import java.util.Properties; + import com.bigdata.journal.CommitRecordIndex; import com.bigdata.journal.ICommitter; import com.bigdata.journal.IJournal; *************** *** 608,611 **** --- 609,617 ---- * </p> * + * FIXME handle the {@link CommitRecordIndex} during overflow. This can + * either be handled like the metadata indices (with a secondary index) or + * by extending the {@link CommitRecordIndex} so as to carry more + * information about {@link SlaveJournal} locations for commit records. + * * @todo implement asynchronous overflow. * @todo implement using compacting merges only. *************** *** 1163,1172 **** } ! public void commit() { ! slave.commit(); } ! public long getAddr(int rootSlot) { ! return slave.getAddr(rootSlot); } --- 1169,1178 ---- } ! public long commit() { ! return slave.commit(); } ! public long getRootAddr(int rootSlot) { ! return slave.getRootAddr(rootSlot); } |
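The same pattern covers restart of the scale-up layer: the root address recorded in the slot (note getAddr was renamed to getRootAddr in this commit) is handed to BTreeMetadata.load(), which recovers the concrete Name2MetadataAddr class. A hedged sketch of that branch; the creation path for a brand-new store is elided in the diff and only indicated here by a comment:

    // root address of the named-indices btree for this store
    long addr = getRootAddr(ROOT_NAME_2_METADATA_ADDR);

    if (addr == 0L) {
        // brand-new store: the index must be created (not shown in this diff)
    } else {
        // restart: recover the concrete class from its persisted metadata record
        name2MetadataAddr = (Name2MetadataAddr) BTreeMetadata.load(this, addr);
    }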
From: Bryan T. <tho...@us...> - 2007-02-17 21:34:25
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/scaleup In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv12401/src/test/com/bigdata/scaleup Modified Files: TestMetadataIndex.java Log Message: Working through transactional isolation. Index: TestMetadataIndex.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/scaleup/TestMetadataIndex.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** TestMetadataIndex.java 15 Feb 2007 22:01:19 -0000 1.5 --- TestMetadataIndex.java 17 Feb 2007 21:34:21 -0000 1.6 *************** *** 61,64 **** --- 61,65 ---- import com.bigdata.objndx.AbstractBTreeTestCase; import com.bigdata.objndx.BTree; + import com.bigdata.objndx.BTreeMetadata; import com.bigdata.objndx.BatchInsert; import com.bigdata.objndx.FusedView; *************** *** 268,272 **** // re-load the index. ! md = new MetadataIndex(store,MetadataIndexMetadata.read(store, addr)); assertEquals("name","abc",md.getName()); --- 269,273 ---- // re-load the index. ! md = (MetadataIndex)BTreeMetadata.load(store, addr); assertEquals("name","abc",md.getName()); |
From: Bryan T. <tho...@us...> - 2007-02-17 21:34:25
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/isolation In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv12401/src/test/com/bigdata/isolation Modified Files: TestUnisolatedBTree.java Log Message: Working through transactional isolation. Index: TestUnisolatedBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/isolation/TestUnisolatedBTree.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** TestUnisolatedBTree.java 15 Feb 2007 22:01:19 -0000 1.4 --- TestUnisolatedBTree.java 17 Feb 2007 21:34:21 -0000 1.5 *************** *** 106,110 **** final long addr = btree.write(); ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr)); assertTrue(store==btree.getStore()); --- 106,110 ---- final long addr = btree.write(); ! btree = (UnisolatedBTree)BTreeMetadata.load(store, addr); assertTrue(store==btree.getStore()); *************** *** 124,128 **** final long addr = btree.write(); ! btree = new UnisolatedBTree(store, BTreeMetadata.read(store, addr)); assertTrue(store == btree.getStore()); --- 124,128 ---- final long addr = btree.write(); ! btree = (UnisolatedBTree)BTreeMetadata.load(store, addr); assertTrue(store == btree.getStore()); *************** *** 412,416 **** final long addr1 = btree.write(); ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr1)); assertSameIterator(new Object[]{v3,v5,v7},btree.entryIterator()); --- 412,416 ---- final long addr1 = btree.write(); ! btree = (UnisolatedBTree)BTreeMetadata.load(store, addr1); assertSameIterator(new Object[]{v3,v5,v7},btree.entryIterator()); *************** *** 426,430 **** final long addr2 = btree.write(); ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr2)); assertSameIterator(new Object[]{v3,v7},btree.entryIterator()); --- 426,430 ---- final long addr2 = btree.write(); ! btree = (UnisolatedBTree)BTreeMetadata.load(store, addr2); assertSameIterator(new Object[]{v3,v7},btree.entryIterator()); *************** *** 440,444 **** final long addr3 = btree.write(); ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr3)); assertSameIterator(new Object[]{v3,v7a},btree.entryIterator()); --- 440,444 ---- final long addr3 = btree.write(); ! btree = (UnisolatedBTree)BTreeMetadata.load(store, addr3); assertSameIterator(new Object[]{v3,v7a},btree.entryIterator()); *************** *** 455,459 **** final long addr4 = btree.write(); ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr4)); assertSameIterator(new Object[]{v3,v5a,v7a},btree.entryIterator()); --- 455,459 ---- final long addr4 = btree.write(); ! btree = (UnisolatedBTree)BTreeMetadata.load(store, addr4); assertSameIterator(new Object[]{v3,v5a,v7a},btree.entryIterator()); |
From: Mike P. <mrp...@us...> - 2007-02-17 03:08:11
Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24378/bigdata-rdf/src/java/com/bigdata/rdf Added Files: TempTripleStore.java Log Message: Converted entailment collection arrays to btrees. --- NEW FILE: TempTripleStore.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Jan 3, 2007 */ package com.bigdata.rdf; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.util.Arrays; import java.util.Locale; import java.util.Properties; import org.apache.log4j.Logger; import org.openrdf.model.Resource; import org.openrdf.model.URI; import org.openrdf.model.Value; import com.bigdata.journal.IJournal; import com.bigdata.journal.Journal; import com.bigdata.journal.RootBlockView; import com.bigdata.journal.TemporaryStore; import com.bigdata.objndx.BTree; import com.bigdata.objndx.IEntryIterator; import com.bigdata.objndx.IIndex; import com.bigdata.objndx.KeyBuilder; import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.inf.SPO; import com.bigdata.rdf.model.OptimizedValueFactory.OSPComparator; import com.bigdata.rdf.model.OptimizedValueFactory.POSComparator; import com.bigdata.rdf.model.OptimizedValueFactory.SPOComparator; import com.bigdata.rdf.model.OptimizedValueFactory.TermIdComparator; import com.bigdata.rdf.model.OptimizedValueFactory._Statement; import com.bigdata.rdf.model.OptimizedValueFactory._Value; import com.bigdata.rdf.model.OptimizedValueFactory._ValueSortKeyComparator; import com.bigdata.rdf.rio.IRioLoader; import com.bigdata.rdf.rio.PresortRioLoader; import com.bigdata.rdf.rio.RioLoaderEvent; import com.bigdata.rdf.rio.RioLoaderListener; import com.bigdata.rdf.serializers.RdfValueSerializer; import com.bigdata.rdf.serializers.StatementSerializer; import com.bigdata.rdf.serializers.TermIdSerializer; import com.bigdata.scaleup.PartitionedIndex; import com.bigdata.scaleup.PartitionedJournal; import com.bigdata.scaleup.SlaveJournal; import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; /** * A triple store based on the <em>bigdata</em> architecture. 
* * @todo Refactor to use a delegation mechanism so that we can run with or * without partitioned indices? (All you have to do now is change the * class that is being extended from Journal to PartitionedJournal and * handle some different initialization properties.) * * @todo Play with the branching factor again. Now that we are using overflow * to evict data onto index segments we can use a higher branching factor * and simply evict more often. Is this worth it? We might want a lower * branching factor on the journal since we can not tell how large any * given write will be and then use larger branching factors on the index * segments. * * @todo try loading some very large data sets; try Transient vs Disk vs Direct * modes. If Transient has significantly better performance then it * indicates that we are waiting on IO so introduce AIO support in the * Journal and try Disk vs Direct with aio. Otherwise, consider * refactoring the btree to have the values be variable length byte[]s * with serialization in the client and other tuning focused on IO (the * only questions with that approach are appropriate compression * techniques and handling transparently timestamps as part of the value * when using an isolated btree in a transaction). * * @todo the only added cost for a quad store is the additional statement * indices. There are only three more statement indices in a quad store. * Since statement indices are so cheap, it is probably worth implementing * them now, even if only as a configuration option. * * @todo verify read after commit (restart safe) for large data sets and test * re-load rate for a data set and verify that no new statements are * added. * * @todo add bulk data export (buffering statements and bulk resolving term * identifiers). * * @todo The use of long[] identifiers for statements also means that the SPO * and other statement indices are only locally ordered so they can not be * used to perform a range scan that is ordered in the terms without * joining against the various term indices and then sorting the outputs. * * @todo possibly save frequently seen terms in each batch for the next batch in * order to reduce unicode conversions. * * @todo support metadata about the statement, e.g., whether or not it is an * inference. * * @todo compute the MB/sec rate at which the store can load data and compare it * with the maximum transfer rate for the journal without the btree and * the maximum transfer rate to disk. this will tell us the overhead of * the btree implementation. * * @todo Try a variant in which we have metadata linking statements and terms * together. In this case we would have to go back to the terms and update * them to have metadata about the statement. it is a bit circular since * we can not create the statement until we have the terms and we can not * add the metadata to the terms until we have the statement. * * @todo Note that a very interesting solution for RDF places all data into a * statement index and then use block compression techniques to remove * frequent terms, e.g., the repeated parts of the value. Also note that * there will be no "value" for an rdf statement since existence is all. * The key completely encodes the statement. So, another approach is to * bit code the repeated substrings found within the key in each leaf. * This way the serialized key size reflects only the #of distinctions. * * @todo I've been thinking about rdfs stores in the light of the work on * bigdata. Transactional isolation for rdf is really quite simple. 
Since * lexicons (uri, literal or bnode indices) do not (really) support * deletion, the only acts are asserting term and asserting and retracting * statements. since assertion terms can lead to write-write conflicts, * which must be resolved and can cascade into the statement indices since * the statement key depends directly on the assigned term identifiers. a * statement always merges with an existing statement, inserts never cause * conflicts. Hence the only possible write-write conflict for the * statement indices is a write-delete conflict. quads do not really make * this more complex (or expensive) since merges only occur when there is * a context match. however entailments can cause twists depending on how * they are realized. * * If we do a pure RDF layer (vs RDF over GOM over bigdata), then it seems that * we could simple use a statement index (no lexicons for URIs, etc). Normally * this inflates the index size since you have lots of duplicate strings, but we * could just use block compression to factor out those strings when we evict * index leaves to disk. Prefix compression of keys will already do great things * for removing repetitive strings from the index nodes and block compression * will get at the leftover redundancy. * * So, one dead simple architecture is one index per access path (there is of * course some index reuse across the access paths) with the statements inline * in the index using prefix key compression and block compression to remove * redundancy. Inserts on this architecture would just send triples to the store * and the various indices would be maintained by the store itself. Those * indices could be load balanced in segments across a cluster. * * Since a read that goes through to disk reads an entire leaf at a time, the * most obvious drawback that I see is caching for commonly used assertions, but * that is easy to implement with some cache invalidation mechanism coupled to * deletes. * * I can also see how to realize very large bulk inserts outside of a * transactional context while handling concurrent transactions -- you just have * to reconcile as of the commit time of the bulk insert and you get to do that * using efficient compacting sort-merges of "perfect" bulk index segments. The * architecture would perform well on concurrent apstars style document loading * as well as what we might normally consider a bulk load (a few hundred * megabytes of data) within the normal transaction mechanisms, but if you * needed to ingest uniprot you would want to use a different technique :-) * outside of the normal transactional isolation mechanisms. * * I'm not sure what the right solution is for entailments, e.g., truth * maintenance vs eager closure. Either way, you would definitely want to avoid * tuple at a time processing and batch things up so as to minimize the #of * index tests that you had to do. So, handling entailments and efficient joins * for high-level query languages would be the two places for more thought. And * there are little odd spots in RDF - handling bnodes, typed literals, and the * concept of a total sort order for the statement index. 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: TempTripleStore.java,v 1.1 2007/02/17 03:08:00 mrpersonick Exp $ */ public class TempTripleStore extends TemporaryStore { static transient public Logger log = Logger.getLogger(TempTripleStore.class); public RdfKeyBuilder keyBuilder; /* * Note: You MUST NOT retain hard references to these indices across * operations since they may be discarded and re-loaded. */ private IIndex ndx_spo; private IIndex ndx_pos; private IIndex ndx_osp; final String name_spo = "spo"; final String name_pos = "pos"; final String name_osp = "osp"; /** * Returns and creates iff necessary a scalable restart safe index for RDF * {@link _Statement statements}. * @param name The name of the index. * @return The index. * * @see #name_spo * @see #name_pos * @see #name_osp */ protected IIndex getStatementIndex(String name) { IIndex ndx = getIndex(name); if (ndx == null) { ndx = registerIndex(name, new BTree(this, BTree.DEFAULT_BRANCHING_FACTOR, StatementSerializer.INSTANCE)); } return ndx; } public IIndex getSPOIndex() { if(ndx_spo!=null) return ndx_spo; return ndx_spo = getStatementIndex(name_spo); } public IIndex getPOSIndex() { if(ndx_pos!=null) return ndx_pos; return ndx_pos = getStatementIndex(name_pos); } public IIndex getOSPIndex() { if(ndx_osp!=null) return ndx_osp; return ndx_osp = getStatementIndex(name_osp); } /** * Create or re-open a triple store. * * @todo initialize the locale for the {@link KeyBuilder} from properties or * use the default locale if none is specified. The locale should be a * restart safe property since it effects the sort order of the * term:id index. */ public TempTripleStore() { super(); // setup key builder that handles unicode and primitive data types. KeyBuilder _keyBuilder = new KeyBuilder(createCollator(), Bytes.kilobyte32 * 4); // setup key builder for RDF Values and Statements. keyBuilder = new RdfKeyBuilder(_keyBuilder); } /** * Create and return a new collator object responsible for encoding unicode * strings into sort keys. * * @return A new collator object. * * @todo define properties for configuring the collator. */ private RuleBasedCollator createCollator() { // choose a collator for the default locale. RuleBasedCollator collator = (RuleBasedCollator) Collator .getInstance(Locale.getDefault()); /* * Primary uses case folding and produces smaller sort strings. * * Secondary does not fold case. * * Tertiary is the default. * * Identical is also allowed. * * @todo handle case folding - currently the indices complain, e.g., for * wordnet that a term already exists with a given id "Yellow Pages" vs * "yellow pages". Clearly the logic to fold case needs to extend * further if it is to work. */ // collator.setStrength(Collator.PRIMARY); // collator.setStrength(Collator.SECONDARY); return collator; } /** * The #of triples in the store. * <p> * This may be an estimate when using partitioned indices. */ public int getStatementCount() { return getSPOIndex().rangeCount(null,null); } /** * Add a single statement by lookup and/or insert into the various indices * (non-batch api). */ public void addStatement(long _s, long _p, long _o) { getSPOIndex().insert(keyBuilder.statement2Key(_s, _p, _o),null); getPOSIndex().insert(keyBuilder.statement2Key(_p, _o, _s),null); getOSPIndex().insert(keyBuilder.statement2Key(_p, _s, _p),null); } /** * Return true if the statement exists in the store (non-batch API). 
*/ public boolean containsStatement(long _s, long _p, long _o) { return getSPOIndex().contains(keyBuilder.statement2Key(_s, _p, _o)); } /** * Writes out some usage details on System.err. */ public void usage() { usage("spo", getSPOIndex()); usage("pos", getPOSIndex()); usage("osp", getOSPIndex()); } public void dump() { IIndex ndx_spo = getSPOIndex(); IEntryIterator it = ndx_spo.rangeIterator(null, null); while( it.hasNext() ) { it.next(); SPO spo = new SPO(KeyOrder.SPO, keyBuilder, it.getKey()); System.err.println(spo.s + ", " + spo.p + ", " + spo.o); } } private void usage(String name,IIndex ndx) { if (ndx instanceof BTree) { BTree btree = (BTree) ndx; final int nentries = btree.getEntryCount(); final int height = btree.getHeight(); final int nleaves = btree.getLeafCount(); final int nnodes = btree.getNodeCount(); final int ndistinctOnQueue = btree.getNumDistinctOnQueue(); final int queueCapacity = btree.getHardReferenceQueueCapacity(); System.err.println(name + ": #entries=" + nentries + ", height=" + height + ", #nodes=" + nnodes + ", #leaves=" + nleaves + ", #(nodes+leaves)=" + (nnodes + nleaves) + ", #distinctOnQueue=" + ndistinctOnQueue + ", queueCapacity=" + queueCapacity); } else { // Note: this is only an estimate if the index is a view. final int nentries = ndx.rangeCount(null, null); System.err.println(name+": #entries(est)="+nentries); } } } |
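TempTripleStore keeps the three statement access paths (SPO, POS, OSP) on a TemporaryStore so entailments can be accumulated off to the side of the main journal. A brief usage sketch, assuming s, p and o are term identifiers (long values) already resolved against the main store's lexicon:

    TempTripleStore tmp = new TempTripleStore();

    // insert the statement into each access path (non-batch API)
    tmp.addStatement(s, p, o);

    // membership test against the SPO index
    boolean known = tmp.containsStatement(s, p, o);

    // the statement count is a rangeCount over the SPO index
    int n = tmp.getStatementCount();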
From: Mike P. <mrp...@us...> - 2007-02-17 03:08:10
Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24378/bigdata-rdf/src/java/com/bigdata/rdf/inf Modified Files: AbstractRuleRdfs511.java Rule.java InferenceEngine.java AbstractRuleRdfs2379.java RuleRdf01.java AbstractRuleRdf.java AbstractRuleRdfs68101213.java Log Message: Converted entailment collection arrays to btrees. Index: InferenceEngine.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/InferenceEngine.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** InferenceEngine.java 9 Feb 2007 21:19:26 -0000 1.8 --- InferenceEngine.java 17 Feb 2007 03:07:59 -0000 1.9 *************** *** 55,58 **** --- 55,59 ---- import com.bigdata.objndx.IIndex; import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; import com.bigdata.rdf.TripleStore; import com.bigdata.rdf.inf.TestMagicSets.MagicRule; *************** *** 284,321 **** final int nrules = rules.length; ! int firstStatementCount = getStatementCount(); ! ! int lastStatementCount = firstStatementCount; final long begin = System.currentTimeMillis(); ! System.err.println("Closing kb with " + lastStatementCount + " statements"); ! int nadded = 0; while (true) { for (int i = 0; i < nrules; i++) { Rule rule = rules[i]; ! nadded += rule.apply(); } ! ! int statementCount = getStatementCount(); ! ! // testing the #of statement is less prone to error. ! if (lastStatementCount == statementCount) { ! ! // if( nadded == 0 ) { // should also work. ! // This is the fixed point. break; ! } ! lastStatementCount = statementCount; } --- 285,341 ---- final int nrules = rules.length; ! final int firstStatementCount = getStatementCount(); final long begin = System.currentTimeMillis(); ! log.debug("Closing kb with " + firstStatementCount + " statements"); ! int round = 0; ! ! TempTripleStore entailments = new TempTripleStore(); while (true) { + int numComputed = 0; + + long computeTime = 0; + + int numEntailmentsBefore = entailments.getStatementCount(); + for (int i = 0; i < nrules; i++) { Rule rule = rules[i]; ! Rule.Stats stats = rule.apply( entailments ); ! ! numComputed += stats.numComputed; + computeTime += stats.computeTime; + } ! ! int numEntailmentsAfter = entailments.getStatementCount(); ! ! if ( numEntailmentsBefore == numEntailmentsAfter ) { ! // This is the fixed point. break; ! } ! long insertStart = System.currentTimeMillis(); ! ! int numInserted = transferBTrees( entailments ); ! ! long insertTime = System.currentTimeMillis() - insertStart; ! ! StringBuilder debug = new StringBuilder(); ! debug.append( "round #" ).append( round++ ).append( ": " ); ! debug.append( numComputed ).append( " computed in " ); ! debug.append( computeTime ).append( " millis, " ); ! debug.append( numInserted ).append( " inserted in " ); ! debug.append( insertTime ).append( " millis " ); ! log.debug( debug.toString() ); } *************** *** 323,331 **** final long elapsed = System.currentTimeMillis() - begin; ! System.err.println("Closed store in " + elapsed + "ms yeilding " + lastStatementCount + " statements total, " + (lastStatementCount - firstStatementCount) + " inferences"); } /** --- 343,389 ---- final long elapsed = System.currentTimeMillis() - begin; ! final int lastStatementCount = getStatementCount(); ! ! 
log.debug("Closed store in " + elapsed + "ms yeilding " + lastStatementCount + " statements total, " + (lastStatementCount - firstStatementCount) + " inferences"); } + + private int transferBTrees( TempTripleStore entailments ) { + + int numInserted = 0; + + IEntryIterator it = entailments.getSPOIndex().rangeIterator(null, null); + while (it.hasNext()) { + it.next(); + byte[] key = it.getKey(); + if (!getSPOIndex().contains(key)) { + numInserted++; + getSPOIndex().insert(key, null); + } + } + + it = entailments.getPOSIndex().rangeIterator(null, null); + while (it.hasNext()) { + it.next(); + byte[] key = it.getKey(); + if (!getPOSIndex().contains(key)) { + getPOSIndex().insert(key, null); + } + } + + it = entailments.getOSPIndex().rangeIterator(null, null); + while (it.hasNext()) { + it.next(); + byte[] key = it.getKey(); + if (!getOSPIndex().contains(key)) { + getOSPIndex().insert(key, null); + } + } + + return numInserted; + + } /** *************** *** 441,445 **** Rule rule = rules[i]; ! nadded += rule.apply(); } --- 499,504 ---- Rule rule = rules[i]; ! // nadded += rule.apply(); ! // rule.apply(); } Index: AbstractRuleRdfs511.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs511.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractRuleRdfs511.java 9 Feb 2007 20:18:56 -0000 1.3 --- AbstractRuleRdfs511.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,51 **** --- 48,53 ---- import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; + import com.bigdata.rdf.inf.Rule.Stats; *************** *** 62,68 **** } ! protected SPO[] collectEntailments() { ! // the predicate is fixed for all parts of the rule. final long p = head.p.id; --- 64,74 ---- } ! public Stats apply( TempTripleStore entailments ) { ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! ! // the predicate is fixed for all parts of the rule. final long p = head.p.id; *************** *** 91,95 **** SPO[] stmts2 = stmts1.clone(); ! Vector<SPO> v = new Vector<SPO>(); // the simplest n^2 algorithm for( int i = 0; i < stmts1.length; i++ ) { --- 97,101 ---- SPO[] stmts2 = stmts1.clone(); ! Vector<SPO> stmts3 = new Vector<SPO>(BUFFER_SIZE); // the simplest n^2 algorithm for( int i = 0; i < stmts1.length; i++ ) { *************** *** 97,106 **** for ( int j = 0; j < stmts2.length; j++ ) { if ( stmts1[i].o == stmts2[j].s ) { ! v.add( new SPO(stmts1[i].s, p, stmts2[j].o) ); } } } ! return v.toArray( new SPO[v.size()] ); } --- 103,123 ---- for ( int j = 0; j < stmts2.length; j++ ) { if ( stmts1[i].o == stmts2[j].s ) { ! if (stmts3.size() == BUFFER_SIZE) { ! dumpBuffer ! ( stmts3.toArray( new SPO[stmts3.size()] ), ! entailments ! ); ! stmts3.clear(); ! } ! stmts3.add( new SPO(stmts1[i].s, p, stmts2[j].o) ); ! stats.numComputed++; } } } + dumpBuffer( stmts3.toArray( new SPO[stmts3.size()] ), entailments ); ! stats.computeTime = System.currentTimeMillis() - computeStart; ! ! 
return stats; } Index: AbstractRuleRdfs2379.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs2379.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** AbstractRuleRdfs2379.java 9 Feb 2007 20:18:56 -0000 1.2 --- AbstractRuleRdfs2379.java 17 Feb 2007 03:07:59 -0000 1.3 *************** *** 48,51 **** --- 48,52 ---- import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; *************** *** 62,69 **** } ! protected SPO[] collectEntailments() { // create a place to hold the entailments ! Vector<SPO> stmts3 = new Vector<SPO>(); SPO[] stmts1 = getStmts1(); --- 63,74 ---- } ! public Stats apply( TempTripleStore entailments ) { + Stats stats = new Stats(); + + long computeStart = System.currentTimeMillis(); + // create a place to hold the entailments ! Vector<SPO> stmts3 = new Vector<SPO>(BUFFER_SIZE); SPO[] stmts1 = getStmts1(); *************** *** 71,79 **** SPO[] stmts2 = getStmts2( stmts1[i] ); for ( int j = 0; j < stmts2.length; j++ ) { stmts3.add( buildStmt3( stmts1[i], stmts2[j] ) ); } } ! return stmts3.toArray( new SPO[stmts3.size()] ); } --- 76,95 ---- SPO[] stmts2 = getStmts2( stmts1[i] ); for ( int j = 0; j < stmts2.length; j++ ) { + if (stmts3.size() == BUFFER_SIZE) { + dumpBuffer + ( stmts3.toArray( new SPO[stmts3.size()] ), + entailments + ); + stmts3.clear(); + } stmts3.add( buildStmt3( stmts1[i], stmts2[j] ) ); + stats.numComputed++; } } + dumpBuffer( stmts3.toArray( new SPO[stmts3.size()] ), entailments ); + + stats.computeTime = System.currentTimeMillis() - computeStart; ! return stats; } Index: RuleRdf01.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/RuleRdf01.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RuleRdf01.java 9 Feb 2007 20:18:56 -0000 1.3 --- RuleRdf01.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,51 **** --- 48,52 ---- import com.bigdata.objndx.IEntryIterator; import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; *************** *** 61,67 **** } ! protected SPO[] collectEntailments() { ! Vector<SPO> entailments = new Vector<SPO>(); long lastP = -1; --- 62,72 ---- } ! public Stats apply( TempTripleStore btree ) { ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! ! Vector<SPO> entailments = new Vector<SPO>(BUFFER_SIZE); long lastP = -1; *************** *** 80,91 **** lastP = stmt.p; entailments.add ( new SPO(stmt.p, store.rdfType.id, store.rdfProperty.id) ); } } ! return entailments.toArray( new SPO[entailments.size()] ); } --- 85,107 ---- lastP = stmt.p; + if (entailments.size() == BUFFER_SIZE) { + dumpBuffer + ( entailments.toArray( new SPO[entailments.size()] ), + btree + ); + entailments.clear(); + } entailments.add ( new SPO(stmt.p, store.rdfType.id, store.rdfProperty.id) ); + stats.numComputed++; } } + dumpBuffer( entailments.toArray( new SPO[entailments.size()] ), btree ); ! stats.computeTime = System.currentTimeMillis() - computeStart; ! ! 
return stats; } Index: Rule.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/Rule.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Rule.java 27 Jan 2007 15:58:57 -0000 1.2 --- Rule.java 17 Feb 2007 03:07:59 -0000 1.3 *************** *** 44,47 **** --- 44,49 ---- package com.bigdata.rdf.inf; + import com.bigdata.rdf.TempTripleStore; + /** *************** *** 100,107 **** * Apply the rule to the statement in the store. * ! * @param store ! * The triple store. * ! * @return The #of statements added to the store. * * @todo support conditional insert in the btree so that we do not have --- 102,109 ---- * Apply the rule to the statement in the store. * ! * @param entailments ! * The temporary triple store used to hold entailments. * ! * @return Statistics related to what the rule did. * * @todo support conditional insert in the btree so that we do not have *************** *** 116,120 **** * statements that they will insert. */ ! abstract public int apply(); } \ No newline at end of file --- 118,136 ---- * statements that they will insert. */ ! abstract public Stats apply( TempTripleStore entailments ); ! ! ! /** ! * Statistics about what the Rule did during {@link Rule#apply()}. ! * ! * @author mikep ! */ ! public static class Stats { ! ! public int numComputed; ! ! long computeTime; ! ! } } \ No newline at end of file Index: AbstractRuleRdf.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdf.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractRuleRdf.java 9 Feb 2007 20:18:56 -0000 1.3 --- AbstractRuleRdf.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,56 **** --- 48,62 ---- import org.openrdf.model.URI; + import com.bigdata.objndx.IEntryIterator; import com.bigdata.objndx.IIndex; + import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; public abstract class AbstractRuleRdf extends Rule { + protected final int BUFFER_SIZE = 10*1024*1024; + + public AbstractRuleRdf(InferenceEngine store, Triple head, Pred[] body) { *************** *** 59,102 **** } ! public int apply() { ! // long startTime = System.currentTimeMillis(); ! ! SPO[] entailments = collectEntailments(); ! /* ! long collectionTime = System.currentTimeMillis() - startTime; ! System.out.println( getClass().getName() + " collected " + ! entailments.length + " entailments in " + ! collectionTime + " millis" ); ! int numStmtsBefore = store.ndx_spo.getEntryCount(); ! ! System.out.println( getClass().getName() + ! " number of statements before: " + ! numStmtsBefore); ! ! startTime = System.currentTimeMillis(); ! */ ! int numAdded = insertEntailments( entailments ); ! /* ! long insertionTime = System.currentTimeMillis() - startTime; ! ! int numStmtsAfter = store.ndx_spo.getEntryCount(); ! ! System.out.println( getClass().getName() + ! " number of statements after: " + ! numStmtsAfter); ! ! System.out.println( getClass().getName() + ! " inserted " + ( numStmtsAfter - numStmtsBefore ) + ! " statements in " + insertionTime + " millis"); ! */ ! return numAdded; ! ! } ! protected abstract SPO[] collectEntailments(); protected int insertEntailments( SPO[] entailments ) { --- 65,110 ---- } ! public abstract Stats apply( TempTripleStore entailments ); ! protected void dumpBuffer( SPO[] stmts, TempTripleStore btree ) { ! // deal with the SPO index ! 
IIndex spo = btree.getSPOIndex(); ! Arrays.sort(stmts,SPOComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].s, stmts[i].p, stmts[i].o ! ); ! if ( !spo.contains(key) ) { ! spo.insert(key, null); ! } ! } ! // deal with the POS index ! IIndex pos = btree.getPOSIndex(); ! Arrays.sort(stmts,POSComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].p, stmts[i].o, stmts[i].s ! ); ! if ( !pos.contains(key) ) { ! pos.insert(key, null); ! } ! } ! // deal with the OSP index ! IIndex osp = btree.getOSPIndex(); ! Arrays.sort(stmts,OSPComparator.INSTANCE); ! for ( int i = 0; i < stmts.length; i++ ) { ! byte[] key = btree.keyBuilder.statement2Key ! ( stmts[i].o, stmts[i].s, stmts[i].p ! ); ! if ( !osp.contains(key) ) { ! osp.insert(key, null); ! } ! } + } + protected int insertEntailments( SPO[] entailments ) { *************** *** 144,147 **** --- 152,161 ---- } + protected int insertEntailments2( TempTripleStore entailments ) { + + return insertEntailments( convert( entailments ) ); + + } + protected void printStatement( SPO stmt ) { *************** *** 164,166 **** --- 178,216 ---- } + protected TempTripleStore convert( SPO[] stmts ) { + + TempTripleStore tts = new TempTripleStore(); + + for ( int i = 0; i < stmts.length; i++ ) { + + tts.addStatement( stmts[i].s, stmts[i].p, stmts[i].o ); + + } + + return tts; + + } + + protected SPO[] convert( TempTripleStore tts ) { + + SPO[] stmts = new SPO[tts.getStatementCount()]; + + int i = 0; + + IIndex ndx_spo = tts.getSPOIndex(); + + IEntryIterator it = ndx_spo.rangeIterator(null, null); + + while ( it.hasNext() ) { + + it.next(); + + stmts[i++] = new SPO(KeyOrder.SPO, tts.keyBuilder, it.getKey()); + + } + + return stmts; + + } + } \ No newline at end of file Index: AbstractRuleRdfs68101213.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/inf/AbstractRuleRdfs68101213.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractRuleRdfs68101213.java 9 Feb 2007 20:18:56 -0000 1.3 --- AbstractRuleRdfs68101213.java 17 Feb 2007 03:07:59 -0000 1.4 *************** *** 48,51 **** --- 48,52 ---- import com.bigdata.objndx.IEntryIterator; import com.bigdata.rdf.KeyOrder; + import com.bigdata.rdf.TempTripleStore; *************** *** 62,68 **** } ! protected SPO[] collectEntailments() { ! Vector<SPO> entailments = new Vector<SPO>(); byte[] startKey = store.keyBuilder.statement2Key --- 63,73 ---- } ! public Stats apply( TempTripleStore btree ) { ! Stats stats = new Stats(); ! ! long computeStart = System.currentTimeMillis(); ! ! Vector<SPO> entailments = new Vector<SPO>(BUFFER_SIZE); byte[] startKey = store.keyBuilder.statement2Key *************** *** 85,94 **** long _p = head.p.isVar() ? stmt.s : head.p.id; long _o = head.o.isVar() ? stmt.s : head.o.id; ! entailments.add( new SPO(_s, _p, _o) ); } ! return entailments.toArray( new SPO[entailments.size()] ); } --- 90,110 ---- long _p = head.p.isVar() ? stmt.s : head.p.id; long _o = head.o.isVar() ? stmt.s : head.o.id; ! ! if (entailments.size() == BUFFER_SIZE) { ! dumpBuffer ! ( entailments.toArray( new SPO[entailments.size()] ), ! btree ! ); ! entailments.clear(); ! } entailments.add( new SPO(_s, _p, _o) ); + stats.numComputed++; } + dumpBuffer( entailments.toArray( new SPO[entailments.size()] ), btree ); ! 
stats.computeTime = System.currentTimeMillis() - computeStart; ! ! return stats; } |
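The revised closure loop drives every rule against a shared TempTripleStore and detects the fixed point by comparing the entailment count before and after each round; only then are novel statements copied into the main indices. A condensed sketch of that control flow (logging and per-round statistics omitted):

    TempTripleStore entailments = new TempTripleStore();

    while (true) {

        int before = entailments.getStatementCount();

        for (int i = 0; i < rules.length; i++) {
            // each rule buffers its entailments into the temporary store
            Rule.Stats stats = rules[i].apply(entailments);
        }

        if (before == entailments.getStatementCount()) {
            break; // fixed point: this round produced nothing new
        }

        // insert only those keys not already present in the main SPO/POS/OSP indices
        transferBTrees(entailments);
    }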
From: Mike P. <mrp...@us...> - 2007-02-17 03:08:10
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/journal In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24378/bigdata/src/java/com/bigdata/journal Modified Files: TemporaryStore.java Log Message: Converted entailment collection arrays to btrees. Index: TemporaryStore.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/TemporaryStore.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** TemporaryStore.java 15 Feb 2007 22:01:18 -0000 1.2 --- TemporaryStore.java 17 Feb 2007 03:08:00 -0000 1.3 *************** *** 52,55 **** --- 52,58 ---- import java.nio.ByteBuffer; + import com.bigdata.objndx.BTree; + import com.bigdata.objndx.BTreeMetadata; + import com.bigdata.objndx.IIndex; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.Bytes; *************** *** 136,139 **** --- 139,144 ---- this.useDirectBuffers = useDirectBuffers; + setupName2AddrBTree(); + } *************** *** 279,282 **** --- 284,343 ---- } + + + /** + * BTree mapping index names to the last metadata record committed for the + * named index. The keys are index names (unicode strings). The values are + * the last known {@link Addr address} of the named btree. + */ + private Name2Addr name2Addr; + + /** + * Setup the btree that resolved named btrees. + */ + private void setupName2AddrBTree() { + + assert name2Addr == null; + + name2Addr = new Name2Addr(this); + + } + + public IIndex registerIndex(String name, IIndex btree) { + + if( getIndex(name) != null ) { + + throw new IllegalStateException("BTree already registered: name="+name); + + } + + // add to the persistent name map. + name2Addr.add(name, btree); + + return btree; + + } + + public void dropIndex(String name) { + + name2Addr.dropIndex(name); + + } + + /** + * Return the named index (unisolated). Writes on the returned index will be + * made restart-safe with the next {@link #commit()} regardless of the + * success or failure of a transaction. Transactional writes must use the + * same named method on the {@link Tx} in order to obtain an isolated + * version of the named btree. + */ + public IIndex getIndex(String name) { + + if(name==null) throw new IllegalArgumentException(); + + return name2Addr.get(name); + + } + } |
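With this change a TemporaryStore can hand out named, unisolated indices much like the journal: registerIndex records the btree under its name via Name2Addr and getIndex resolves it again later. A usage sketch following the pattern TempTripleStore uses for its statement indices (the serializer shown is simply the one that class happens to use):

    TemporaryStore store = new TemporaryStore();

    IIndex ndx = store.getIndex("spo");

    if (ndx == null) {
        // register a new btree under that name so later lookups resolve it
        ndx = store.registerIndex("spo", new BTree(store,
                BTree.DEFAULT_BRANCHING_FACTOR, StatementSerializer.INSTANCE));
    }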
From: Mike P. <mrp...@us...> - 2007-02-17 03:08:06
Update of /cvsroot/cweb/bigdata-rdf/src/test/com/bigdata/rdf/inf In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24378/bigdata-rdf/src/test/com/bigdata/rdf/inf Modified Files: TestMagicSets.java AbstractInferenceEngineTestCase.java Added Files: TestTempStore.java Log Message: Converted entailment collection arrays to btrees. Index: AbstractInferenceEngineTestCase.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/test/com/bigdata/rdf/inf/AbstractInferenceEngineTestCase.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** AbstractInferenceEngineTestCase.java 11 Feb 2007 17:34:24 -0000 1.4 --- AbstractInferenceEngineTestCase.java 17 Feb 2007 03:08:00 -0000 1.5 *************** *** 119,123 **** protected BufferMode getBufferMode() { ! return BufferMode.Transient; } --- 119,123 ---- protected BufferMode getBufferMode() { ! return BufferMode.Direct; } *************** *** 133,136 **** --- 133,138 ---- File file = new File(filename); + System.err.println( "store file: " + file.getAbsolutePath() ); + if(file.exists() && ! file.delete() ) { Index: TestMagicSets.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/test/com/bigdata/rdf/inf/TestMagicSets.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** TestMagicSets.java 9 Feb 2007 20:18:56 -0000 1.4 --- TestMagicSets.java 17 Feb 2007 03:08:00 -0000 1.5 *************** *** 55,58 **** --- 55,59 ---- import org.openrdf.vocabulary.RDFS; + import com.bigdata.rdf.TempTripleStore; import com.bigdata.rdf.TripleStore; *************** *** 159,171 **** * Applies the base rule iff the {@link Magic} is matched. */ ! public int apply() { if(match()) { ! return rule.apply(); } ! return 0; } --- 160,172 ---- * Applies the base rule iff the {@link Magic} is matched. */ ! public Rule.Stats apply( TempTripleStore entailments ) { if(match()) { ! return rule.apply( entailments ); } ! return null; } --- NEW FILE: TestTempStore.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. 
Modifications: */ /* * Created on Jan 26, 2007 */ package com.bigdata.rdf.inf; import java.io.File; import java.io.IOException; /** * Test suite for full forward closure. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: TestTempStore.java,v 1.1 2007/02/17 03:08:00 mrpersonick Exp $ */ public class TestTempStore extends AbstractInferenceEngineTestCase { /** * */ public TestTempStore() { } /** * @param name */ public TestTempStore(String name) { super(name); } /** * Test of full forward closure. * * @throws IOException */ public void testFullForwardClosure01() throws IOException { /* * @todo this is committing the data first we do not want to do if we * know that we are closing the store. * * @todo use a dataset that we can add to CVS for a performance test and * hand-crafted data sets to test the rule implementations. */ store.loadData(new File("data/alibaba_v41.rdf"),""); // store.loadData(new File("data/nciOncology.owl"),""); store.fullForwardClosure(); store.commit(); } } |
From: Mike P. <mrp...@us...> - 2007-02-17 03:08:05
Update of /cvsroot/cweb/bigdata/src/resources/logging In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24378/bigdata/src/resources/logging Modified Files: log4j.properties Log Message: Converted entailment collection arrays to btrees. Index: log4j.properties =================================================================== RCS file: /cvsroot/cweb/bigdata/src/resources/logging/log4j.properties,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** log4j.properties 3 Jan 2007 20:27:07 -0000 1.4 --- log4j.properties 17 Feb 2007 03:08:00 -0000 1.5 *************** *** 14,17 **** --- 14,18 ---- log4j.logger.com.bigdata.objndx.IndexSegmentBuilder=INFO log4j.logger.com.bigdata.objndx.AbstractBTreeTestCase=INFO + log4j.logger.com.bigdata.rdf=DEBUG log4j.appender.dest1=org.apache.log4j.ConsoleAppender |
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:32
Update of /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24672/src/java/com/bigdata/rdf/rio Modified Files: BulkLoaderBuffer.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: BulkLoaderBuffer.java =================================================================== RCS file: /cvsroot/cweb/bigdata-rdf/src/java/com/bigdata/rdf/rio/BulkLoaderBuffer.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** BulkLoaderBuffer.java 9 Feb 2007 21:19:25 -0000 1.8 --- BulkLoaderBuffer.java 15 Feb 2007 22:01:25 -0000 1.9 *************** *** 93,112 **** /** - * When true, the {@link IndexSegment}s will be built in memory rather than - * using disk to buffer the nodes. - * - * @todo try modifying the {@link NodeSerializer} to use a - * {@link ByteArrayOutputStream} and see if that is any faster the the - * {@link ByteBufferOutputStream}. Be sure to undo the hack in - * {@link KeyBufferSerializer}. - * - * @todo fully buffered is now marginally faster, but the disk buffer - * version winds up writing the leaves on disk and then transfering - * them to another disk file. fix that and then compare performance - * again. - */ - boolean fullyBuffer = true; - - /** * @todo experiment with and without checksum computation. */ --- 93,96 ---- *************** *** 173,177 **** new TermIdIterator(this), branchingFactor, TermIdSerializer.INSTANCE, ! fullyBuffer, useChecksum, recordCompressor, errorRate); --- 157,161 ---- new TermIdIterator(this), branchingFactor, TermIdSerializer.INSTANCE, ! useChecksum, recordCompressor, errorRate); *************** *** 205,209 **** branchingFactor, RdfValueSerializer.INSTANCE, ! fullyBuffer, useChecksum, recordCompressor, errorRate); --- 189,193 ---- branchingFactor, RdfValueSerializer.INSTANCE, ! useChecksum, recordCompressor, errorRate); *************** *** 230,234 **** branchingFactor, StatementSerializer.INSTANCE, ! fullyBuffer, useChecksum, recordCompressor, errorRate); --- 214,218 ---- branchingFactor, StatementSerializer.INSTANCE, ! useChecksum, recordCompressor, errorRate); |
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:28
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/journal In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/java/com/bigdata/journal Modified Files: DiskBackedBufferStrategy.java IBufferStrategy.java Tx.java BasicBufferStrategy.java DirectBufferStrategy.java DiskOnlyStrategy.java AbstractBufferStrategy.java TemporaryStore.java Added Files: IDiskBasedStrategy.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: DiskOnlyStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/DiskOnlyStrategy.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** DiskOnlyStrategy.java 15 Feb 2007 20:59:21 -0000 1.16 --- DiskOnlyStrategy.java 15 Feb 2007 22:01:18 -0000 1.17 *************** *** 25,29 **** * nextOffset (i.e., has written all data that is dirty on the buffer). */ ! public class DiskOnlyStrategy extends AbstractBufferStrategy { /** --- 25,30 ---- * nextOffset (i.e., has written all data that is dirty on the buffer). */ ! public class DiskOnlyStrategy extends AbstractBufferStrategy implements ! IDiskBasedStrategy { /** *************** *** 61,76 **** private boolean open; ! public File getFile() { ! return file; } ! // public FileChannel getFileChannel() { ! // ! // return channel; ! // ! // } DiskOnlyStrategy(long maximumExtent, FileMetadata fileMetadata) { --- 62,83 ---- private boolean open; ! final public int getHeaderSize() { ! return headerSize; } ! final public File getFile() { ! ! return file; ! ! } + final public RandomAccessFile getRandomAccessFile() { + + return raf; + + } + DiskOnlyStrategy(long maximumExtent, FileMetadata fileMetadata) { *************** *** 377,381 **** force(true); ! System.err.println("Disk file: newLength="+newExtent); } catch(IOException ex) { --- 384,388 ---- force(true); ! System.err.println("Disk file: newLength="+cf.format(newExtent)); } catch(IOException ex) { *************** *** 391,393 **** --- 398,406 ---- } + public long transferTo(RandomAccessFile out) throws IOException { + + return super.transferFromDiskTo(this, out); + + } + } Index: AbstractBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/AbstractBufferStrategy.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** AbstractBufferStrategy.java 15 Feb 2007 20:59:21 -0000 1.9 --- AbstractBufferStrategy.java 15 Feb 2007 22:01:18 -0000 1.10 *************** *** 1,5 **** --- 1,10 ---- package com.bigdata.journal; + import java.io.IOException; + import java.io.RandomAccessFile; import java.nio.ByteBuffer; + import java.nio.channels.FileChannel; + import java.text.Format; + import java.text.NumberFormat; import com.bigdata.rawstore.Addr; *************** *** 40,43 **** --- 45,58 ---- protected int nextOffset; + static final NumberFormat cf; + + static { + + cf = NumberFormat.getIntegerInstance(); + + cf.setGroupingUsed(true); + + } + final public long getInitialExtent() { *************** *** 179,181 **** --- 194,301 ---- } + /** + * Helper method used by {@link DiskBackedBufferStrategy} and + * {@link DiskOnlyStrategy} to implement + * {@link IBufferStrategy#transferTo(RandomAccessFile)} + * + * @param src + * The source. + * @param out + * The output file. 
+ * + * @return The #of bytes transferred. + * + * @throws IOException + */ + protected long transferFromDiskTo(IDiskBasedStrategy src,RandomAccessFile out) throws IOException { + + final long begin = System.currentTimeMillis(); + + // #of bytes to transfer. + final long count = src.getNextOffset(); + + // the output channel. + final FileChannel outChannel = out.getChannel(); + + // current position on the output channel. + final long toPosition = outChannel.position(); + + if(toPosition + count > Integer.MAX_VALUE) { + + throw new IOException("Index segment exceeds int32 bytes."); + + } + + /* + * Transfer data from channel to channel. + */ + + final FileChannel tmpChannel = src.getRandomAccessFile().getChannel(); + + /* + * Set the fromPosition on source channel. We want everything after the + * file header. + */ + tmpChannel.position(src.getHeaderSize()); + + /* + * Extend the output file. This is required at least for some + * circumstances. + */ + out.setLength(toPosition+count); + + /* + * Transfer the data. It is possible that this will take multiple + * writes for at least some implementations. + */ + + // System.err.println("fromPosition="+tmpChannel.position()+", toPosition="+toPosition+", count="+count); + + int nwrites = 0; // #of write operations. + + { + + long n = count; + + long to = toPosition; + + while (n > 0) { + + long nxfer = outChannel.transferFrom(tmpChannel, to, n); + + to += nxfer; + + n -= nxfer; + + nwrites++; + + // // Verify transfer is complete. + // if (nxfer != count) { + // + // throw new IOException("Expected to transfer " + count + // + ", but transferred " + nxfer); + // + // } + + } + + } + + /* + * Update the position on the output channel since transferFrom does + * NOT do this itself. + */ + outChannel.position(toPosition+count); + + final long elapsed = System.currentTimeMillis() - begin; + + System.err.println("\nTransferred " + count + + " bytes from disk channel to disk channel (offset=" + + toPosition + ") in " + nwrites + " writes and " + elapsed + + "ms"); + + return count; + + } + } Index: Tx.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/Tx.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** Tx.java 15 Feb 2007 20:59:21 -0000 1.27 --- Tx.java 15 Feb 2007 22:01:18 -0000 1.28 *************** *** 85,100 **** * concurrently. We do not even need a read-lock on the indices isolated * by the transaction since they are read-only. This might prove to be a ! * nice way to leverage multiple processors / cores on a data server. * ! * @todo support {@link PartitionedIndex}es. * ! * @todo Make the {@link IsolatedBTree}s safe across {@link Journal#overflow()} ! * events. When {@link PartitionedIndex}es are used this adds a ! * requirement for tracking which {@link IndexSegment}s and ! * {@link Journal}s are required to support the {@link IsolatedBTree}. ! * Deletes of old journals and index segments must be deferred until no * transaction remains which can read those data. This metadata must be * restart-safe so that resources are eventually deleted. On restart, ! * active transactions will abort and their resources may be released. * There is also a requirement for quickly locating the specific journal * and index segments required to support isolation of an index. This --- 85,103 ---- * concurrently. We do not even need a read-lock on the indices isolated * by the transaction since they are read-only. This might prove to be a ! 
* nice way to leverage multiple processors / cores on a data server. The ! * size limit on the transaction write set is currently 2G, but the ! * transaction will run in memory up to 100M. * ! * @todo Support transactions where the indices isolated by the transactions are ! * {@link PartitionedIndex}es. * ! * @todo Track which {@link IndexSegment}s and {@link Journal}s are required ! * to support the {@link IsolatedBTree}s in use by a {@link Tx}. Deletes ! * of old journals and index segments MUST be deferred until no * transaction remains which can read those data. This metadata must be * restart-safe so that resources are eventually deleted. On restart, ! * active transactions will have been discarded abort and their resources ! * released. (Do we need a restart-safe means to indicate the set of ! * running transactions?)<br> * There is also a requirement for quickly locating the specific journal * and index segments required to support isolation of an index. This *************** *** 103,107 **** * far) as well as an index into the named indices index -- perhaps simply * an index by timestamp into the root addresses (or whole root block ! * views). * * @todo The various public methods on this API that have {@link RunState} --- 106,111 ---- * far) as well as an index into the named indices index -- perhaps simply * an index by timestamp into the root addresses (or whole root block ! * views, or moving the root addresses out of the root block and into the ! * store with only the address of the root addresses in the root block). * * @todo The various public methods on this API that have {@link RunState} *************** *** 127,139 **** final static String IS_COMPLETE = "Transaction is complete"; ! /* ! * */ ! final private Journal journal; /** * The timestamp assigned to this transaction. */ ! final private long timestamp; /** --- 131,144 ---- final static String IS_COMPLETE = "Transaction is complete"; ! /** ! * The transaction uses the {@link Journal} for some handshaking in the ! * commit protocol and to locate the named indices that it isolates. */ ! final protected Journal journal; /** * The timestamp assigned to this transaction. */ ! final protected long timestamp; /** *************** *** 141,145 **** * object was created. */ ! final private long commitCounter; private RunState runState; --- 146,150 ---- * object was created. */ ! final protected long commitCounter; private RunState runState; *************** *** 157,168 **** * time it is invoked. */ ! final private IRawStore tmpStore = new TemporaryStore(); /** * BTrees isolated by this transactions. - * - * @todo in order to survive overflow this mapping must be persistent. */ ! private Map<String,IsolatedBTree> btrees = new HashMap<String,IsolatedBTree>(); /** --- 162,171 ---- * time it is invoked. */ ! final protected IRawStore tmpStore = new TemporaryStore(); /** * BTrees isolated by this transactions. */ ! private Map<String, IsolatedBTree> btrees = new HashMap<String, IsolatedBTree>(); /** Index: TemporaryStore.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/TemporaryStore.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** TemporaryStore.java 15 Feb 2007 20:59:21 -0000 1.1 --- TemporaryStore.java 15 Feb 2007 22:01:18 -0000 1.2 *************** *** 280,306 **** } - /** - * Return a temporary file for use by the store. The file will be - * automatically deleted if the JVM exits. 
- * - * @return A temporary file for use by the store. - */ - protected File getTempFile() { - - try { - - File file = File.createTempFile("transientOverflow", ".store"); - - file.deleteOnExit(); - - return file; - - } catch(IOException ex) { - - throw new RuntimeException(ex); - - } - - } - } --- 280,282 ---- Index: DiskBackedBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/DiskBackedBufferStrategy.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** DiskBackedBufferStrategy.java 8 Feb 2007 21:32:10 -0000 1.8 --- DiskBackedBufferStrategy.java 15 Feb 2007 22:01:18 -0000 1.9 *************** *** 18,22 **** * @version $Id$ */ ! abstract public class DiskBackedBufferStrategy extends BasicBufferStrategy { /** --- 18,23 ---- * @version $Id$ */ ! abstract public class DiskBackedBufferStrategy extends BasicBufferStrategy ! implements IDiskBasedStrategy { /** *************** *** 35,39 **** private boolean open = false; ! public boolean isOpen() { return open; --- 36,58 ---- private boolean open = false; ! final public int getHeaderSize() { ! ! return headerSize; ! ! } ! ! final public File getFile() { ! ! return file; ! ! } ! ! final public RandomAccessFile getRandomAccessFile() { ! ! return raf; ! ! } ! ! final public boolean isOpen() { return open; *************** *** 41,45 **** } ! public boolean isStable() { return true; --- 60,64 ---- } ! final public boolean isStable() { return true; *************** *** 148,150 **** --- 167,175 ---- } + public long transferTo(RandomAccessFile out) throws IOException { + + return super.transferFromDiskTo(this, out); + + } + } Index: DirectBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/DirectBufferStrategy.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** DirectBufferStrategy.java 9 Feb 2007 16:13:18 -0000 1.10 --- DirectBufferStrategy.java 15 Feb 2007 22:01:18 -0000 1.11 *************** *** 104,108 **** force(true); ! System.err.println("Disk file: newLength="+newExtent); } catch(IOException ex) { --- 104,108 ---- force(true); ! System.err.println("Disk file: newLength="+cf.format(newExtent)); } catch(IOException ex) { --- NEW FILE: IDiskBasedStrategy.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. 
This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Feb 15, 2007 */ package com.bigdata.journal; import java.io.File; import java.io.RandomAccessFile; /** * An interface for implementations backed by a file on disk. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public interface IDiskBasedStrategy extends IBufferStrategy { /** * The size of the file header in bytes. */ public int getHeaderSize(); /** * The backing file. */ public File getFile(); /** * The object used to read and write on that file. */ public RandomAccessFile getRandomAccessFile(); } Index: IBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/IBufferStrategy.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** IBufferStrategy.java 8 Feb 2007 21:32:10 -0000 1.13 --- IBufferStrategy.java 15 Feb 2007 22:01:18 -0000 1.14 *************** *** 1,4 **** --- 1,7 ---- package com.bigdata.journal; + import java.io.IOException; + import java.io.RandomAccessFile; + import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.IRawStore; *************** *** 92,94 **** --- 95,112 ---- ForceEnum forceOnCommitEnum); + /** + * A block operation that transfers the serialized records (aka the written + * on portion of the user extent) en mass from the buffer onto an output + * file. The buffered records are written "in order" starting at the current + * position on the output file. The file is grown if necessary. + * + * @param out + * The file to which the buffer contents will be transferred. + * + * @return The #of bytes written. + * + * @throws IOException + */ + public long transferTo(RandomAccessFile out) throws IOException; + } Index: BasicBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/BasicBufferStrategy.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** BasicBufferStrategy.java 15 Feb 2007 20:59:21 -0000 1.14 --- BasicBufferStrategy.java 15 Feb 2007 22:01:18 -0000 1.15 *************** *** 1,5 **** --- 1,8 ---- package com.bigdata.journal; + import java.io.IOException; + import java.io.RandomAccessFile; import java.nio.ByteBuffer; + import java.nio.channels.FileChannel; import com.bigdata.rawstore.Addr; *************** *** 211,217 **** userExtent = newUserExtent; ! System.err.println("Buffer: newCapacity="+newCapacity); } } --- 214,266 ---- userExtent = newUserExtent; ! System.err.println("Buffer: newCapacity=" + cf.format(newCapacity)); } + public long transferTo(RandomAccessFile out) throws IOException { + + long count = nextOffset; + + final FileChannel outChannel = out.getChannel(); + + // current position on the output channel. + final long toPosition = outChannel.position(); + + if(toPosition + count > Integer.MAX_VALUE) { + + throw new IOException("Index segment exceeds int32 bytes."); + + } + + /* + * use a single nio operation to write all the data onto the output + * channel. + */ + + final long begin = System.currentTimeMillis(); + + // setup the buffer for the operation. + directBuffer.limit(nextOffset); + directBuffer.position(0); + + // write the data. 
+ final long nwritten = outChannel.write(directBuffer); + + if( nwritten != count ) { + + throw new AssertionError("Expected to write " + count + + " bytes but wrote " + nwritten); + + } + + final long elapsed = System.currentTimeMillis() - begin; + + System.err.println("\nTransferred " + count + + " bytes from memory to disk at offset=" + toPosition + " in " + + elapsed + "ms"); + + return count; + + } + } |
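The transferFromDiskTo() helper above is the heart of this change: copy everything after the store's header from one file channel onto the current position of another, looping because transferFrom() may move fewer bytes than requested, and remembering that transferFrom() does not advance the destination channel's position. Below is a self-contained sketch of that NIO pattern using plain JDK types only; the class and method names are illustrative and are not part of bigdata.

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;

public class ChannelTransferSketch {

    /**
     * Copy everything after a fixed-size header on the source file onto the
     * current position of the output file.
     *
     * @return The #of bytes transferred.
     */
    static long transfer(RandomAccessFile src, int headerSize,
            RandomAccessFile out) throws IOException {

        final FileChannel srcChannel = src.getChannel();
        final FileChannel outChannel = out.getChannel();

        // #of bytes to transfer: everything after the header.
        final long count = src.length() - headerSize;

        // where the data will land on the output file.
        final long toPosition = outChannel.position();

        // skip the header on the source channel.
        srcChannel.position(headerSize);

        // pre-extend the output file (required in at least some circumstances).
        out.setLength(toPosition + count);

        long n = count;
        long to = toPosition;

        while (n > 0) {

            // transferFrom() may move fewer bytes than requested.
            final long nxfer = outChannel.transferFrom(srcChannel, to, n);

            to += nxfer;
            n -= nxfer;

        }

        // transferFrom() does NOT advance the output channel's position.
        outChannel.position(toPosition + count);

        return count;

    }

}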
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:28
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/java/com/bigdata/objndx Modified Files: IndexSegmentBuilder.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: IndexSegmentBuilder.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/objndx/IndexSegmentBuilder.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** IndexSegmentBuilder.java 9 Feb 2007 16:13:18 -0000 1.23 --- IndexSegmentBuilder.java 15 Feb 2007 22:01:18 -0000 1.24 *************** *** 52,56 **** import java.io.ByteArrayOutputStream; import java.io.File; - import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectOutputStream; --- 52,55 ---- *************** *** 65,74 **** import com.bigdata.journal.Journal; import com.bigdata.objndx.IndexSegment.CustomAddressSerializer; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; - import com.bigdata.rawstore.SimpleFileRawStore; - import com.bigdata.rawstore.SimpleMemoryRawStore; /** --- 64,72 ---- import com.bigdata.journal.Journal; + import com.bigdata.journal.TemporaryStore; import com.bigdata.objndx.IndexSegment.CustomAddressSerializer; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; /** *************** *** 157,175 **** /** - * The temporary file created to hold leaves unless the index build - * operation is fully buffered. When created, this file is deleted - * regardless of the outcome of the operation. - */ - public final File leafFile; - - /** - * The temporary file created to hold nodes unless either (a) the index - * build operation is fully buffered; or (b) the index segment will consist - * of just a root leaf. When created, this file is deleted regardless of the - * outcome of the operation. - */ - public final File nodeFile; - - /** * The file on which the {@link IndexSegment} is written. The file is closed * regardless of the outcome of the operation. --- 155,158 ---- *************** *** 181,185 **** * a region of the {@link #outFile}. */ ! protected Buffer leafBuffer; /** --- 164,168 ---- * a region of the {@link #outFile}. */ ! protected TemporaryStore leafBuffer; /** *************** *** 187,191 **** * a region of the {@link #outFile}. */ ! protected Buffer nodeBuffer; /** --- 170,174 ---- * a region of the {@link #outFile}. */ ! protected TemporaryStore nodeBuffer; /** *************** *** 364,372 **** * @throws IOException * ! * @todo make fullyBuffer, checksum, and record compression parameters in ! * this constructor variant * ! * FIXME test with and without each of these options {fullyBuffer, ! * useChecksum, recordCompressor}. */ public IndexSegmentBuilder(File outFile, File tmpDir, AbstractBTree btree, --- 347,355 ---- * @throws IOException * ! * @todo make checksum, and record compression parameters in this ! * constructor variant * ! * FIXME test with and without each of these options { useChecksum, ! * recordCompressor}. */ public IndexSegmentBuilder(File outFile, File tmpDir, AbstractBTree btree, *************** *** 375,380 **** this(outFile, tmpDir, btree.getEntryCount(), btree.entryIterator(), m, ! btree.nodeSer.valueSerializer, true/* fullyBuffer */, ! 
false/* useChecksum */, null/*new RecordCompressor()*/, errorRate); } --- 358,363 ---- this(outFile, tmpDir, btree.getEntryCount(), btree.entryIterator(), m, ! btree.nodeSer.valueSerializer, false/* useChecksum */, ! null/* new RecordCompressor() */, errorRate); } *************** *** 403,414 **** * @param valueSerializer * Used to serialize values in the new {@link IndexSegment}. - * @param fullyBuffer - * When true the nodes and leaves will be serialized into memory - * until the build is complete and then batched onto disk - * (faster, but more memory overhead). When false a temporary - * file will be used to store the nodes and the leaves will be - * written to disk as they are populated (slower, but less memory - * overhead). If latency is a concern then specify - * <code>fullyBuffer := true</code>. * @param useChecksum * whether or not checksums are computed for nodes and leaves. --- 386,389 ---- *************** *** 436,440 **** public IndexSegmentBuilder(File outFile, File tmpDir, final int entryCount, IEntryIterator entryIterator, final int m, ! IValueSerializer valueSerializer, boolean fullyBuffer, boolean useChecksum, RecordCompressor recordCompressor, final double errorRate) throws IOException { --- 411,415 ---- public IndexSegmentBuilder(File outFile, File tmpDir, final int entryCount, IEntryIterator entryIterator, final int m, ! IValueSerializer valueSerializer, boolean useChecksum, RecordCompressor recordCompressor, final double errorRate) throws IOException { *************** *** 546,563 **** } - /* - * the temporary file used to buffer leaves if the index build operation - * is not fully buffered. - */ - leafFile = (!fullyBuffer ? File.createTempFile("index", - ".leaves.seg", tmpDir) : null); - - /* - * the temporary file is used if there are nodes to write and the build - * operation is not fully buffered. - */ - nodeFile = (plan.nnodes > 0 && !fullyBuffer ? File.createTempFile( - "index", ".nodes.seg", tmpDir) : null); - final FileChannel outChannel; --- 521,524 ---- *************** *** 588,593 **** * index build operation. */ ! leafBuffer = fullyBuffer ? new MemoryBuffer() : new FileBuffer( ! leafFile, mode); /* --- 549,553 ---- * index build operation. */ ! leafBuffer = new TemporaryStore(); /* *************** *** 599,604 **** * abstraction for a disk file. */ ! nodeBuffer = plan.nnodes > 0 && fullyBuffer ? new MemoryBuffer() ! : plan.nnodes > 0 ? new FileBuffer(nodeFile, mode) : null; /* --- 559,563 ---- * abstraction for a disk file. */ ! nodeBuffer = plan.nnodes > 0 ? new TemporaryStore() : null; /* *************** *** 799,812 **** if (leafBuffer != null && leafBuffer.isOpen()) { try { ! leafBuffer.close(); } catch (Throwable t) { } } - if (leafFile != null) { - if(!leafFile.delete()) { - log.warn("Could not delete temporary file: " - + leafFile.getAbsoluteFile()); - } - } /* --- 758,765 ---- if (leafBuffer != null && leafBuffer.isOpen()) { try { ! leafBuffer.close(); // also deletes the file if any. } catch (Throwable t) { } } /* *************** *** 815,828 **** if (nodeBuffer != null && nodeBuffer.isOpen()) { try { ! nodeBuffer.close(); } catch (Throwable t) { } } - if (nodeFile != null) { - if(!nodeFile.delete()) { - log.warn("Could not delete temporary file: " - + nodeFile.getAbsoluteFile()); - } - } } --- 768,775 ---- if (nodeBuffer != null && nodeBuffer.isOpen()) { try { ! nodeBuffer.close(); // also deletes the file if any. } catch (Throwable t) { } } } *************** *** 1318,1322 **** // Transfer the leaf buffer en mass onto the output channel. ! 
long count = leafBuffer.transferTo(out); // The offset to the start of the node region. --- 1265,1269 ---- // Transfer the leaf buffer en mass onto the output channel. ! long count = leafBuffer.getBufferStrategy().transferTo(out); // The offset to the start of the node region. *************** *** 1341,1345 **** // transfer the nodes en mass onto the output channel. ! long count = nodeBuffer.transferTo(out); // Close the buffer. --- 1288,1292 ---- // transfer the nodes en mass onto the output channel. ! long count = nodeBuffer.getBufferStrategy().transferTo(out); // Close the buffer. *************** *** 1717,1947 **** } - - /** - * An interface that abstracts the buffering of nodes or leaves. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ - static interface Buffer extends IRawStore { - - /** - * A block operation that transfers the serialized records en mass from - * the buffer onto a file. The buffered records are written "in order" - * starting at the current position on the file. The file is grow if - * necessary. - * - * @param out - * The file to which the buffer contents will be transferred. - * - * @return The #of bytes written. - * - * @throws IOException - */ - public long transferTo(RandomAccessFile out) throws IOException; - - } - - /** - * Note: the comments below really apply to the base - * {@link SimpleFileRawStore} class. - * - * @todo consider the file mode and buffering. We should at least buffer - * several pages of data per write and can experiment with writing - * through (vs caching writes in the OS layer). The file does not need - * to be "live" until it is completely written, so there is no need to - * update file metadata until the end of the build process. - * - * @todo Consider using - * {@link FileOutputStream#FileOutputStream(File, boolean)} to open - * the temporary file in an append only mode and then get the - * {@link FileChannel} from {@link FileOutputStream#getChannel()}. - * Does this improve performance? Can we still read from the channel? - * Try this on the {@link Journal} as well, at least for cases where - * we will never read from the journal (i.e., fully buffered). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ - static class FileBuffer extends SimpleFileRawStore implements Buffer { - - public FileBuffer(File file, String mode) throws IOException { - - super(file,mode); - - } - - // public long write(ByteBuffer data) { - // - // System.err.println("\nwriting "+data.remaining()+" bytes on buffer."); - // - // return super.write(data); - // - // } - - public long transferTo(RandomAccessFile out) throws IOException { - - final long begin = System.currentTimeMillis(); - - // #of bytes to transfer. - final long count = raf.length(); - - final FileChannel outChannel = out.getChannel(); - - // current position on the output channel. - final long toPosition = outChannel.position(); - - if(toPosition + count > Integer.MAX_VALUE) { - - throw new IOException("Index segment exceeds int32 bytes."); - - } - - /* - * Transfer data from channel to channel. - */ - - final FileChannel tmpChannel = raf.getChannel(); - - // Set the fromPosition on source channel. - tmpChannel.position(0); - - /* - * Extend the output file. This is required at least for some - * circumstances. - */ - out.setLength(toPosition+count); - - /* - * Transfer the data. It is possible that this will take multiple - * writes for at least some implementations. 
- */ - - // System.err.println("fromPosition="+tmpChannel.position()+", toPosition="+toPosition+", count="+count); - - int nwrites = 0; // #of write operations. - - { - - long n = count; - - long to = toPosition; - - while (n > 0) { - - long nxfer = outChannel.transferFrom(tmpChannel, to, n); - - to += nxfer; - - n -= nxfer; - - nwrites++; - - // // Verify transfer is complete. - // if (nxfer != count) { - // - // throw new IOException("Expected to transfer " + count - // + ", but transferred " + nxfer); - // - // } - - } - - } - - /* - * Update the position on the output channel since transferFrom does - * NOT do this itself. - */ - outChannel.position(toPosition+count); - - final long elapsed = System.currentTimeMillis() - begin; - - System.err.println("\nTransferred " + count - + " bytes from disk channel to disk channel (offset=" - + toPosition + ") in " + nwrites + " writes and " + elapsed - + "ms"); - - return count; - - } - - } - - /** - * Buffers in memory and bulks transfers all buffered data to the output - * channel in a single nio operation (maximum speed, but also maximum memory - * overhead). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ - static class MemoryBuffer extends SimpleMemoryRawStore implements Buffer { - - // public long write(ByteBuffer data) { - // - // System.err.println("\nwriting "+data.remaining()+" bytes on buffer."); - // - // return super.write(data); - // - // } - - public long transferTo(RandomAccessFile out) throws IOException { - - long count = 0L; - - final int n = records.size(); - - ByteBuffer[] bufs = new ByteBuffer[n]; - - for( int i=0; i<n; i++) { - - final byte[] b = records.get(i); - - count += b.length; - - bufs[i] = ByteBuffer.wrap(b); - - } - - final FileChannel outChannel = out.getChannel(); - - // current position on the output channel. - final long toPosition = outChannel.position(); - - if(toPosition + count > Integer.MAX_VALUE) { - - throw new IOException("Index segment exceeds int32 bytes."); - - } - - /* - * use a single nio operation to write all the data onto the output - * channel. - */ - - final long begin = System.currentTimeMillis(); - - // write the data. - final long nwritten = outChannel.write(bufs); - - if( nwritten != count ) { - - throw new AssertionError("Expected to write " + count - + " bytes but wrote " + nwritten); - - } - - final long elapsed = System.currentTimeMillis() - begin; - - System.err.println("\nTransferred " + count + " bytes in " + n - + " records from memory to disk at offset=" + toPosition - + " in " + elapsed + "ms"); - - return count; - - } - - } } --- 1664,1667 ---- |
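With the FileBuffer and MemoryBuffer inner classes gone, the builder's buffering reduces to the pattern sketched below. This is a condensed, hypothetical outline rather than the actual IndexSegmentBuilder code: it assumes a plan, an outFile, and the serialization steps already described in the diff, and only TemporaryStore, getBufferStrategy().transferTo(...) and close() are taken from this commit.

// Leaves always need a buffer; nodes only if the plan has more than a root leaf.
TemporaryStore leafBuffer = new TemporaryStore();
TemporaryStore nodeBuffer = plan.nnodes > 0 ? new TemporaryStore() : null;

// ... serialize leaves onto leafBuffer and nodes onto nodeBuffer; both stores
// stay in memory until the maximum in-memory extent and then spill
// transparently onto a temporary file ...

RandomAccessFile out = new RandomAccessFile(outFile, "rw");

// transfer the leaf region, then the node region, en mass onto the segment file.
long leafBytes = leafBuffer.getBufferStrategy().transferTo(out);

long nodeBytes = nodeBuffer != null
        ? nodeBuffer.getBufferStrategy().transferTo(out)
        : 0L;

// closing a TemporaryStore also deletes its backing temp file, if any.
leafBuffer.close();

if (nodeBuffer != null) nodeBuffer.close();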
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:25
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/isolation In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/java/com/bigdata/isolation Modified Files: IsolatedBTree.java UnisolatedBTree.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: UnisolatedBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/isolation/UnisolatedBTree.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** UnisolatedBTree.java 15 Feb 2007 14:23:50 -0000 1.3 --- UnisolatedBTree.java 15 Feb 2007 22:01:18 -0000 1.4 *************** *** 81,88 **** * @version $Id$ * - * @todo efficient sharing of nodes and leaves for concurrent read-only views - * (stealing children vs wrapping them with a flyweight wrapper; reuse of - * the same btree instance for reading from the same historical state). - * * @see IsolatedBTree, a {@link BTree} that has been isolated by a transaction. * --- 81,84 ---- Index: IsolatedBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/isolation/IsolatedBTree.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** IsolatedBTree.java 15 Feb 2007 01:34:22 -0000 1.2 --- IsolatedBTree.java 15 Feb 2007 22:01:18 -0000 1.3 *************** *** 144,153 **** * enough since just writing the tree onto the store does not make it * restart safe. The {@link Tx} class needs to handle this. - * - * @todo It would be very nice if we could reuse immutable nodes from the - * last committed state of a given btree. However, we can not simply - * use the BTree instance from the global state since intervening - * writes will show up inside of its node set and the view MUST be of - * a historical ground state. */ public IsolatedBTree(IRawStore store, UnisolatedBTree src) { --- 144,147 ---- *************** *** 313,324 **** // and that applies them such that a key deleted in the write set will // not have a value reported from the isolated index. return new FusedView(this,src).rangeIterator(fromKey, toKey); } ! @Override public IEntryIterator entryIterator() { ! // TODO Auto-generated method stub ! return super.entryIterator(); } --- 307,323 ---- // and that applies them such that a key deleted in the write set will // not have a value reported from the isolated index. + return new FusedView(this,src).rangeIterator(fromKey, toKey); } ! /** ! * Returns an ordered fused view of the entries in the key range in this ! * write set merged with those in the key range in the isolated index. ! */ public IEntryIterator entryIterator() { ! ! return new FusedView(this,src).rangeIterator(null,null); ! } *************** *** 396,400 **** * conflict resolution that spans more than a single key-value at a * time. However, we also need to expose the Tx to the conflict resolver ! * for that to work. */ BTree tmp = null; --- 395,399 ---- * conflict resolution that spans more than a single key-value at a * time. However, we also need to expose the Tx to the conflict resolver ! * for that to work (which is why we have not exposed the tx yet). */ BTree tmp = null; |
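Both read paths on the isolated index now go through the same fused view, so a range scan and a full scan differ only in their bounds. A minimal sketch using only the calls shown in the diff; the variable names are illustrative.

// Entries in the key range as seen by the transaction: the write set of the
// IsolatedBTree is layered over the historical source index, and a key deleted
// in the write set reports no value from the source.
IEntryIterator rangeItr = new FusedView(isolated, src).rangeIterator(fromKey, toKey);

// A full scan is simply the unbounded range.
IEntryIterator allItr = new FusedView(isolated, src).rangeIterator(null, null);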
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:25
|
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/journal In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/test/com/bigdata/journal Modified Files: AbstractBufferStrategyTestCase.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: AbstractBufferStrategyTestCase.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/journal/AbstractBufferStrategyTestCase.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** AbstractBufferStrategyTestCase.java 9 Feb 2007 16:13:17 -0000 1.1 --- AbstractBufferStrategyTestCase.java 15 Feb 2007 22:01:18 -0000 1.2 *************** *** 53,56 **** --- 53,57 ---- import java.util.Random; + import com.bigdata.objndx.IndexSegmentBuilder; import com.bigdata.rawstore.AbstractRawStoreTestCase; import com.bigdata.rawstore.Bytes; *************** *** 58,61 **** --- 59,67 ---- /** + * + * @todo write tests for + * {@link IBufferStrategy#transferTo(java.io.RandomAccessFile)}. This + * code is currently getting "checked" by the {@link IndexSegmentBuilder}. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ |
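A minimal sketch of the kind of test the @todo above calls for, written here against the TemporaryStore since its buffer strategy already implements transferTo(). This is not code from the test suite; it assumes a JUnit-style test case such as TestTemporaryStore, with the same imports (File, RandomAccessFile, ByteBuffer, Random) used by the existing tests.

public void test_transferTo_sketch() throws IOException {

    TemporaryStore store = new TemporaryStore();

    AbstractBufferStrategy strategy = (AbstractBufferStrategy) store
            .getBufferStrategy();

    // write a couple of records on the store.
    byte[] b = new byte[1000];
    new Random().nextBytes(b);

    store.write(ByteBuffer.wrap(b));
    store.write(ByteBuffer.wrap(b));

    // transfer the written region onto a scratch file.
    File outFile = File.createTempFile("transferTo", ".seg");
    outFile.deleteOnExit();

    RandomAccessFile out = new RandomAccessFile(outFile, "rw");

    long nbytes = strategy.transferTo(out);

    // everything written on the store should have been transferred.
    assertEquals(strategy.getNextOffset(), nbytes);

    out.close();

    store.close();

}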
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:24
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/java/com/bigdata/scaleup Modified Files: PartitionedJournal.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: PartitionedJournal.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup/PartitionedJournal.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** PartitionedJournal.java 15 Feb 2007 14:23:50 -0000 1.6 --- PartitionedJournal.java 15 Feb 2007 22:01:19 -0000 1.7 *************** *** 911,916 **** null, mergeItr.nentries, new MergedEntryIterator(mergeItr), mseg, oldIndex.btree.getNodeSerializer() ! .getValueSerializer(), true/* fullyBuffer */, ! false/* useChecksum */, null/* recordCompressor */, 0d/* errorRate */); // close the merged leaf iterator (and release its buffer/file). --- 911,916 ---- null, mergeItr.nentries, new MergedEntryIterator(mergeItr), mseg, oldIndex.btree.getNodeSerializer() ! .getValueSerializer(), false/* useChecksum */, ! null/* recordCompressor */, 0d/* errorRate */); // close the merged leaf iterator (and release its buffer/file). |
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:24
|
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/isolation In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/test/com/bigdata/isolation Modified Files: TestUnisolatedBTree.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: TestUnisolatedBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/isolation/TestUnisolatedBTree.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** TestUnisolatedBTree.java 15 Feb 2007 14:23:50 -0000 1.3 --- TestUnisolatedBTree.java 15 Feb 2007 22:01:19 -0000 1.4 *************** *** 379,383 **** /** ! * @todo Test restart-safety of data, including deletion markers. */ public void test_restartSafe() { --- 379,383 ---- /** ! * Tests restart-safety of data, including deletion markers. */ public void test_restartSafe() { |
From: Bryan T. <tho...@us...> - 2007-02-15 22:01:24
|
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/scaleup In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv24236/src/test/com/bigdata/scaleup Modified Files: TestMetadataIndex.java Log Message: Modified the IndexSegmentBuilder to use the new TemporaryStore and removed the fullyBuffer boolean option since data will be automatically buffered out to 100M and the spill over onto disk. Index: TestMetadataIndex.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/scaleup/TestMetadataIndex.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** TestMetadataIndex.java 13 Feb 2007 23:01:12 -0000 1.4 --- TestMetadataIndex.java 15 Feb 2007 22:01:19 -0000 1.5 *************** *** 604,611 **** 100, btree, seg01).merge(); ! new IndexSegmentBuilder(outFile02, null, mergeItr.nentries, new MergedEntryIterator(mergeItr ! ), 100, btree.getNodeSerializer().getValueSerializer(), ! true/* fullyBuffer */, false/* useChecksum */, ! null/*recordCompressor*/, 0d/*errorRate*/); /* --- 604,611 ---- 100, btree, seg01).merge(); ! new IndexSegmentBuilder(outFile02, null, mergeItr.nentries, ! new MergedEntryIterator(mergeItr), 100, btree ! .getNodeSerializer().getValueSerializer(), ! false/* useChecksum */, null/* recordCompressor */, 0d/* errorRate */); /* *************** *** 884,891 **** IndexSegmentBuilder builder = new IndexSegmentBuilder( outFile02, null, mergeItr.nentries, ! new MergedEntryIterator(mergeItr), mseg, ! testData.getNodeSerializer().getValueSerializer(), ! true/* fullyBuffer */, false/* useChecksum */, ! null/* recordCompressor */, 0d/* errorRate */); // close the merged leaf iterator (and release its buffer/file). --- 884,891 ---- IndexSegmentBuilder builder = new IndexSegmentBuilder( outFile02, null, mergeItr.nentries, ! new MergedEntryIterator(mergeItr), mseg, testData ! .getNodeSerializer().getValueSerializer(), ! false/* useChecksum */, null/* recordCompressor */, ! 0d/* errorRate */); // close the merged leaf iterator (and release its buffer/file). |
From: Bryan T. <tho...@us...> - 2007-02-15 20:59:26
|
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/journal In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv1690/src/test/com/bigdata/journal Modified Files: TestAll.java Added Files: TestTemporaryStore.java Log Message: Added a TemporaryStore that starts in memory and then automatically overflows to disk. --- NEW FILE: TestTemporaryStore.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Feb 15, 2007 */ package com.bigdata.journal; import java.io.File; import java.nio.ByteBuffer; import java.util.Random; import com.bigdata.rawstore.AbstractRawStoreTestCase; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; /** * Test suite for {@link TemporaryStore}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public class TestTemporaryStore extends AbstractRawStoreTestCase { /** * */ public TestTemporaryStore() { } /** * @param name */ public TestTemporaryStore(String name) { super(name); } protected IRawStore getStore() { return new TemporaryStore(); } /** * Unit test for {@link AbstractBufferStrategy#overflow(int)}. The test * verifies that the extent and the user extent are correctly updated after * an overflow. */ public void test_overflow() { TemporaryStore store = (TemporaryStore) getStore(); AbstractBufferStrategy bufferStrategy = (AbstractBufferStrategy) store .getBufferStrategy(); final long userExtent = bufferStrategy.getUserExtent(); final long extent = bufferStrategy.getExtent(); final long initialExtent = bufferStrategy.getInitialExtent(); final int nextOffset = bufferStrategy.getNextOffset(); assertEquals("extent",initialExtent, extent); final int needed = Bytes.kilobyte32; assertTrue("overflow()", bufferStrategy.overflow(needed)); assertTrue("extent", extent + needed <= bufferStrategy.getExtent()); assertTrue("userExtent", userExtent + needed <= bufferStrategy .getUserExtent()); assertEquals(nextOffset, bufferStrategy.getNextOffset()); store.close(); } /** * Test verifies that a write up to the remaining extent does not trigger an * overflow. 
*/ public void test_writeNoExtend() { TemporaryStore store = (TemporaryStore) getStore(); AbstractBufferStrategy bufferStrategy = (AbstractBufferStrategy) store .getBufferStrategy(); final long userExtent = bufferStrategy.getUserExtent(); final long extent = bufferStrategy.getExtent(); final long initialExtent = bufferStrategy.getInitialExtent(); final int nextOffset = bufferStrategy.getNextOffset(); assertEquals("extent",initialExtent, extent); long remaining = userExtent - nextOffset; assertTrue(remaining<Integer.MAX_VALUE); ByteBuffer tmp = ByteBuffer.allocate((int)remaining); bufferStrategy.write(tmp); // no change in extent. assertEquals("extent",extent, bufferStrategy.getExtent()); // no change in user extent. assertEquals("userExtent",userExtent, bufferStrategy.getUserExtent()); store.close(); } /** * Test verifies that a write over the remaining extent triggers an * overflow. The test also makes sure that the existing data is recoverable * and that the new data is also recoverable (when the buffer is extended it * is typically copied while the length of a file is simply changed). */ public void test_writeWithExtend() { TemporaryStore store = (TemporaryStore) getStore(); AbstractBufferStrategy bufferStrategy = (AbstractBufferStrategy) store .getBufferStrategy(); final long userExtent = bufferStrategy.getUserExtent(); final long extent = bufferStrategy.getExtent(); final long initialExtent = bufferStrategy.getInitialExtent(); final int nextOffset = bufferStrategy.getNextOffset(); assertEquals("extent",initialExtent, extent); /* * now write random bytes that exactly fill the remaining space and * verify that write. */ long remaining = userExtent - nextOffset; assertTrue(remaining<Integer.MAX_VALUE); final byte[] b = new byte[(int)remaining]; Random r = new Random(); r.nextBytes(b); ByteBuffer tmp = ByteBuffer.wrap(b); final long addr = bufferStrategy.write(tmp); // no change in extent. assertEquals("extent",extent, bufferStrategy.getExtent()); // no change in user extent. assertEquals("userExtent",userExtent, bufferStrategy.getUserExtent()); assertEquals(b, bufferStrategy.read(addr, null)); /* * now write some more random bytes forcing an extension of the buffer. * we verify both the original write on the buffer and the new write. * this helps to ensure that data was copied correctly into the extended * buffer. */ final byte[] b2 = new byte[Bytes.kilobyte32]; r.nextBytes(b2); ByteBuffer tmp2 = ByteBuffer.wrap(b2); final long addr2 = bufferStrategy.write(tmp2); // verify extension of buffer. assertTrue("extent", extent + b2.length <= bufferStrategy.getExtent()); // verify extension of buffer. assertTrue("userExtent", userExtent + b2.length <= bufferStrategy .getUserExtent()); // verify data written before we overflowed the buffer. assertEquals(b, bufferStrategy.read(addr, null)); // verify data written after we overflowed the buffer. assertEquals(b2, bufferStrategy.read(addr2, null)); store.close(); } /** * Test that the store transparently overflows onto disk when the maximum * in-memory limit has been exceeded. The test also makes sure that the * existing data is recoverable and that the new data is also recoverable * (when the buffer is extended it is typically copied while the length of a * file is simply changed). Finally, the test makes sure that the temporary * file is deleted when the store is closed. */ public void test_overflowToDisk() { Random r = new Random(); /* * Note: We use a small store for this test to minimize the resource * requirements. 
This should have no impact on the ability to test * correctness. */ TemporaryStore store = new TemporaryStore(Bytes.kilobyte*10, Bytes.kilobyte * 100, false); // verify that we are using an in-memory buffer. assertTrue(store.getBufferStrategy() instanceof TransientBufferStrategy); { AbstractBufferStrategy bufferStrategy = (AbstractBufferStrategy) store .getBufferStrategy(); final long userExtent = bufferStrategy.getUserExtent(); final long extent = bufferStrategy.getExtent(); final long initialExtent = bufferStrategy.getInitialExtent(); final int nextOffset = bufferStrategy.getNextOffset(); // will be zero for a transient buffer. assertEquals("nextOffset",0,nextOffset); // check the initial extent. assertEquals("extent", store.initialInMemoryExtent, extent); // will be the same for a transient buffer. assertEquals("initialExtent", store.initialInMemoryExtent, initialExtent ); // will be the same for a transient buffer. assertEquals("userExtent", store.initialInMemoryExtent, userExtent ); /* * pre-extend the transient buffer to its maximum capacity. */ bufferStrategy.truncate(store.maximumInMemoryExtent); // verify that we are using an in-memory buffer. assertTrue(store.getBufferStrategy() instanceof TransientBufferStrategy); /* * for the transient store, this gives us exactly that many bytes in * both the user extent and the overall extent (there is no reserved * header). */ assertEquals("extent", store.maximumInMemoryExtent, bufferStrategy .getExtent()); assertEquals("userExtent", store.maximumInMemoryExtent, bufferStrategy.getUserExtent()); } final byte[] b; final long addr; { final long extent = store.getBufferStrategy().getExtent(); final long userExtent = store.getBufferStrategy().getUserExtent(); /* * now write random bytes that exactly fill the remaining space and * verify that write. */ long remaining = userExtent - store.getBufferStrategy().getNextOffset(); assertTrue(remaining < Integer.MAX_VALUE); b = new byte[(int) remaining]; r.nextBytes(b); ByteBuffer tmp = ByteBuffer.wrap(b); addr = store.write(tmp); // verify that we are using an in-memory buffer. assertTrue(store.getBufferStrategy() instanceof TransientBufferStrategy); // no change in extent. assertEquals("extent", extent, store.getBufferStrategy() .getExtent()); // no change in user extent. assertEquals("userExtent", userExtent, store.getBufferStrategy() .getUserExtent()); // verify the data. assertEquals(b, store.read(addr, null)); } /* * Now write some more random bytes forcing an extension of the buffer. * * Note that this will cause the buffer to overflow and convert to a * disk-based buffer. * * We verify both the original write on the buffer and the new write. * this helps to ensure that data was copied correctly into the extended * buffer. */ final byte[] b2 = new byte[Bytes.kilobyte32]; r.nextBytes(b2); ByteBuffer tmp2 = ByteBuffer.wrap(b2); final long addr2 = store.write(tmp2); // verify that we are using an disk-based store. assertTrue(store.getBufferStrategy() instanceof DiskOnlyStrategy); // verify extension of store. assertTrue("extent", store.maximumInMemoryExtent + b2.length <= store .getBufferStrategy().getExtent()); // verify extension of store. assertTrue("userExtent", store.maximumInMemoryExtent + b2.length <= store .getBufferStrategy().getUserExtent()); // verify data written before we overflowed the buffer. assertEquals(b, store.read(addr, null)); // verify data written after we overflowed the buffer. assertEquals(b2, store.read(addr2, null)); // the name of the on-disk file. 
File file = ((DiskOnlyStrategy)store.getBufferStrategy()).getFile(); // verify that it exists. assertTrue(file.exists()); // close the store. store.close(); // verify that the file is gone. assertFalse(file.exists()); } } Index: TestAll.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/journal/TestAll.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** TestAll.java 13 Feb 2007 23:01:01 -0000 1.6 --- TestAll.java 15 Feb 2007 20:59:21 -0000 1.7 *************** *** 83,86 **** --- 83,88 ---- suite.addTestSuite( TestRootBlockView.class ); + + suite.addTestSuite( TestTemporaryStore.class ); suite.addTest( TestTransientJournal.suite() ); |
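In application code the store is used like any other IRawStore. A minimal usage sketch based on the behaviour exercised by the test above; the return type of read() is assumed to be a ByteBuffer here.

// Defaults: a ~10M in-memory buffer, converting to a disk-backed temporary
// file if the store grows past ~100M; the file is deleted when the store is
// closed (and no later than JVM exit).
TemporaryStore store = new TemporaryStore();

byte[] data = new byte[] { 1, 2, 3, 4 };

// write() returns a long "Addr" that encodes both the offset and the length.
long addr = store.write(ByteBuffer.wrap(data));

// read it back, passing null for the destination buffer as the tests above do.
ByteBuffer result = store.read(addr, null);

// closing the store releases the buffer and deletes the temp file, if any.
store.close();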
From: Bryan T. <tho...@us...> - 2007-02-15 20:59:26
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/rawstore In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv1690/src/java/com/bigdata/rawstore Modified Files: Addr.java SimpleMemoryRawStore.java SimpleFileRawStore.java Log Message: Added a TemporaryStore that starts in memory and then automatically overflows to disk. Index: SimpleFileRawStore.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/rawstore/SimpleFileRawStore.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** SimpleFileRawStore.java 15 Feb 2007 14:23:49 -0000 1.2 --- SimpleFileRawStore.java 15 Feb 2007 20:59:21 -0000 1.3 *************** *** 55,58 **** --- 55,60 ---- import java.util.Set; + import com.bigdata.journal.TemporaryStore; + /** *************** *** 61,64 **** --- 63,70 ---- * must code the offset into the file using {@link Addr#toLong(int, int)}. * + * @see {@link TemporaryStore}, which provides a more solution for temporary + * data that begins with the benefits of a memory-resident buffer and then + * converts to a disk-based store on overflow. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ Index: Addr.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/rawstore/Addr.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Addr.java 5 Feb 2007 18:17:42 -0000 1.1 --- Addr.java 15 Feb 2007 20:59:21 -0000 1.2 *************** *** 54,64 **** /** * An address encodes both an int32 length and an int32 offset into a single ! * long integer. This limits the addressable size of a file to int32 bytes, but ! * that limit far exceeds the envisoned capacity of a single file in the bigdata ! * architecture. Note that the long integer ZERO (0L) is reserved and always has ! * the semantics of a <em>null</em> reference. Writes at offset zero are ! * allowed, depending on the store, by writes of zero length are disallowed and ! * hence no address will ever be ZERO (0L). ! * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ --- 54,65 ---- /** * An address encodes both an int32 length and an int32 offset into a single ! * long integer. This limits the addressable size of a file to int32 bytes ! * (actually, only 2^31 bytes, e.g., 2G, since Java is using signed integers), ! * but that limit far exceeds the envisoned capacity of a single file in the ! * bigdata architecture. Note that the long integer ZERO (0L) is reserved and ! * always has the semantics of a <em>null</em> reference. Writes at offset ! * zero are allowed, depending on the store, by writes of zero length are ! * disallowed and hence no address will ever be ZERO (0L). ! * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ *************** *** 67,71 **** * containing its own address space. A prime candidate for this is the * {@link IndexSegmentBuilder} which currently jumps through hoops in ! * order to make the nodes resolvable. When considering segments, note * that addresses may currently be directly tested for order since the * offset is in the high int32 word. --- 68,72 ---- * containing its own address space. A prime candidate for this is the * {@link IndexSegmentBuilder} which currently jumps through hoops in ! * order to make the nodes resolvable. 
When considering segments, note * that addresses may currently be directly tested for order since the * offset is in the high int32 word. Index: SimpleMemoryRawStore.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/rawstore/SimpleMemoryRawStore.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** SimpleMemoryRawStore.java 9 Feb 2007 16:13:17 -0000 1.1 --- SimpleMemoryRawStore.java 15 Feb 2007 20:59:21 -0000 1.2 *************** *** 53,56 **** --- 53,57 ---- import java.util.Map; + import com.bigdata.journal.TemporaryStore; /** *************** *** 58,61 **** --- 59,65 ---- * buffered in memory. The writes are stored in an {@link ArrayList}. * + * @see {@link TemporaryStore}, which provides a more scalable solution for temporary + * data. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ |
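The encoding described in the Addr javadoc — an int32 offset in the high word, an int32 byte count in the low word, with 0L reserved as a null reference — comes down to simple bit packing. The standalone sketch below illustrates the technique only; the real Addr class may order its arguments and name its accessors differently.

// Pack an offset and a byte count into one long. Because the offset occupies
// the high word, packed addresses sort in offset order, and 0L is never
// produced for a real record since zero-length writes are disallowed.
static long encode(int offset, int nbytes) {
    return ((long) offset << 32) | (nbytes & 0xFFFFFFFFL);
}

static int getOffset(long addr) {
    return (int) (addr >>> 32);
}

static int getByteCount(long addr) {
    return (int) (addr & 0xFFFFFFFFL);
}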
From: Bryan T. <tho...@us...> - 2007-02-15 20:59:25
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/journal In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv1690/src/java/com/bigdata/journal Modified Files: AbstractBufferStrategy.java FileMetadata.java Tx.java BasicBufferStrategy.java DiskOnlyStrategy.java Added Files: TemporaryStore.java OverflowException.java Log Message: Added a TemporaryStore that starts in memory and then automatically overflows to disk. Index: DiskOnlyStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/DiskOnlyStrategy.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** DiskOnlyStrategy.java 9 Feb 2007 16:13:18 -0000 1.15 --- DiskOnlyStrategy.java 15 Feb 2007 20:59:21 -0000 1.16 *************** *** 61,64 **** --- 61,76 ---- private boolean open; + public File getFile() { + + return file; + + } + + // public FileChannel getFileChannel() { + // + // return channel; + // + // } + DiskOnlyStrategy(long maximumExtent, FileMetadata fileMetadata) { *************** *** 255,259 **** if (!overflow((int) needed)) { ! throw new RuntimeException("overflow"); } --- 267,271 ---- if (!overflow((int) needed)) { ! throw new OverflowException(); } Index: AbstractBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/AbstractBufferStrategy.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** AbstractBufferStrategy.java 9 Feb 2007 16:13:18 -0000 1.8 --- AbstractBufferStrategy.java 15 Feb 2007 20:59:21 -0000 1.9 *************** *** 101,106 **** */ public boolean overflow(int needed) { ! if( getUserExtent() +needed > Integer.MAX_VALUE) { // Would overflow int32 bytes. --- 101,110 ---- */ public boolean overflow(int needed) { + + final long userExtent = getUserExtent(); ! final long required = userExtent + needed; ! ! if ( required > Integer.MAX_VALUE) { // Would overflow int32 bytes. *************** *** 110,117 **** } /* * Increase by the initial extent or by 32M, whichever is greater. */ ! long newExtent = getUserExtent() + Math.max(initialExtent, Bytes.megabyte * 32); --- 114,129 ---- } + if( required > maximumExtent ) { + + // Would exceed the maximum extent. + + return false; + + } + /* * Increase by the initial extent or by 32M, whichever is greater. */ ! long newExtent = userExtent + Math.max(initialExtent, Bytes.megabyte * 32); *************** *** 124,128 **** /* ! * Extent the capacity. */ truncate( newExtent ); --- 136,140 ---- /* ! * Extend the capacity. */ truncate( newExtent ); Index: FileMetadata.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/FileMetadata.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** FileMetadata.java 8 Feb 2007 21:32:10 -0000 1.10 --- FileMetadata.java 15 Feb 2007 20:59:21 -0000 1.11 *************** *** 91,95 **** * journal. */ ! final int headerSize0 = SIZE_MAGIC + SIZE_VERSION + (SIZEOF_ROOT_BLOCK * 2); /** --- 91,95 ---- * journal. */ ! static final int headerSize0 = SIZE_MAGIC + SIZE_VERSION + (SIZEOF_ROOT_BLOCK * 2); /** *************** *** 116,120 **** * The unique segment identifier. * @param file ! * The name of the file to be opened. * @param bufferMode * The {@link BufferMode}. --- 116,122 ---- * The unique segment identifier. * @param file ! * The name of the file to be opened - when null, a file with a ! 
* unique name will be created in the default temporary ! * directory. * @param bufferMode * The {@link BufferMode}. *************** *** 140,149 **** * journal. */ ! FileMetadata(int segmentId, File file, BufferMode bufferMode, boolean useDirectBuffers, ! long initialExtent, boolean create, boolean readOnly, ! ForceEnum forceWrites) throws RuntimeException { ! if (file == null) ! throw new IllegalArgumentException(); if (bufferMode == null) --- 142,151 ---- * journal. */ ! FileMetadata(int segmentId, File file, BufferMode bufferMode, ! boolean useDirectBuffers, long initialExtent, boolean create, ! boolean readOnly, ForceEnum forceWrites) throws RuntimeException { ! // if (file == null) ! // throw new IllegalArgumentException(); if (bufferMode == null) *************** *** 175,180 **** this.segment = segmentId; - this.file = file; - this.bufferMode = bufferMode; --- 177,180 ---- *************** *** 183,187 **** this.readOnly = readOnly; ! exists = file.exists(); if (exists) { --- 183,187 ---- this.readOnly = readOnly; ! this.exists = file != null && file.exists(); if (exists) { *************** *** 208,215 **** } System.err.println("Will create file: " + file.getAbsoluteFile()); } ! try { --- 208,231 ---- } + if (file == null) { + + try { + + file = File.createTempFile("bigdata", ".store"); + + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + } + System.err.println("Will create file: " + file.getAbsoluteFile()); } ! ! this.file = file; ! try { Index: Tx.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/Tx.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** Tx.java 15 Feb 2007 14:23:49 -0000 1.26 --- Tx.java 15 Feb 2007 20:59:21 -0000 1.27 *************** *** 58,62 **** import com.bigdata.objndx.IndexSegment; import com.bigdata.rawstore.IRawStore; - import com.bigdata.rawstore.SimpleMemoryRawStore; import com.bigdata.scaleup.MetadataIndex; import com.bigdata.scaleup.PartitionedIndex; --- 58,61 ---- *************** *** 147,163 **** /** ! * A store used to hold write sets for the transaction. The same store can ! * be used to buffer resolved write-write conflicts. ! * ! * @todo This uses a memory-based store to avoid issues with maintaining ! * transactions across journal boundaries. This could be improved on ! * trivially by transparently promoting the store from memory-based to ! * disk-based if it overflows some set maximum capacity. In such a ! * scenario, the file backing the on disk store would be flagged for ! * deletion on exit of the JVM and allocated in a temporary directory.<br> ! * A further improvement would allow transactions to use partitioned ! * indices. */ ! final private IRawStore tmpStore = new SimpleMemoryRawStore(); /** --- 146,161 ---- /** ! * A store used to hold write sets for the transaction. The same store can ! * be used to buffer resolved write-write conflicts. This uses a ! * {@link TemporaryStore} to avoid issues with maintaining transactions ! * across journal boundaries. This places a limit on transactions of 2G in ! * their serialized write set. Since the indices use a copy-on-write model, ! * the amount of user data can be significantly less due to multiple ! * versions of the same btree nodes. Using smaller branching factors in the ! * isolated index helps significantly to increase the effective utilization ! * of the store since copy-on-write causes fewer bytes to be copied each ! * time it is invoked. */ ! 
final private IRawStore tmpStore = new TemporaryStore(); /** --- NEW FILE: TemporaryStore.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Feb 15, 2007 */ package com.bigdata.journal; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import com.bigdata.rawstore.Addr; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; /** * A non-restart-safe store for temporary data that buffers data in memory until * a maximum capacity has been reached and then converts to a disk-based store * with a maximum capacity of 2G. The maximum capacity constraint is imposed by * {@link Addr}. On conversion to a disk-backed store, the disk file is created * using the temporary file mechansism and is marked for eventual deletion no * later than when the JVM exits and as soon as the store is {@link #close()}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * * @todo The {@link TemporaryStore} would benefit from any caching or AIO * solutions developed for the {@link DiskOnlyStrategy}. */ public class TemporaryStore implements IRawStore { /** * The initial size of the in-memory buffer. This buffer will grow as * necessary until {@link #maximumInMemoryExtent} at which point the store * will convert over to a disk-based mechanism. */ public final long initialInMemoryExtent; /** * The maximum capacity of the in-memory buffer. */ public final long maximumInMemoryExtent; /** * Whether or not a direct buffer was used for the in-memory store (not * recommended). */ public final boolean useDirectBuffers; private boolean open = true; private IBufferStrategy buf; public IBufferStrategy getBufferStrategy() { return buf; } /** * Create a {@link TemporaryStore} with an initial in-memory capacity of 10M * that will grow up to 100M before converting into a disk-based store * backed by a temporary file. */ public TemporaryStore() { this( Bytes.megabyte*10, Bytes.megabyte*100, false ); } /** * Create a {@link TemporaryStore} with the specified configuration. * * @param initialInMemoryExtent * The initial size of the in-memory buffer. 
This buffer will * grow as necessary until <i>maximumInMemoryExtent</i> at which * point the store will convert over to a disk-based mechanism. * @param maximumInMemoryExtent * The maximum capacity of the in-memory buffer. The actual * maximum may differ slightly based on the buffer growth policy. * @param useDirectBuffers * Whether or not the in-memory buffer will be direct. The use of * a direct buffer here is NOT recommended. */ public TemporaryStore(long initialInMemoryExtent, long maximumInMemoryExtent, boolean useDirectBuffers) { buf = new TransientBufferStrategy(initialInMemoryExtent, maximumInMemoryExtent, useDirectBuffers); this.initialInMemoryExtent = initialInMemoryExtent; this.maximumInMemoryExtent = maximumInMemoryExtent; this.useDirectBuffers = useDirectBuffers; } /** * Close the store and delete the associated file, if any. */ public void close() { if(!open) throw new IllegalStateException(); open = false; buf.close(); buf.deleteFile(); } public void force(boolean metadata) { if(!open) throw new IllegalStateException(); buf.force(metadata); } public boolean isOpen() { return open; } /** * Always returns <code>false</code> since the store will be deleted as soon * as it is closed. */ public boolean isStable() { return false; } public ByteBuffer read(long addr, ByteBuffer dst) { if(!open) throw new IllegalStateException(); return buf.read(addr, dst); } public long write(ByteBuffer data) { if(!open) throw new IllegalStateException(); try { return buf.write(data); } catch(OverflowException ex) { if(buf instanceof TransientBufferStrategy) { overflowToDisk(); return buf.write(data); } else { throw ex; } } } protected void overflowToDisk() { TransientBufferStrategy tmp = (TransientBufferStrategy)buf; int segmentId = 0; File file = null; // request a unique filename. /* * Set the initial extent to be large enough for the root blocks plus * twice the data in the in-memory buffer. */ long initialExtent = FileMetadata.headerSize0 + tmp.getUserExtent() * 2; final boolean create = true; final boolean readOnly = false; // Do not force writes since the store is not restart safe. final ForceEnum forceWrites = ForceEnum.No; /* Create a unique store file and setup the root blocks. The file * will be pre-extended to the requested initialExtent. */ FileMetadata fileMetadata = new FileMetadata(segmentId, file, BufferMode.Disk, useDirectBuffers, initialExtent, create, readOnly, forceWrites); // Mark the file for deletion on exit. fileMetadata.file.deleteOnExit(); // Open the disk-based store file. DiskOnlyStrategy diskBuf = new DiskOnlyStrategy(Bytes.gigabyte * 2, fileMetadata); try { /* * Transfer the data from the in-memory buffer to the disk. The * write begins immediately after the file header. */ // setup the transfer source. ByteBuffer b = tmp.directBuffer; b.limit(tmp.nextOffset); b.position(0); // write the data on the channel. diskBuf.channel.write(b,diskBuf.headerSize); // increment the offset. diskBuf.nextOffset += tmp.nextOffset; } catch(IOException ex) { try { diskBuf.close(); } catch (Throwable ex2) { } throw new RuntimeException(); } this.buf = diskBuf; } /** * Return a temporary file for use by the store. The file will be * automatically deleted if the JVM exits. * * @return A temporary file for use by the store. 
*/ protected File getTempFile() { try { File file = File.createTempFile("transientOverflow", ".store"); file.deleteOnExit(); return file; } catch(IOException ex) { throw new RuntimeException(ex); } } } Index: BasicBufferStrategy.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/BasicBufferStrategy.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BasicBufferStrategy.java 9 Feb 2007 16:13:18 -0000 1.13 --- BasicBufferStrategy.java 15 Feb 2007 20:59:21 -0000 1.14 *************** *** 91,97 **** if (needed > 0) { ! if(!overflow((int)needed)) { ! throw new RuntimeException("overflow"); } --- 91,97 ---- if (needed > 0) { ! if (!overflow((int) needed)) { ! throw new OverflowException(); } *************** *** 164,170 **** } - /** - * FIXME write tests of this method. - */ public void truncate(long newExtent) { --- 164,167 ---- --- NEW FILE: OverflowException.java --- /** The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product. Contributors to any Modifications may add their own copyright notices to identify their own contributions. License: The contents of this file are subject to the CognitiveWeb Open Source License Version 1.1 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License from http://www.CognitiveWeb.org/legal/license/ Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Copyrights: Portions created by or assigned to CognitiveWeb are Copyright (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact information for CognitiveWeb is available at http://www.CognitiveWeb.org Portions Copyright (c) 2002-2003 Bryan Thompson. Acknowledgements: Special thanks to the developers of the Jabber Open Source License 1.0 (JOSL), from which this License was derived. This License contains terms that differ from JOSL. Special thanks to the CognitiveWeb Open Source Contributors for their suggestions and support of the Cognitive Web. Modifications: */ /* * Created on Feb 15, 2007 */ package com.bigdata.journal; /** * An instance of this class is thrown if an * {@link AbstractBufferStrategy#overflow(int)} request is denied. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ public class OverflowException extends RuntimeException { /** * */ private static final long serialVersionUID = -8511505622215579950L; /** * */ public OverflowException() { } /** * @param message */ public OverflowException(String message) { super(message); } /** * @param cause */ public OverflowException(Throwable cause) { super(cause); } /** * @param message * @param cause */ public OverflowException(String message, Throwable cause) { super(message, cause); } } |
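The memory-then-disk overflow pattern used by TemporaryStore above can be illustrated with a minimal, self-contained sketch that relies only on JDK classes. The class below is not part of the bigdata codebase; its name, fields, and growth policy are assumptions chosen for illustration, and it omits the root blocks, Addr encoding, and the close()/delete handling that the real store provides.

    import java.io.File;
    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;

    /** Illustration only: buffers writes in memory and spills to a temp file on overflow. */
    class OverflowingScratchStore {

        private final int maxInMemoryBytes;
        private ByteBuffer memory;       // in-memory buffer while the store is small
        private FileChannel channel;     // non-null once the store has spilled to disk
        private long nextOffset = 0L;    // offset at which the next record is written

        OverflowingScratchStore(int initialBytes, int maxInMemoryBytes) {
            this.maxInMemoryBytes = maxInMemoryBytes;
            this.memory = ByteBuffer.allocate(initialBytes);
        }

        /** Writes the record and returns the offset at which it was written. */
        synchronized long write(ByteBuffer data) throws IOException {
            final int len = data.remaining();
            final long offset = nextOffset;
            if (channel == null && offset + len > maxInMemoryBytes) {
                spillToDisk();                      // analogous to overflowToDisk() above
            }
            if (channel == null) {
                if (memory.remaining() < len) {     // grow the in-memory buffer as needed
                    ByteBuffer bigger = ByteBuffer.allocate(
                            Math.max(memory.capacity() * 2, memory.position() + len));
                    memory.flip();
                    bigger.put(memory);
                    memory = bigger;
                }
                memory.put(data);
            } else {
                channel.write(data, offset);        // positional write on the temp file
            }
            nextOffset += len;
            return offset;
        }

        /** Reads back len bytes that were written at the given offset. */
        synchronized ByteBuffer read(long offset, int len) throws IOException {
            ByteBuffer dst = ByteBuffer.allocate(len);
            if (channel == null) {
                ByteBuffer src = memory.duplicate();
                src.position((int) offset).limit((int) offset + len);
                dst.put(src);
            } else {
                channel.read(dst, offset);
            }
            dst.flip();
            return dst;
        }

        /** Copies the buffered bytes onto a temp file marked for deletion when the JVM exits. */
        private void spillToDisk() throws IOException {
            File file = File.createTempFile("scratch", ".store");
            file.deleteOnExit();
            channel = new RandomAccessFile(file, "rw").getChannel();
            ByteBuffer b = memory.duplicate();
            b.flip();                               // [0, position) holds the buffered data
            channel.write(b, 0L);
            memory = null;                          // all further IO goes to the file
        }
    }

A real implementation would also pre-extend the file, lay down the store header before the data, and delete the file eagerly on close(), as TemporaryStore does above.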
From: Bryan T. <tho...@us...> - 2007-02-15 14:23:57
|
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/objndx In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv7688/src/test/com/bigdata/objndx Modified Files: TestIterators.java Log Message: Added support for filtering and resolving on EntryIterator and worked through most of the test suite for UnsisolatedBTree. Index: TestIterators.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/objndx/TestIterators.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** TestIterators.java 26 Jan 2007 02:39:25 -0000 1.6 --- TestIterators.java 15 Feb 2007 14:23:50 -0000 1.7 *************** *** 50,53 **** --- 50,55 ---- import org.apache.log4j.Level; + import com.bigdata.objndx.EntryIterator.EntryFilter; + /** * Test suite for iterators. The tests are presented from the least dependencies *************** *** 467,469 **** --- 469,543 ---- } + /** + * Test the use of an {@link EntryFilter} to visit only certain values. + */ + public void test_entryFilter() { + + final byte[] k3 = i2k(3); + final byte[] k5 = i2k(5); + final byte[] k7 = i2k(7); + + final SimpleEntry v3 = new SimpleEntry(3); + final SimpleEntry v5 = new SimpleEntry(5); + final SimpleEntry v7 = new SimpleEntry(7); + + BTree btree = getBTree(3); + + btree.insert(k3,v3); + btree.insert(k5,v5); + btree.insert(k7,v7); + + final Leaf a = (Leaf) btree.root; + + // visit everything in the root leaf. + assertSameIterator(new Object[]{v3,v5,v7},btree.entryIterator()); + + // visit everything in the root leaf. + assertSameIterator(new Object[]{v3,v5,v7},btree.rangeIterator(null,null)); + + // visit everything in the root leaf using an explicit EntryIterator ctor. + assertSameIterator(new Object[]{v3,v5,v7},new EntryIterator(a,null,null,null,null)); + + // visit everything exception v3. + assertSameIterator(new Object[]{v5,v7},new EntryIterator(a,null,null,null,new EntryFilter() { + private static final long serialVersionUID = 1L; + public boolean isValid(Object value) { + if (value.equals(v3)) + return false; + return true; + } + })); + + // visit everything exception v5. + assertSameIterator(new Object[]{v3,v7},new EntryIterator(a,null,null,null,new EntryFilter() { + private static final long serialVersionUID = 1L; + public boolean isValid(Object value) { + if (value.equals(v5)) + return false; + return true; + } + })); + + // visit everything exception v7. + assertSameIterator(new Object[]{v3,v5},new EntryIterator(a,null,null,null,new EntryFilter() { + private static final long serialVersionUID = 1L; + public boolean isValid(Object value) { + if (value.equals(v7)) + return false; + return true; + } + })); + + // visit everything exception v7 using a rangeIterator. + assertSameIterator(new Object[]{v3,v5},a.rangeIterator(null,null,new EntryFilter() { + private static final long serialVersionUID = 1L; + public boolean isValid(Object value) { + if (value.equals(v7)) + return false; + return true; + } + })); + + } + } |
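The EntryFilter usage exercised in the tests above boils down to a look-ahead iterator that skips values rejected by a predicate. The following self-contained sketch is not the bigdata EntryIterator; the class name and the use of java.util.function.Predicate in place of EntryFilter are illustrative assumptions. The look-ahead slot is needed so that hasNext() can report whether any accepted value remains.

    import java.util.Iterator;
    import java.util.List;
    import java.util.NoSuchElementException;
    import java.util.function.Predicate;

    /** Illustration only: wraps an iterator and visits only values accepted by a filter. */
    class FilteringIterator<T> implements Iterator<T> {

        private final Iterator<T> src;
        private final Predicate<T> filter;  // plays the role of EntryFilter.isValid(...)
        private T next;                     // look-ahead slot
        private boolean hasNext;

        FilteringIterator(Iterator<T> src, Predicate<T> filter) {
            this.src = src;
            this.filter = filter;
            advance();
        }

        /** Scans forward to the next value accepted by the filter, if any. */
        private void advance() {
            hasNext = false;
            while (src.hasNext()) {
                T candidate = src.next();
                if (filter.test(candidate)) {
                    next = candidate;
                    hasNext = true;
                    return;
                }
            }
        }

        public boolean hasNext() { return hasNext; }

        public T next() {
            if (!hasNext) throw new NoSuchElementException();
            T current = next;
            advance();
            return current;
        }

        public static void main(String[] args) {
            // Skip the value 5, mirroring the "visit everything except v5" case above.
            Iterator<Integer> it = new FilteringIterator<>(
                    List.of(3, 5, 7).iterator(), v -> v != 5);
            while (it.hasNext()) System.out.println(it.next()); // prints 3 then 7
        }
    }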
From: Bryan T. <tho...@us...> - 2007-02-15 14:23:57
|
Update of /cvsroot/cweb/bigdata/src/test/com/bigdata/isolation In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv7688/src/test/com/bigdata/isolation Modified Files: TestUnisolatedBTree.java Log Message: Added support for filtering and resolving on EntryIterator and worked through most of the test suite for UnsisolatedBTree. Index: TestUnisolatedBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/test/com/bigdata/isolation/TestUnisolatedBTree.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** TestUnisolatedBTree.java 15 Feb 2007 01:34:23 -0000 1.2 --- TestUnisolatedBTree.java 15 Feb 2007 14:23:50 -0000 1.3 *************** *** 48,54 **** --- 48,56 ---- package com.bigdata.isolation; + import com.bigdata.objndx.AbstractBTree; import com.bigdata.objndx.AbstractBTreeTestCase; import com.bigdata.objndx.BTreeMetadata; import com.bigdata.objndx.IBatchOp; + import com.bigdata.objndx.IRangeQuery; import com.bigdata.rawstore.IRawStore; import com.bigdata.rawstore.SimpleMemoryRawStore; *************** *** 59,75 **** * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo test the batch apis. all methods must work with {@link Value}s (the - * test for this could be a test of the - * {@link IBatchOp#apply(com.bigdata.objndx.ISimpleBTree)} implementations - * in the btree package since we apply that method in a trivial manner to - * support the batch api. - * - * @todo test the rangeCount and rangeIterator api. the former will over - * estimate if there are deleted entries while the latter must not visit - * deleted entries (or may it -- we will need to see them for mergeDown() - * and validate())? - * - * @todo test entryIterator() - it visits only those that are not deleted. */ public class TestUnisolatedBTree extends AbstractBTreeTestCase { --- 61,64 ---- *************** *** 276,288 **** } ! public void test_crud_batchApi() { fail("write test"); } public void test_restartSafe() { ! fail("write test"); } --- 265,462 ---- } ! ! /** ! * Tests the {@link IRangeQuery} interface and ! * {@link AbstractBTree#entryIterator()}. ! * {@link IRangeQuery#rangeCount(byte[], byte[])} will over estimate if ! * there are deleted entries while the iterators MUST NOT visit deleted ! * entries. ! */ ! public void test_rangeQueryApi() { ! ! final byte[] k3 = new byte[]{3}; ! final byte[] k5 = new byte[]{5}; ! final byte[] k7 = new byte[]{7}; ! ! final byte[] v3 = new byte[]{3}; ! final byte[] v5 = new byte[]{5}; ! final byte[] v7 = new byte[]{7}; ! ! final byte[] v5a = new byte[]{5,1}; ! ! UnisolatedBTree btree = new UnisolatedBTree(new SimpleMemoryRawStore(), ! 3, null); ! ! /* ! * fill the root leaf. ! */ ! btree.insert(k3,v3); ! btree.insert(k5,v5); ! btree.insert(k7,v7); ! ! assertEquals(3,btree.getEntryCount()); ! assertEquals(3,btree.rangeCount(null, null)); ! assertEquals(2,btree.rangeCount(k3, k7)); ! assertEquals(1,btree.rangeCount(k3, k5)); ! assertEquals(1,btree.rangeCount(null, k5)); ! assertEquals(0,btree.rangeCount(null, k3)); ! assertEquals(0,btree.rangeCount(k5, k5)); ! assertEquals(1,btree.rangeCount(k5, k7)); ! assertEquals(2,btree.rangeCount(k5, null)); ! assertSameIterator(new Object[]{v3,v5,v7},btree.entryIterator()); ! assertSameIterator(new Object[]{v3,v5,v7},btree.rangeIterator(null,null)); ! assertSameIterator(new Object[]{v3,v5},btree.rangeIterator(k3,k7)); ! assertSameIterator(new Object[]{v3},btree.rangeIterator(k3,k5)); ! 
assertSameIterator(new Object[]{v3},btree.rangeIterator(null,k5)); ! assertSameIterator(new Object[]{},btree.rangeIterator(null,k3)); ! assertSameIterator(new Object[]{},btree.rangeIterator(k5,k5)); ! assertSameIterator(new Object[]{v5},btree.rangeIterator(k5,k7)); ! assertSameIterator(new Object[]{v5,v7},btree.rangeIterator(k5,null)); ! ! /* ! * delete one entry. this will not change the rangeCounts, but it does ! * effect the iterators which MUST NOT visit the deleted value. ! */ ! btree.remove(k5); ! ! assertEquals(3,btree.getEntryCount()); ! assertEquals(3,btree.rangeCount(null, null)); ! assertEquals(2,btree.rangeCount(k3, k7)); ! assertEquals(1,btree.rangeCount(k3, k5)); ! assertEquals(1,btree.rangeCount(null, k5)); ! assertEquals(0,btree.rangeCount(null, k3)); ! assertEquals(0,btree.rangeCount(k5, k5)); ! assertEquals(1,btree.rangeCount(k5, k7)); ! assertEquals(2,btree.rangeCount(k5, null)); ! assertSameIterator(new Object[]{v3,v7},btree.entryIterator()); ! assertSameIterator(new Object[]{v3,v7},btree.rangeIterator(null,null)); ! assertSameIterator(new Object[]{v3},btree.rangeIterator(k3,k7)); ! assertSameIterator(new Object[]{v3},btree.rangeIterator(k3,k5)); ! assertSameIterator(new Object[]{v3},btree.rangeIterator(null,k5)); ! assertSameIterator(new Object[]{},btree.rangeIterator(null,k3)); ! assertSameIterator(new Object[]{},btree.rangeIterator(k5,k5)); ! assertSameIterator(new Object[]{},btree.rangeIterator(k5,k7)); ! assertSameIterator(new Object[]{v7},btree.rangeIterator(k5,null)); ! ! /* ! * re-insert the deleted value. ! */ ! btree.insert(k5,v5a); ! ! assertEquals(3,btree.getEntryCount()); ! assertEquals(3,btree.rangeCount(null, null)); ! assertEquals(2,btree.rangeCount(k3, k7)); ! assertEquals(1,btree.rangeCount(k3, k5)); ! assertEquals(1,btree.rangeCount(null, k5)); ! assertEquals(0,btree.rangeCount(null, k3)); ! assertEquals(0,btree.rangeCount(k5, k5)); ! assertEquals(1,btree.rangeCount(k5, k7)); ! assertEquals(2,btree.rangeCount(k5, null)); ! assertSameIterator(new Object[]{v3,v5a,v7},btree.entryIterator()); ! assertSameIterator(new Object[]{v3,v5a,v7},btree.rangeIterator(null,null)); ! assertSameIterator(new Object[]{v3,v5a},btree.rangeIterator(k3,k7)); ! assertSameIterator(new Object[]{v3},btree.rangeIterator(k3,k5)); ! assertSameIterator(new Object[]{v3},btree.rangeIterator(null,k5)); ! assertSameIterator(new Object[]{},btree.rangeIterator(null,k3)); ! assertSameIterator(new Object[]{},btree.rangeIterator(k5,k5)); ! assertSameIterator(new Object[]{v5a},btree.rangeIterator(k5,k7)); ! assertSameIterator(new Object[]{v5a,v7},btree.rangeIterator(k5,null)); ! ! } ! ! /** ! * @todo test the batch apis. all methods must work with {@link Value}s ! * (the test for this could be a test of the ! * {@link IBatchOp#apply(com.bigdata.objndx.ISimpleBTree)} ! * implementations in the btree package since we apply that method in ! * a trivial manner to support the batch api. ! */ public void test_crud_batchApi() { fail("write test"); + } + /** + * @todo Test restart-safety of data, including deletion markers. + */ public void test_restartSafe() { ! IRawStore store = new SimpleMemoryRawStore(); ! ! final byte[] k3 = new byte[]{3}; ! final byte[] k5 = new byte[]{5}; ! final byte[] k7 = new byte[]{7}; ! ! final byte[] v3 = new byte[]{3}; ! final byte[] v5 = new byte[]{5}; ! final byte[] v7 = new byte[]{7}; ! ! final byte[] v5a = new byte[]{5,1}; ! final byte[] v7a = new byte[]{7,1}; ! ! UnisolatedBTree btree = new UnisolatedBTree(store, 3, null); ! ! /* ! * fill the root leaf. ! */ ! 
btree.insert(k3,v3); ! btree.insert(k5,v5); ! btree.insert(k7,v7); ! ! assertSameIterator(new Object[]{v3,v5,v7},btree.entryIterator()); ! ! /* ! * write out the btree and re-load it. ! */ ! final long addr1 = btree.write(); ! ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr1)); ! ! assertSameIterator(new Object[]{v3,v5,v7},btree.entryIterator()); ! ! /* ! * delete a key, verify, write out the btree, re-load it and re-verify. ! */ ! ! btree.remove(k5); ! ! assertSameIterator(new Object[]{v3,v7},btree.entryIterator()); ! ! final long addr2 = btree.write(); ! ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr2)); ! ! assertSameIterator(new Object[]{v3,v7},btree.entryIterator()); ! ! /* ! * update a key, verify, write out the btree, re-load it and re-verify. ! */ ! ! btree.insert(k7,v7a); ! ! assertSameIterator(new Object[]{v3,v7a},btree.entryIterator()); ! ! final long addr3 = btree.write(); ! ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr3)); ! ! assertSameIterator(new Object[]{v3,v7a},btree.entryIterator()); ! ! /* ! * update a deleted key, verify, write out the btree, re-load it and ! * re-verify. ! */ ! ! btree.insert(k5,v5a); ! ! assertSameIterator(new Object[]{v3,v5a,v7a},btree.entryIterator()); ! ! final long addr4 = btree.write(); ! ! btree = new UnisolatedBTree(store,BTreeMetadata.read(store, addr4)); ! ! assertSameIterator(new Object[]{v3,v5a,v7a},btree.entryIterator()); ! } |
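The behaviour exercised above, where rangeCount continues to count a removed key while the iterators skip it, comes from deletion markers: remove() overwrites the value with a marker instead of removing the entry. A self-contained sketch of that idea follows, using a TreeMap and integer keys rather than the UnisolatedBTree and Value classes; all names below are illustrative.

    import java.util.ArrayList;
    import java.util.List;
    import java.util.TreeMap;

    /** Illustration only: an index that marks entries deleted instead of removing them. */
    class DeletionMarkingIndex {

        /** A value slot; when deleted is true the slot is a deletion marker. */
        static final class Slot {
            final byte[] datum;
            final boolean deleted;
            Slot(byte[] datum, boolean deleted) { this.datum = datum; this.deleted = deleted; }
        }

        private final TreeMap<Integer, Slot> map = new TreeMap<>();

        void insert(int key, byte[] value) { map.put(key, new Slot(value, false)); }

        /** Leaves a deletion marker behind rather than removing the entry. */
        void remove(int key) { map.put(key, new Slot(null, true)); }

        /** Counts entries in [fromKey, toKey), markers included, so it is an upper bound. */
        int rangeCount(int fromKey, int toKey) {
            return map.subMap(fromKey, toKey).size();
        }

        /** Visits only the non-deleted values in [fromKey, toKey). */
        List<byte[]> rangeIterator(int fromKey, int toKey) {
            List<byte[]> out = new ArrayList<>();
            for (Slot s : map.subMap(fromKey, toKey).values()) {
                if (!s.deleted) out.add(s.datum);
            }
            return out;
        }

        public static void main(String[] args) {
            DeletionMarkingIndex ndx = new DeletionMarkingIndex();
            ndx.insert(3, new byte[]{3});
            ndx.insert(5, new byte[]{5});
            ndx.insert(7, new byte[]{7});
            ndx.remove(5);
            // rangeCount still reports 3, but the iterator skips the deleted key 5.
            System.out.println(ndx.rangeCount(0, 10));           // 3
            for (byte[] v : ndx.rangeIterator(0, 10)) {
                System.out.println(v[0]);                        // 3 then 7
            }
        }
    }

Because the marker stays in the index, a later insert for the same key (k5 with v5a in the test above) simply overwrites the marker, and the restart-safety test passes because markers are persisted along with ordinary entries.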
From: Bryan T. <tho...@us...> - 2007-02-15 14:23:54
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/journal In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv7688/src/java/com/bigdata/journal Modified Files: Tx.java Options.java Journal.java Log Message: Added support for filtering and resolving on EntryIterator and worked through most of the test suite for UnsisolatedBTree. Index: Options.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/Options.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** Options.java 12 Feb 2007 21:51:07 -0000 1.7 --- Options.java 15 Feb 2007 14:23:49 -0000 1.8 *************** *** 1,2 **** --- 1,45 ---- + /** + + The Notice below must appear in each file of the Source Code of any + copy you distribute of the Licensed Product. Contributors to any + Modifications may add their own copyright notices to identify their + own contributions. + + License: + + The contents of this file are subject to the CognitiveWeb Open Source + License Version 1.1 (the License). You may not copy or use this file, + in either source code or executable form, except in compliance with + the License. You may obtain a copy of the License from + + http://www.CognitiveWeb.org/legal/license/ + + Software distributed under the License is distributed on an AS IS + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + Copyrights: + + Portions created by or assigned to CognitiveWeb are Copyright + (c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact + information for CognitiveWeb is available at + + http://www.CognitiveWeb.org + + Portions Copyright (c) 2002-2003 Bryan Thompson. + + Acknowledgements: + + Special thanks to the developers of the Jabber Open Source License 1.0 + (JOSL), from which this License was derived. This License contains + terms that differ from JOSL. + + Special thanks to the CognitiveWeb Open Source Contributors for their + suggestions and support of the Cognitive Web. + + Modifications: + + */ package com.bigdata.journal; *************** *** 4,7 **** --- 47,51 ---- import java.util.Properties; + import com.bigdata.objndx.IndexSegment; import com.bigdata.rawstore.Bytes; *************** *** 60,67 **** /** * <code>maximumExtent</code> - The maximum extent of the journal (bytes). ! * Once journal will "overflow" once it approaches this limit during a ! * {@link #commit()}. The default implementation ignores overflow events. A ! * scale up or scale out implementation will use this event as a trigger to ! * evict data from application btrees into index segments. * * @see #DEFAULT_MAXIMUM_EXTENT --- 104,112 ---- /** * <code>maximumExtent</code> - The maximum extent of the journal (bytes). ! * The journal will {@link Journal#overflow()} once it approaches this limit ! * during a {@link Journal#commit()}. The default implementation ignores ! * overflow events. A scale up or scale out implementation uses this event ! * as a trigger to evict data from btrees that absorb writes on the journal ! * into partitioned {@link IndexSegment}s. * * @see #DEFAULT_MAXIMUM_EXTENT *************** *** 82,87 **** * <code>true</code>). When true and the named file is not found, a new * journal will be created. - * - * @todo Write tests for this feature. 
*/ public static final String CREATE = "create"; --- 127,130 ---- *************** *** 90,95 **** * <code>readOnly</code> - When true, the journal must pre-exist and * will be read-only (optional, default is <code>false</code>). - * - * @todo Write tests for this feature. */ public static final String READ_ONLY = "readOnly"; --- 133,136 ---- *************** *** 161,176 **** public static final String DOUBLE_SYNC = "doubleSync"; - // /** - // * <code>conflictResolver</code> - The name of a class that implements - // * the {@link IConflictResolver} interface (optional). When specified, - // * the class MUST define a public constructor with the signature - // * <code><i>class</i>( Journal journal )</code>. There is NO - // * default. Resolution of write-write conflicts is enabled iff a - // * conflict resolution class is declared with this parameter. If a value - // * is not provided, the a write-write conflict will result in the - // * rollback of a transaction. - // */ - // public static final String CONFLICT_RESOLVER = "conflictResolver"; - /** * <code>deleteOnClose</code> - This optional boolean option causes --- 202,205 ---- Index: Tx.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/Tx.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** Tx.java 15 Feb 2007 01:34:22 -0000 1.25 --- Tx.java 15 Feb 2007 14:23:49 -0000 1.26 *************** *** 82,85 **** --- 82,91 ---- * transactions are not visible within the transaction). * + * @todo The write set of a transaction is currently written onto an independent + * store. This means that concurrent transactions can actually execute + * concurrently. We do not even need a read-lock on the indices isolated + * by the transaction since they are read-only. This might prove to be a + * nice way to leverage multiple processors / cores on a data server. + * * @todo support {@link PartitionedIndex}es. * *************** *** 376,380 **** // Atomic commit. ! journal.commit(); runState = RunState.COMMITTED; --- 382,386 ---- // Atomic commit. ! journal.commit(this); runState = RunState.COMMITTED; Index: Journal.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/journal/Journal.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** Journal.java 15 Feb 2007 01:34:22 -0000 1.49 --- Journal.java 15 Feb 2007 14:23:49 -0000 1.50 *************** *** 137,248 **** * FIXME Priority items are: * <ol> * <li> Segment server (mixture of journal server and read-optimized database * server).</li> - * <li> Persistence capable data structures for the object index (basically, a - * btree) and the allocation indices. The allocation index is less clear, but a - * BitSet will do until I settle on something better - one of the tricks no - * matter how you approach it is getting closure on the self-referential issue - * with slots and a slot allocation index; maybe the per-tx data structure is - * just transient will the committed data structure is persistent?</li> [...1121 lines suppressed...] + // /* + // * @todo Search backwards from the current {@link IRootBlockView}. 
+ // */ + // throw new UnsupportedOperationException(); + // + // } + // + // /** + // * FIXME Write commit record, including: the transaction identifier, the + // * location of the object index and the slot allocation index, the + // location + // * of a run length encoded representation of slots allocated by this + // * transaction, and the identifier and location of the prior transaction + // * serialized on this journal. + // */ + // protected void writeCommitRecord(IStore tx) { + // + // } + } |
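The maximumExtent option documented above drives the overflow decision: a commit that would push the store past the configured maximum triggers the journal's overflow handling rather than growing the file further. The short sketch below reads such options from a Properties object and applies the check; the property keys match the constants shown above, but the default values and the 90% trigger threshold are assumptions made purely for illustration.

    import java.util.Properties;

    /** Illustration only: reading journal options and applying a maximum-extent check. */
    class MaximumExtentCheckExample {

        public static void main(String[] args) {
            Properties properties = new Properties();
            properties.setProperty("maximumExtent", String.valueOf(200L * 1024 * 1024));
            properties.setProperty("create", "true");
            properties.setProperty("readOnly", "false");

            final long maximumExtent =
                    Long.parseLong(properties.getProperty("maximumExtent"));
            final boolean create =
                    Boolean.parseBoolean(properties.getProperty("create", "true"));
            final boolean readOnly =
                    Boolean.parseBoolean(properties.getProperty("readOnly", "false"));

            // Pretend the store currently occupies 190M while a commit is in progress.
            final long currentExtent = 190L * 1024 * 1024;

            // Hypothetical trigger: once the extent approaches the maximum, a scale-up
            // or scale-out deployment would evict btree data into index segments.
            if (currentExtent >= maximumExtent * 0.9) {
                System.out.println("commit would trigger overflow handling");
            }

            System.out.println("create=" + create + ", readOnly=" + readOnly);
        }
    }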
From: Bryan T. <tho...@us...> - 2007-02-15 14:23:54
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv7688/src/java/com/bigdata/scaleup Modified Files: PartitionedJournal.java Name2MetadataAddr.java Log Message: Added support for filtering and resolving on EntryIterator and worked through most of the test suite for UnsisolatedBTree. Index: Name2MetadataAddr.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup/Name2MetadataAddr.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Name2MetadataAddr.java 9 Feb 2007 16:13:18 -0000 1.1 --- Name2MetadataAddr.java 15 Feb 2007 14:23:50 -0000 1.2 *************** *** 49,53 **** import com.bigdata.journal.Name2Addr; - import com.bigdata.objndx.BTree; import com.bigdata.objndx.BTreeMetadata; import com.bigdata.objndx.IIndex; --- 49,52 ---- Index: PartitionedJournal.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/scaleup/PartitionedJournal.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** PartitionedJournal.java 15 Feb 2007 01:34:23 -0000 1.5 --- PartitionedJournal.java 15 Feb 2007 14:23:50 -0000 1.6 *************** *** 73,82 **** * </p> * <p> ! * An {@link PartitionedIndex} is dynamically decomposed into key-range ! * partitions. Each partition is defined by the first key that may be inserted ! * on, or read from, that parition. A total ordering over partitions and their ! * locations is maintained in a metadata index. An insert or read is directed to ! * the first partition having a minimum key greater than or equal to the probe ! * key. * </p> * <p> --- 73,83 ---- * </p> * <p> ! * A {@link PartitionedIndex} is an {@link IIndex} that is dynamically ! * decomposed into key-range partitions. Each partition is defined by a ! * <i>separator key</i>. The separator key is the first key that may be ! * inserted into, or read from, that partition. A total ordering over partitions ! * and their locations is maintained in a {@link MetadataIndex}. An insert or ! * read is directed to the first partition having a separator key greater than ! * or equal to the probe key. * </p> * <p> *************** *** 84,104 **** * that buffers writes for that partition and zero or more {@link IndexSegment}s * per partition. The relationship between keys and partitions is managed by a ! * {@link MetadataIndex}. * </p> * <p> ! * All writes bound for any partition of any index are absorbed on this ! * {@link Journal}. For each {@link PartitionedIndex}, there is a ! * corresponding {@link BTree} that absorbs writes (including deletes) on the ! * slave. On overflow, {@link Journal}, the backing store is frozen and a new ! * buffer and backing store are deployed to absorb further writes. During this ! * time, reads on the {@link PartitionedJournal} are served by a {@link FusedView} ! * of the data on the new slave and the data on the old slave. * </p> * <p> ! * While reads and writes proceed in the forground on the new buffer and backing ! * store, a background thread builds an {@link IndexSegment} from each modified ! * {@link BTree} corresponding to a {@link PartitionedIndex}. Once all indices ! * have been processed, the old buffer and backing store are released and their ! * use is atomic replaced by the use of the new index segments. 
* </p> * <p> --- 85,113 ---- * that buffers writes for that partition and zero or more {@link IndexSegment}s * per partition. The relationship between keys and partitions is managed by a ! * {@link MetadataIndex}. Partitions may be multiplexed onto the same ! * {@link Journal} in order to reduce the #of disk files that are being actively ! * written and maximize the use of sequential IO on a given IO channel. * </p> * <p> ! * All writes bound for any partition of any index are absorbed on the ! * {@link Journal} to which that partition has been assigned. For each ! * {@link PartitionedIndex}, there is a corresponding {@link BTree} that ! * absorbs writes (including deletes) on the {@link Journal}. The ! * {@link PartitionedJournal} actually delegates all storage services to a ! * {@link SlaveJournal}. When the {@link SlaveJournal} {@link #overflow()}s, ! * it is frozen and a new {@link SlaveJournal} is deployed to absorb further ! * writes. During this time, reads on the {@link PartitionedJournal} are served ! * by a {@link FusedView} of the data on the new {@link SlaveJournal} and the ! * data on the old {@link SlaveJournal}. * </p> * <p> ! * While reads and writes proceed in the foreground on the new ! * {@link SlaveJournal}, a background thread builds an {@link IndexSegment} ! * from each modified {@link BTree} corresponding to a {@link PartitionedIndex}. ! * Note that we would be free to delete the old {@link SlaveJournal} and old ! * {@link IndexSegment}s except that concurrent transactions may still need to ! * read from historical states which would be lost by the merge. However, ! * unisolated read and write operations and new transactions immediately begin ! * to use the new {@link IndexSegment}s. * </p> * <p> *************** *** 106,127 **** * compacting merge MAY be performed. Otherwise a new {@link IndexSegment} is * generated, possibly resulting in more than one {@link IndexSegment} for the ! * same partition. Once all data from the slave are stable in the appropriate ! * {@link IndexSegment}s, the old {@link Journal} is closed and either deleted ! * synchronously or marked for deletion. Each {@link IndexSegment} built in this ! * manner contains a historical snapshot of data written on {@link Journal} for ! * a given partition. If multiple {@link IndexSegment} are allowed to accumulate ! * per partition, then the {@link IndexSegment}s are combined periodically ! * using a compacting merge (responsible for retaining the most recent versions ! * for any key and processing delete markers). The inputs to this process are ! * then deleted (or marked for deletion). * </p> * <p> ! * A partition overflows and is split when the total data size of an index ! * partition (estimated as the sum of the {@link IndexSegment}s and the data on ! * the {@link Journal} for that partition) exceeds a threshold. Likewise, ! * partitions may underflow and be joined. These operations require updates to ! * the metadata index so that futher requests are directed based on the new ! * parition boundaries. * </p> * * @todo When you open an index segment the nodes are fully buffered, so that --- 115,177 ---- * compacting merge MAY be performed. Otherwise a new {@link IndexSegment} is * generated, possibly resulting in more than one {@link IndexSegment} for the ! * same partition. Once all data from the {@link SlaveJournal} are stable in the ! * appropriate {@link IndexSegment}s, the old {@link SlaveJournal} is no longer ! * required for unisolated reads or writes or for new transactions (however, it ! 
* MAY be required to serve existing transactions reading from historical states ! * that are no longer present in the merged index segments). Each ! * {@link IndexSegment} built in this manner contains a snapshot of surviving ! * data written on {@link Journal} for a given partition. If multiple ! * {@link IndexSegment} are allowed to accumulate per partition, then the ! * {@link IndexSegment}s are combined periodically using a compacting merge ! * (responsible for retaining the most recent versions for any key and ! * processing delete markers). The inputs to a compacting merge are then no ! * longer required by unisolated reads or writes but MAY be required to support ! * concurrent transactions reading from historical states not preserved by the ! * merge. * </p> * <p> ! * An index partition overflows and is split when the total data size of an ! * index partition (estimated as the sum of the {@link IndexSegment}s and the ! * data on the {@link Journal} for that partition) exceeds a threshold. ! * Likewise, index partitions may underflow and be joined. These operations ! * require updates to the {@link MetadataIndex} so that futher requests are ! * directed based on the new partition boundaries. * </p> + * <p> + * Either once the journal is frozen or periodically during its life we would + * evict an index to a perfect index segment on disk (write the leaves out in a + * linear sequence, complete with prior-next references, and build up a perfect + * index over those leaves - the branching factor here can be much higher in + * order to reduce the tree depth and optimize IO). The perfect index would have + * to have "delete" entries to mark deleted keys. Periodically we would merge + * evicted index segments. The goal is to balance write absorbtion and + * concurrency control within memory and using pure sequantial IO against 100:1 + * or better data on disk with random seeks for reading any given leaf. The + * index nodes would be written densely after the leaves such that it would be + * possible to fully buffer the index nodes with a single sequential IO. A key + * range scan would likewise be a sequential IO since we would know the start + * and end leaf for the key range directly from the index. A read would first + * resolve against the current journal, then the prior journal iff one is in the + * process of be paritioned out into individual perfect index segments, and then + * against those index segments in order. A "point" read would therefore be + * biased to be more efficient for "recently" written data, but reads otherwise + * must present a merged view of the data in each remaining historical perfect + * index that covers at least part of the key range. Periodically the index + * segments could be fully merged, at which point we would no longer have any + * delete entries in a given perfect index segment. + * </p> + * <p> + * At one extreme there will be one journal per disk and the journal will use + * the entire disk partition. In a pure write scenario the disk would perform + * only sequential IO. However, applications will also need to read data from + * disk. Read and write buffers need to be split. Write buffers are used to + * defer IOs until large sequential IOs may be realized. Read buffers are used + * for pre-fetching when the data on disk is much larger than the available RAM + * and the expectation of reuse is low while the expectation of completing a + * sequential scan is high. 
Direct buffering may be used for hot-spots but + * requires a means to bound the locality of the buffered segment, e.g., by not + * multiplexing an index segment that will be directly buffered and providing a + * sufficient resource abundence for high performance. + * <p> * * @todo When you open an index segment the nodes are fully buffered, so that *************** *** 144,147 **** --- 194,242 ---- * and a distributed protocol for access to the various index partitions. * + * @todo I've been considering the problem of a distributed index more. A + * mutiplexed journal holding segments for one or more clustered indices + * (containing anything from database rows to generic objects to terms to + * triples) would provide fast write absorption and good read rates + * without a secondary read-optimized segment. If this is combined with + * pipelined journals for per-index segment redundency (a given segment is + * replicated on N=3+ hosts) then we can have both failover and + * load-balancing for read-intensive segments. + * <p> + * Clustered indices support (re-)distribution and redundency since their + * rows are their essential data while the index simply provides efficient + * access. Therefore the same objects may be written onto multiple + * replications of an index range hosted by different journals. The object + * data will remain invariant, but the entailed index data will be + * different for each journal on which the object is written. + * <p> + * This requires a variety of metadata for each index segment. If a + * segment is defined by an index identifier and a separator key, then the + * metadata model needs to identify the master journal for a given segment + * (writes are directed to this journal) and the secondary journals for + * the segment (the master writes through to the secondary journals using + * a pipeline). Likewise, the journal must be able to identify the root + * for any given committed state of each index range written on that + * journal. + * <p> + * We need to track write load and read load per index range in the + * journal, per journal, per IO channel, per disk, per host, and per + * network segment. Write load can be reduced by splitting a segment, by + * using a host with faster resources, or by reducing other utilization of + * the host. The latter would include the case of preferring reads from a + * secondary journal for that index segment. It is an extremely cheap + * action to offload readers to a secondary service. Likewise, failover of + * the master for an index range is also inexpensive since the data are + * already in place on several secondaries, + * <p> + * A new replication of an index range may be built up gradually, e.g., by + * moving a leaf at a time and piplining only the sub-range of the index + * range that has been already mirrored. For simplicity, the new copy of + * the index range would not be visible in the index metadata until a full + * copy of the index range was life. Registration of the copy with the + * metadata index is then trivial. Until that time, the metadata index + * would only know that a copy was being developed. If one of the existing + * replicas is not heavily loaded then it can handle the creation of the + * new copy. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ |
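The separator-key routing described above, where each partition is defined by the first key it may contain and the MetadataIndex maps separator keys to partition locations, can be sketched with a TreeMap. The class below is illustrative only, uses integer keys instead of byte[] keys, and routes a probe key to the partition with the greatest separator key less than or equal to the probe, which is the reading consistent with the separator key being the first key admitted by a partition.

    import java.util.Map;
    import java.util.TreeMap;

    /** Illustration only: routing probe keys to key-range partitions by separator key. */
    class SeparatorKeyRouter {

        // Maps a partition's separator key (the first key it may contain) to a
        // partition identifier; a stand-in for the MetadataIndex described above.
        private final TreeMap<Integer, String> partitions = new TreeMap<>();

        void addPartition(int separatorKey, String partitionId) {
            partitions.put(separatorKey, partitionId);
        }

        /** Returns the partition whose key range covers the probe key. */
        String lookup(int probeKey) {
            Map.Entry<Integer, String> e = partitions.floorEntry(probeKey);
            if (e == null) {
                throw new IllegalArgumentException("no partition covers " + probeKey);
            }
            return e.getValue();
        }

        public static void main(String[] args) {
            SeparatorKeyRouter router = new SeparatorKeyRouter();
            router.addPartition(0, "P0");     // covers [0, 100)
            router.addPartition(100, "P1");   // covers [100, 200)
            router.addPartition(200, "P2");   // covers [200, ...)
            System.out.println(router.lookup(5));    // P0
            System.out.println(router.lookup(100));  // P1
            System.out.println(router.lookup(250));  // P2
        }
    }

Splitting an overflowing partition then amounts to inserting a new separator key into the map, and joining underflowing partitions removes one, which matches the split/join updates to the MetadataIndex described above.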
From: Bryan T. <tho...@us...> - 2007-02-15 14:23:53
|
Update of /cvsroot/cweb/bigdata/src/java/com/bigdata/isolation In directory sc8-pr-cvs4.sourceforge.net:/tmp/cvs-serv7688/src/java/com/bigdata/isolation Modified Files: UnisolatedBTree.java Log Message: Added support for filtering and resolving on EntryIterator and worked through most of the test suite for UnsisolatedBTree. Index: UnisolatedBTree.java =================================================================== RCS file: /cvsroot/cweb/bigdata/src/java/com/bigdata/isolation/UnisolatedBTree.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** UnisolatedBTree.java 15 Feb 2007 01:34:22 -0000 1.2 --- UnisolatedBTree.java 15 Feb 2007 14:23:50 -0000 1.3 *************** *** 57,60 **** --- 57,61 ---- import com.bigdata.objndx.IEntryIterator; import com.bigdata.objndx.ISimpleBTree; + import com.bigdata.objndx.EntryIterator.EntryFilter; import com.bigdata.rawstore.IRawStore; *************** *** 339,352 **** } ! @Override public IEntryIterator rangeIterator(byte[] fromKey, byte[] toKey) { ! // TODO Auto-generated method stub ! return super.rangeIterator(fromKey, toKey); } - @Override public IEntryIterator entryIterator() { ! // TODO Auto-generated method stub ! return super.entryIterator(); } --- 340,385 ---- } ! /** ! * Visits only the non-deleted entries in the key range. ! */ public IEntryIterator rangeIterator(byte[] fromKey, byte[] toKey) { ! ! return root.rangeIterator(fromKey, toKey, DeletedEntryFilter.INSTANCE); ! } public IEntryIterator entryIterator() { ! ! return root.rangeIterator(null, null, DeletedEntryFilter.INSTANCE); ! ! } ! ! /** ! * A filter that hides deleted entries. ! * ! * @author <a href="mailto:tho...@us...">Bryan Thompson</a> ! * @version $Id$ ! */ ! public static class DeletedEntryFilter extends EntryFilter { ! ! static public final transient EntryFilter INSTANCE = new DeletedEntryFilter(); ! ! private static final long serialVersionUID = 2783761261078831116L; ! ! public boolean isValid(Object value) { ! ! return ! ((Value)value).deleted; ! ! } ! ! /** ! * Resolve the {@link Value} to its {@link Value#datum}. ! */ ! public Object resolve(Object value) { ! ! return ((Value)value).datum; ! ! } ! } |
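The DeletedEntryFilter above combines two steps: isValid() hides deletion markers and resolve() unwraps the surviving Value to its datum, so callers of entryIterator() see plain application values. Here is a self-contained sketch of that filter-then-resolve pattern; the Wrapped and Filter types below are illustrative stand-ins, not the bigdata Value and EntryFilter classes.

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;

    /** Illustration only: a filter that hides deleted entries and unwraps their payload. */
    class FilterResolveExample {

        /** A wrapper carrying a deletion flag and the user datum, like Value above. */
        static final class Wrapped {
            final byte[] datum;
            final boolean deleted;
            Wrapped(byte[] datum, boolean deleted) { this.datum = datum; this.deleted = deleted; }
        }

        /** The two-step contract sketched here: accept or reject, then resolve. */
        interface Filter<IN, OUT> {
            boolean isValid(IN value);
            OUT resolve(IN value);
        }

        /** Hides deletion markers and resolves survivors to their raw datum. */
        static final Filter<Wrapped, byte[]> HIDE_DELETED = new Filter<Wrapped, byte[]>() {
            public boolean isValid(Wrapped v) { return !v.deleted; }
            public byte[] resolve(Wrapped v) { return v.datum; }
        };

        /** Applies the filter, collecting only resolved, non-deleted values. */
        static <IN, OUT> Iterator<OUT> apply(Iterator<IN> src, Filter<IN, OUT> filter) {
            List<OUT> out = new ArrayList<>();
            while (src.hasNext()) {
                IN v = src.next();
                if (filter.isValid(v)) out.add(filter.resolve(v));
            }
            return out.iterator();
        }

        public static void main(String[] args) {
            List<Wrapped> entries = List.of(
                    new Wrapped(new byte[]{3}, false),
                    new Wrapped(null, true),             // a deletion marker
                    new Wrapped(new byte[]{7}, false));
            Iterator<byte[]> it = apply(entries.iterator(), HIDE_DELETED);
            while (it.hasNext()) System.out.println(it.next()[0]); // 3 then 7
        }
    }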