From: Eric F. <er...@us...> - 2001-12-27 19:20:29
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent/io In directory usw-pr-cvs1:/tmp/cvs-serv11903/src/java/opennlp/maxent/io Modified Files: GISModelReader.java GISModelWriter.java OldFormatGISModelReader.java Log Message: This is the merge of the no_colt branch -> head. The following notes are copied from the head of the CHANGES file. Removed Colt dependency in favor of GNU Trove. (Eric) Refactored index() method in DataIndexer so that only one pass over the list of events is needed. This saves time (of course) and also space, since it's no longer necessary to allocate temporary data structures to share data between two loops. (Eric) Refactored sorting/merging algorithm for ComparableEvents so that merging can be done in place. This makes it possible to merge without copying duplicate events into sublists and so improves the indexer's ability to work on large data sets with a reasonable amount of memory. There is still more to be done in this department, however. (Eric) Index: GISModelReader.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/io/GISModelReader.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** GISModelReader.java 2001/11/15 15:42:14 1.3 --- GISModelReader.java 2001/12/27 19:20:26 1.4 *************** *** 18,23 **** package opennlp.maxent.io; import opennlp.maxent.*; - import cern.colt.map.*; import java.util.StringTokenizer; --- 18,23 ---- package opennlp.maxent.io; + import gnu.trove.*; import opennlp.maxent.*; import java.util.StringTokenizer; *************** *** 79,83 **** int[][] outcomePatterns = getOutcomePatterns(); String[] predLabels = getPredicates(); ! OpenIntDoubleHashMap[] params = getParameters(outcomePatterns); return new GISModel(params, --- 79,83 ---- int[][] outcomePatterns = getOutcomePatterns(); String[] predLabels = getPredicates(); ! TIntDoubleHashMap[] params = getParameters(outcomePatterns); return new GISModel(params, *************** *** 134,151 **** } ! protected OpenIntDoubleHashMap[] getParameters (int[][] outcomePatterns) throws java.io.IOException { ! OpenIntDoubleHashMap[] params = new OpenIntDoubleHashMap[NUM_PREDS]; int pid=0; for (int i=0; i<outcomePatterns.length; i++) { for (int j=0; j<outcomePatterns[i][0]; j++) { ! params[pid] = new OpenIntDoubleHashMap(); for (int k=1; k<outcomePatterns[i].length; k++) { double d = readDouble(); params[pid].put(outcomePatterns[i][k], d); } ! params[pid].trimToSize(); pid++; } --- 134,151 ---- } ! protected TIntDoubleHashMap[] getParameters (int[][] outcomePatterns) throws java.io.IOException { ! TIntDoubleHashMap[] params = new TIntDoubleHashMap[NUM_PREDS]; int pid=0; for (int i=0; i<outcomePatterns.length; i++) { for (int j=0; j<outcomePatterns[i][0]; j++) { ! params[pid] = new TIntDoubleHashMap(); for (int k=1; k<outcomePatterns[i].length; k++) { double d = readDouble(); params[pid].put(outcomePatterns[i][k], d); } ! params[pid].compact(); pid++; } Index: GISModelWriter.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/io/GISModelWriter.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** GISModelWriter.java 2001/11/15 16:18:40 1.3 --- GISModelWriter.java 2001/12/27 19:20:26 1.4 *************** *** 20,25 **** import opennlp.maxent.*; import gnu.trove.*; - import cern.colt.list.*; - import cern.colt.map.*; import java.io.*; import java.util.*; --- 20,23 ---- *************** *** 34,38 **** */ public abstract class GISModelWriter { ! protected OpenIntDoubleHashMap[] PARAMS; protected String[] OUTCOME_LABELS; protected int CORRECTION_CONSTANT; --- 32,36 ---- */ public abstract class GISModelWriter { ! protected TIntDoubleHashMap[] PARAMS; protected String[] OUTCOME_LABELS; protected int CORRECTION_CONSTANT; *************** *** 44,48 **** Object[] data = model.getDataStructures(); ! PARAMS = (OpenIntDoubleHashMap[])data[0]; TObjectIntHashMap pmap = (TObjectIntHashMap)data[1]; OUTCOME_LABELS = (String[])data[2]; --- 42,46 ---- Object[] data = model.getDataStructures(); ! PARAMS = (TIntDoubleHashMap[])data[0]; TObjectIntHashMap pmap = (TObjectIntHashMap)data[1]; OUTCOME_LABELS = (String[])data[2]; *************** *** 121,153 **** protected ComparablePredicate[] sortValues () { ! ComparablePredicate[] sortPreds = ! new ComparablePredicate[PARAMS.length]; ! int numParams = 0; ! for (int pid=0; pid<PARAMS.length; pid++) { ! IntArrayList predkeys = PARAMS[pid].keys(); ! predkeys.sort(); ! int numActive = predkeys.size(); ! numParams += numActive; ! int[] activeOCs = new int[numActive]; ! double[] activeParams = new double[numActive]; ! int id = 0; ! for (int i=0; i<predkeys.size(); i++) { ! int oid = predkeys.get(i); ! activeOCs[id] = oid; ! activeParams[id] = PARAMS[pid].get(oid); ! id++; ! } ! sortPreds[pid] = new ComparablePredicate(PRED_LABELS[pid], ! activeOCs, ! activeParams); ! } ! Arrays.sort(sortPreds); ! return sortPreds; } --- 119,151 ---- protected ComparablePredicate[] sortValues () { ! ComparablePredicate[] sortPreds = ! new ComparablePredicate[PARAMS.length]; ! int numParams = 0; ! for (int pid=0; pid<PARAMS.length; pid++) { ! int[] predkeys = PARAMS[pid].keys(); ! Arrays.sort(predkeys); ! int numActive = predkeys.length; ! numParams += numActive; ! int[] activeOCs = new int[numActive]; ! double[] activeParams = new double[numActive]; ! int id = 0; ! for (int i=0; i < predkeys.length; i++) { ! int oid = predkeys[i]; ! activeOCs[id] = oid; ! activeParams[id] = PARAMS[pid].get(oid); ! id++; ! } ! sortPreds[pid] = new ComparablePredicate(PRED_LABELS[pid], ! activeOCs, ! activeParams); ! } ! Arrays.sort(sortPreds); ! return sortPreds; } Index: OldFormatGISModelReader.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/io/OldFormatGISModelReader.java,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** OldFormatGISModelReader.java 2001/10/23 14:06:53 1.1.1.1 --- OldFormatGISModelReader.java 2001/12/27 19:20:26 1.2 *************** *** 18,22 **** package opennlp.maxent.io; ! import cern.colt.map.*; import java.io.*; import java.util.zip.*; --- 18,22 ---- package opennlp.maxent.io; ! import gnu.trove.*; import java.io.*; import java.util.zip.*; *************** *** 45,66 **** } ! protected OpenIntDoubleHashMap[] getParameters (int[][] outcomePatterns) ! throws java.io.IOException { ! OpenIntDoubleHashMap[] params = new OpenIntDoubleHashMap[NUM_PREDS]; ! int pid=0; ! for (int i=0; i<outcomePatterns.length; i++) { ! for (int j=0; j<outcomePatterns[i][0]; j++) { ! params[pid] = new OpenIntDoubleHashMap(); ! for (int k=1; k<outcomePatterns[i].length; k++) { ! double d = paramsInput.readDouble(); ! params[pid].put(outcomePatterns[i][k], d); ! } ! params[pid].trimToSize(); ! pid++; ! } ! } ! return params; } --- 45,66 ---- } ! protected TIntDoubleHashMap[] getParameters (int[][] outcomePatterns) ! throws java.io.IOException { ! TIntDoubleHashMap[] params = new TIntDoubleHashMap[NUM_PREDS]; ! int pid=0; ! for (int i=0; i<outcomePatterns.length; i++) { ! for (int j=0; j<outcomePatterns[i][0]; j++) { ! params[pid] = new TIntDoubleHashMap(); ! for (int k=1; k<outcomePatterns[i].length; k++) { ! double d = paramsInput.readDouble(); ! params[pid].put(outcomePatterns[i][k], d); ! } ! params[pid].compact(); ! pid++; ! } ! } ! return params; } |