From: Eric F. <er...@us...> - 2001-12-27 19:20:29
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory usw-pr-cvs1:/tmp/cvs-serv11903/src/java/opennlp/maxent Modified Files: ComparableEvent.java DataIndexer.java GISModel.java GISTrainer.java Log Message: This is the merge of the no_colt branch -> head. The following notes are copied from the head of the CHANGES file. Removed Colt dependency in favor of GNU Trove. (Eric) Refactored index() method in DataIndexer so that only one pass over the list of events is needed. This saves time (of course) and also space, since it's no longer necessary to allocate temporary data structures to share data between two loops. (Eric) Refactored sorting/merging algorithm for ComparableEvents so that merging can be done in place. This makes it possible to merge without copying duplicate events into sublists and so improves the indexer's ability to work on large data sets with a reasonable amount of memory. There is still more to be done in this department, however. (Eric) Index: ComparableEvent.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/ComparableEvent.java,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** ComparableEvent.java 2001/10/23 14:06:53 1.1.1.1 --- ComparableEvent.java 2001/12/27 19:20:26 1.2 *************** *** 30,67 **** public int outcome; public int[] predIndexes; public ComparableEvent(int oc, int[] pids) { ! outcome = oc; ! Arrays.sort(pids); ! predIndexes = pids; } public int compareTo(Object o) { ! ComparableEvent ce = (ComparableEvent)o; ! if (outcome < ce.outcome) return -1; ! else if (outcome > ce.outcome) return 1; ! int smallerLength = (predIndexes.length > ce.predIndexes.length? ! ce.predIndexes.length : predIndexes.length); ! for (int i=0; i<smallerLength; i++) { ! if (predIndexes[i] < ce.predIndexes[i]) return -1; ! else if (predIndexes[i] > ce.predIndexes[i]) return 1; ! } ! if (predIndexes.length < ce.predIndexes.length) return -1; ! else if (predIndexes.length > ce.predIndexes.length) return 1; ! return 0; } public String toString() { ! String s = ""; ! for (int i=0; i<predIndexes.length; i++) s+= " "+predIndexes[i]; ! return s; } - } --- 30,68 ---- public int outcome; public int[] predIndexes; + public int seen = 1; // the number of times this event + // has been seen. public ComparableEvent(int oc, int[] pids) { ! outcome = oc; ! Arrays.sort(pids); ! predIndexes = pids; } public int compareTo(Object o) { ! ComparableEvent ce = (ComparableEvent)o; ! if (outcome < ce.outcome) return -1; ! else if (outcome > ce.outcome) return 1; ! int smallerLength = (predIndexes.length > ce.predIndexes.length? ! ce.predIndexes.length : predIndexes.length); ! for (int i=0; i<smallerLength; i++) { ! if (predIndexes[i] < ce.predIndexes[i]) return -1; ! else if (predIndexes[i] > ce.predIndexes[i]) return 1; ! } ! if (predIndexes.length < ce.predIndexes.length) return -1; ! else if (predIndexes.length > ce.predIndexes.length) return 1; ! return 0; } public String toString() { ! String s = ""; ! for (int i=0; i<predIndexes.length; i++) s+= " "+predIndexes[i]; ! return s; } } Index: DataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/DataIndexer.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** DataIndexer.java 2001/11/15 18:08:20 1.4 --- DataIndexer.java 2001/12/27 19:20:26 1.5 *************** *** 83,106 **** System.out.print("Sorting and merging events... "); Arrays.sort(eventsToCompare); ComparableEvent ce = eventsToCompare[0]; ! List uniqueEvents = new ArrayList(); ! List newGroup = new ArrayList(); ! int numEvents = eventsToCompare.length; ! for (int i=0; i<numEvents; i++) { if (ce.compareTo(eventsToCompare[i]) == 0) { ! newGroup.add(eventsToCompare[i]); ! } else { ! ce = eventsToCompare[i]; ! uniqueEvents.add(newGroup); ! newGroup = new ArrayList(); ! newGroup.add(eventsToCompare[i]); } } - uniqueEvents.add(newGroup); - int numUniqueEvents = uniqueEvents.size(); - System.out.println("done. Reduced " + eventsToCompare.length + " events to " + numUniqueEvents + "."); --- 83,118 ---- System.out.print("Sorting and merging events... "); + sortAndMerge(eventsToCompare); + System.out.println("Done indexing."); + } + + /** + * Sorts and uniques the array of comparable events. This method + * will alter the eventsToCompare array -- it does an in place + * sort, followed by an in place edit to remove duplicates. + * + * @param eventsToCompare a <code>ComparableEvent[]</code> value + * @since maxent 1.2.6 + */ + private void sortAndMerge(ComparableEvent[] eventsToCompare) { Arrays.sort(eventsToCompare); + int numEvents = eventsToCompare.length; + int numUniqueEvents = 1; // assertion: eventsToCompare.length >= 1 + + if (eventsToCompare.length <= 1) { + return; // nothing to do; edge case (see assertion) + } ComparableEvent ce = eventsToCompare[0]; ! for (int i=1; i<numEvents; i++) { if (ce.compareTo(eventsToCompare[i]) == 0) { ! ce.seen++; // increment the seen count ! eventsToCompare[i] = null; // kill the duplicate ! } else { ! ce = eventsToCompare[i]; // a new champion emerges... ! numUniqueEvents++; // increment the # of unique events } } System.out.println("done. Reduced " + eventsToCompare.length + " events to " + numUniqueEvents + "."); *************** *** 110,122 **** numTimesEventsSeen = new int[numUniqueEvents]; ! for (int i=0; i<numUniqueEvents; i++) { ! List group = (List)uniqueEvents.get(i); ! numTimesEventsSeen[i] = group.size(); ! ComparableEvent nextCE = (ComparableEvent)group.get(0); ! outcomeList[i] = nextCE.outcome; ! contexts[i] = nextCE.predIndexes; } - - System.out.println("Done indexing."); } --- 122,135 ---- numTimesEventsSeen = new int[numUniqueEvents]; ! for (int i = 0, j = 0; i<numEvents; i++) { ! ComparableEvent evt = eventsToCompare[i]; ! if (null == evt) { ! continue; // this was a dupe, skip over it. ! } ! numTimesEventsSeen[j] = evt.seen; ! outcomeList[j] = evt.outcome; ! contexts[j] = evt.predIndexes; ! ++j; } } *************** *** 161,167 **** int outcomeCount = 0; int predCount = 0; ! int[] uncompressedOutcomeList = new int[numEvents]; ! List uncompressedContexts = new ArrayList(); ! for (int eventIndex=0; eventIndex<numEvents; eventIndex++) { Event ev = (Event)events.removeFirst(); --- 174,179 ---- int outcomeCount = 0; int predCount = 0; ! ComparableEvent[] eventsToCompare = new ComparableEvent[numEvents]; ! for (int eventIndex=0; eventIndex<numEvents; eventIndex++) { Event ev = (Event)events.removeFirst(); *************** *** 191,225 **** } } ! uncompressedContexts.add(indexedContext); ! uncompressedOutcomeList[eventIndex] = ocID.intValue(); ! } ! outcomeLabels = new String[omap.size()]; ! for (Iterator i=omap.keySet().iterator(); i.hasNext();) { ! String oc = (String)i.next(); ! outcomeLabels[((Integer)omap.get(oc)).intValue()] = oc; ! } ! omap = null; ! ! predLabels = new String[pmap.size()]; ! for (Iterator i = pmap.keySet().iterator(); i.hasNext();) { ! String n = (String)i.next(); ! predLabels[((Integer)pmap.get(n)).intValue()] = n; } ! pmap = null; ! ! ComparableEvent[] eventsToCompare = new ComparableEvent[numEvents]; ! for (int i=0; i<numEvents; i++) { ! List ecLL = (List)uncompressedContexts.get(i); ! int[] ecInts = new int[ecLL.size()]; ! for (int j=0; j<ecInts.length; j++) { ! ecInts[j] = ((Integer)ecLL.get(j)).intValue(); ! } ! eventsToCompare[i] = ! new ComparableEvent(uncompressedOutcomeList[i], ecInts); } ! return eventsToCompare; } - } --- 203,250 ---- } } ! eventsToCompare[eventIndex] = ! new ComparableEvent(ocID.intValue(), ! toIntArray(indexedContext)); } ! outcomeLabels = toIndexedStringArray(omap); ! predLabels = toIndexedStringArray(pmap); ! return eventsToCompare; ! } ! /** ! * Utility method for creating a String[] array from a map whose ! * keys are labels (Strings) to be stored in the array and whose ! * values are the indices (Integers) at which the corresponding ! * labels should be inserted. ! * ! * @param labelToIndexMap a <code>Map</code> value ! * @return a <code>String[]</code> value ! * @since maxent 1.2.6 ! */ ! static String[] toIndexedStringArray(Map labelToIndexMap) { ! String[] array = new String[labelToIndexMap.size()]; ! for (Iterator i = labelToIndexMap.keySet().iterator(); i.hasNext();) { ! String label = (String)i.next(); ! int index = ((Integer)labelToIndexMap.get(label)).intValue(); ! array[index] = label; } + return array; + } ! /** ! * Utility method for turning a list of Integer objects into a ! * native array of primitive ints. ! * ! * @param integers a <code>List</code> value ! * @return an <code>int[]</code> value ! * @since maxent 1.2.6 ! */ ! static final int[] toIntArray(List integers) { ! int[] rv = new int[integers.size()]; ! int i = 0; ! for (Iterator it = integers.iterator(); it.hasNext();) { ! rv[i++] = ((Integer)it.next()).intValue(); ! } ! return rv; } } Index: GISModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISModel.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** GISModel.java 2001/11/30 14:33:28 1.5 --- GISModel.java 2001/12/27 19:20:26 1.6 *************** *** 19,24 **** import gnu.trove.*; - import cern.colt.list.*; - import cern.colt.map.*; import java.util.*; --- 19,22 ---- *************** *** 31,35 **** */ public final class GISModel implements MaxentModel { ! private final OpenIntDoubleHashMap[] params; private final TObjectIntHashMap pmap; private final String[] ocNames; --- 29,33 ---- */ public final class GISModel implements MaxentModel { ! private final TIntDoubleHashMap[] params; private final TObjectIntHashMap pmap; private final String[] ocNames; *************** *** 41,62 **** private final double fval; ! public GISModel (OpenIntDoubleHashMap[] _params, ! String[] predLabels, ! String[] _ocNames, ! int _correctionConstant, ! double _correctionParam) { ! pmap = new TObjectIntHashMap(predLabels.length); ! for (int i=0; i<predLabels.length; i++) ! pmap.put(predLabels[i], i); ! params = _params; ! ocNames = _ocNames; ! correctionConstant = _correctionConstant; ! correctionParam = _correctionParam; ! numOutcomes = ocNames.length; ! iprob = Math.log(1.0/numOutcomes); ! fval = 1.0/correctionConstant; } --- 39,60 ---- private final double fval; ! public GISModel (TIntDoubleHashMap[] _params, ! String[] predLabels, ! String[] _ocNames, ! int _correctionConstant, ! double _correctionParam) { ! pmap = new TObjectIntHashMap(predLabels.length); ! for (int i=0; i<predLabels.length; i++) ! pmap.put(predLabels[i], i); ! params = _params; ! ocNames = _ocNames; ! correctionConstant = _correctionConstant; ! correctionParam = _correctionParam; ! numOutcomes = ocNames.length; ! iprob = Math.log(1.0/numOutcomes); ! fval = 1.0/correctionConstant; } *************** *** 77,115 **** */ public final double[] eval(String[] context) { ! double[] outsums = new double[numOutcomes]; ! int[] numfeats = new int[numOutcomes]; ! for (int oid=0; oid<numOutcomes; oid++) { ! outsums[oid] = iprob; ! numfeats[oid] = 0; ! } ! IntArrayList activeOutcomes = new IntArrayList(0); ! for (int i=0; i<context.length; i++) { ! if (pmap.containsKey(context[i])) { ! OpenIntDoubleHashMap predParams = ! params[pmap.get(context[i])]; ! predParams.keys(activeOutcomes); ! for (int j=0; j<activeOutcomes.size(); j++) { ! int oid = activeOutcomes.getQuick(j); ! numfeats[oid]++; ! outsums[oid] += fval * predParams.get(oid); ! } ! } ! } ! double normal = 0.0; ! for (int oid=0; oid<numOutcomes; oid++) { ! outsums[oid] = Math.exp(outsums[oid] ! + ((1.0 - ! (numfeats[oid]/correctionConstant)) ! * correctionParam)); ! normal += outsums[oid]; ! } ! for (int oid=0; oid<numOutcomes; oid++) ! outsums[oid] /= normal; ! return outsums; } --- 75,113 ---- */ public final double[] eval(String[] context) { ! double[] outsums = new double[numOutcomes]; ! int[] numfeats = new int[numOutcomes]; ! for (int oid=0; oid<numOutcomes; oid++) { ! outsums[oid] = iprob; ! numfeats[oid] = 0; ! } ! int[] activeOutcomes; ! for (int i=0; i<context.length; i++) { ! if (pmap.containsKey(context[i])) { ! TIntDoubleHashMap predParams = ! params[pmap.get(context[i])]; ! activeOutcomes = predParams.keys(); ! for (int j=0; j<activeOutcomes.length; j++) { ! int oid = activeOutcomes[j]; ! numfeats[oid]++; ! outsums[oid] += fval * predParams.get(oid); ! } ! } ! } ! double normal = 0.0; ! for (int oid=0; oid<numOutcomes; oid++) { ! outsums[oid] = Math.exp(outsums[oid] ! + ((1.0 - ! (numfeats[oid]/correctionConstant)) ! * correctionParam)); ! normal += outsums[oid]; ! } ! for (int oid=0; oid<numOutcomes; oid++) ! outsums[oid] /= normal; ! return outsums; } *************** *** 124,131 **** */ public final String getBestOutcome(double[] ocs) { ! int best = 0; ! for (int i = 1; i<ocs.length; i++) ! if (ocs[i] > ocs[best]) best = i; ! return ocNames[best]; } --- 122,129 ---- */ public final String getBestOutcome(double[] ocs) { ! int best = 0; ! for (int i = 1; i<ocs.length; i++) ! if (ocs[i] > ocs[best]) best = i; ! return ocNames[best]; } *************** *** 144,164 **** */ public final String getAllOutcomes (double[] ocs) { ! if (ocs.length != ocNames.length) { ! return "The double array sent as a parameter to GISModel.getAllOutcomes() must not have been produced by this model."; ! } ! else { ! StringBuffer sb = new StringBuffer(ocs.length*2); ! String d = Double.toString(ocs[0]); ! if (d.length() > 6) ! d = d.substring(0,7); ! sb.append(ocNames[0]).append("[").append(d).append("]"); ! for (int i = 1; i<ocs.length; i++) { ! d = Double.toString(ocs[i]); ! if (d.length() > 6) ! d = d.substring(0,7); ! sb.append(" ").append(ocNames[i]).append("[").append(d).append("]"); ! } ! return sb.toString(); ! } } --- 142,162 ---- */ public final String getAllOutcomes (double[] ocs) { ! if (ocs.length != ocNames.length) { ! return "The double array sent as a parameter to GISModel.getAllOutcomes() must not have been produced by this model."; ! } ! else { ! StringBuffer sb = new StringBuffer(ocs.length*2); ! String d = Double.toString(ocs[0]); ! if (d.length() > 6) ! d = d.substring(0,7); ! sb.append(ocNames[0]).append("[").append(d).append("]"); ! for (int i = 1; i<ocs.length; i++) { ! d = Double.toString(ocs[i]); ! if (d.length() > 6) ! d = d.substring(0,7); ! sb.append(" ").append(ocNames[i]).append("[").append(d).append("]"); ! } ! return sb.toString(); ! } } *************** *** 171,175 **** */ public final String getOutcome(int i) { ! return ocNames[i]; } --- 169,173 ---- */ public final String getOutcome(int i) { ! return ocNames[i]; } *************** *** 183,191 **** **/ public int getIndex (String outcome) { ! for (int i=0; i<ocNames.length; i++) { ! if (ocNames[i].equals(outcome)) ! return i; ! } ! return -1; } --- 181,189 ---- **/ public int getIndex (String outcome) { ! for (int i=0; i<ocNames.length; i++) { ! if (ocNames[i].equals(outcome)) ! return i; ! } ! return -1; } *************** *** 197,201 **** * which is returned by this method: * ! * <li>index 0: cern.colt.map.OpenIntDoubleHashMap[] containing the model * parameters * <li>index 1: java.util.Map containing the mapping of model predicates --- 195,199 ---- * which is returned by this method: * ! * <li>index 0: gnu.trove.TIntDoubleHashMap[] containing the model * parameters * <li>index 1: java.util.Map containing the mapping of model predicates *************** *** 212,225 **** */ public final Object[] getDataStructures () { ! Object[] data = new Object[5]; ! data[0] = params; ! data[1] = pmap; ! data[2] = ocNames; ! data[3] = new Integer(correctionConstant); ! data[4] = new Double(correctionParam); ! return data; } - - - } --- 210,220 ---- */ public final Object[] getDataStructures () { ! Object[] data = new Object[5]; ! data[0] = params; ! data[1] = pmap; ! data[2] = ocNames; ! data[3] = new Integer(correctionConstant); ! data[4] = new Double(correctionParam); ! return data; } } Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** GISTrainer.java 2001/11/16 10:37:43 1.2 --- GISTrainer.java 2001/12/27 19:20:26 1.3 *************** *** 18,24 **** package opennlp.maxent; ! import cern.colt.function.*; ! import cern.colt.list.*; ! import cern.colt.map.*; import java.io.*; --- 18,22 ---- package opennlp.maxent; ! import gnu.trove.*; import java.io.*; *************** *** 82,95 **** // stores the observed expections of each of the events ! private OpenIntDoubleHashMap[] observedExpects; // stores the estimated parameter value of each predicate during iteration ! private OpenIntDoubleHashMap[] params; // stores the modifiers of the parameter values, paired to params ! private OpenIntDoubleHashMap[] modifiers; // a helper object for storing predicate indexes ! private IntArrayList predkeys; // a boolean to track if all events have same number of active features --- 80,93 ---- // stores the observed expections of each of the events ! private TIntDoubleHashMap[] observedExpects; // stores the estimated parameter value of each predicate during iteration ! private TIntDoubleHashMap[] params; // stores the modifiers of the parameter values, paired to params ! private TIntDoubleHashMap[] modifiers; // a helper object for storing predicate indexes ! private int[] predkeys; // a boolean to track if all events have same number of active features *************** *** 109,137 **** // stores the value of corrections feature for each event's predicate list, // expanded to include all outcomes which might come from those predicates. ! private OpenIntIntHashMap[] cfvals; // Normalized Probabilities Of Outcomes Given Context: p(a|b_i) // Stores the computation of each iterations for the update to the // modifiers (and therefore the params) ! private OpenIntDoubleHashMap[] pabi; ! // make all values in an OpenIntDoubleHashMap return to 0.0 ! private DoubleFunction backToZeros = ! new DoubleFunction() { ! public double apply(double arg) { return 0.0; } }; ! // divide all values in the OpenIntDoubleHashMap pabi[TID] by the sum of // all values in the map. ! private DoubleFunction normalizePABI = ! new DoubleFunction() { ! public double apply(double arg) { return arg / PABISUM; } }; // add the previous iteration's parameters to the computation of the // modifiers of this iteration. ! private IntDoubleProcedure addParamsToPABI = ! new IntDoubleProcedure() { ! public boolean apply(int oid, double arg) { pabi[TID].put(oid, pabi[TID].get(oid) + arg); return true; --- 107,135 ---- // stores the value of corrections feature for each event's predicate list, // expanded to include all outcomes which might come from those predicates. ! private TIntIntHashMap[] cfvals; // Normalized Probabilities Of Outcomes Given Context: p(a|b_i) // Stores the computation of each iterations for the update to the // modifiers (and therefore the params) ! private TIntDoubleHashMap[] pabi; ! // make all values in an TIntDoubleHashMap return to 0.0 ! private TDoubleFunction backToZeros = ! new TDoubleFunction() { ! public double execute(double arg) { return 0.0; } }; ! // divide all values in the TIntDoubleHashMap pabi[TID] by the sum of // all values in the map. ! private TDoubleFunction normalizePABI = ! new TDoubleFunction() { ! public double execute(double arg) { return arg / PABISUM; } }; // add the previous iteration's parameters to the computation of the // modifiers of this iteration. ! private TIntDoubleProcedure addParamsToPABI = ! new TIntDoubleProcedure() { ! public boolean execute(int oid, double arg) { pabi[TID].put(oid, pabi[TID].get(oid) + arg); return true; *************** *** 140,146 **** // add the correction parameter and exponentiate it ! private IntDoubleProcedure addCorrectionToPABIandExponentiate = ! new IntDoubleProcedure() { ! public boolean apply(int oid, double arg) { if (needCorrection) arg = arg + (correctionParam * cfvals[TID].get(oid)); --- 138,144 ---- // add the correction parameter and exponentiate it ! private TIntDoubleProcedure addCorrectionToPABIandExponentiate = ! new TIntDoubleProcedure() { ! public boolean execute(int oid, double arg) { if (needCorrection) arg = arg + (correctionParam * cfvals[TID].get(oid)); *************** *** 153,159 **** // update the modifiers based on the new pabi values ! private IntDoubleProcedure updateModifiers = ! new IntDoubleProcedure() { ! public boolean apply(int oid, double arg) { modifiers[PID].put(oid, arg --- 151,157 ---- // update the modifiers based on the new pabi values ! private TIntDoubleProcedure updateModifiers = ! new TIntDoubleProcedure() { ! public boolean execute(int oid, double arg) { modifiers[PID].put(oid, arg *************** *** 165,171 **** // update the params based on the newly computed modifiers ! private IntDoubleProcedure updateParams = ! new IntDoubleProcedure() { ! public boolean apply(int oid, double arg) { params[PID].put(oid, arg --- 163,169 ---- // update the params based on the newly computed modifiers ! private TIntDoubleProcedure updateParams = ! new TIntDoubleProcedure() { ! public boolean execute(int oid, double arg) { params[PID].put(oid, arg *************** *** 179,185 **** // update the correction feature modifier, which will then be used to // updated the correction parameter ! private IntDoubleProcedure updateCorrectionFeatureModifier = ! new IntDoubleProcedure() { ! public boolean apply(int oid, double arg) { CFMOD += arg * cfvals[TID].get(oid) * numTimesEventsSeen[TID]; return true; --- 177,183 ---- // update the correction feature modifier, which will then be used to // updated the correction parameter ! private TIntDoubleProcedure updateCorrectionFeatureModifier = ! new TIntDoubleProcedure() { ! public boolean execute(int oid, double arg) { CFMOD += arg * cfvals[TID].get(oid) * numTimesEventsSeen[TID]; return true; *************** *** 304,315 **** // implementation, this is cancelled out when we compute the next // iteration of a parameter, making the extra divisions wasteful. ! params = new OpenIntDoubleHashMap[numPreds]; ! modifiers = new OpenIntDoubleHashMap[numPreds]; ! observedExpects = new OpenIntDoubleHashMap[numPreds]; for (PID=0; PID<numPreds; PID++) { ! params[PID] = new OpenIntDoubleHashMap(); ! modifiers[PID] = new OpenIntDoubleHashMap(); ! observedExpects[PID] = new OpenIntDoubleHashMap(); for (OID=0; OID<numOutcomes; OID++) { if (predCount[PID][OID] > 0) { --- 302,313 ---- // implementation, this is cancelled out when we compute the next // iteration of a parameter, making the extra divisions wasteful. ! params = new TIntDoubleHashMap[numPreds]; ! modifiers = new TIntDoubleHashMap[numPreds]; ! observedExpects = new TIntDoubleHashMap[numPreds]; for (PID=0; PID<numPreds; PID++) { ! params[PID] = new TIntDoubleHashMap(); ! modifiers[PID] = new TIntDoubleHashMap(); ! observedExpects[PID] = new TIntDoubleHashMap(); for (OID=0; OID<numOutcomes; OID++) { if (predCount[PID][OID] > 0) { *************** *** 324,330 **** } } ! params[PID].trimToSize(); ! modifiers[PID].trimToSize(); ! observedExpects[PID].trimToSize(); } --- 322,328 ---- } } ! params[PID].compact(); ! modifiers[PID].compact(); ! observedExpects[PID].compact(); } *************** *** 333,337 **** display("...done.\n"); ! pabi = new OpenIntDoubleHashMap[numTokens]; if (needCorrection) { --- 331,335 ---- display("...done.\n"); ! pabi = new TIntDoubleHashMap[numTokens]; if (needCorrection) { *************** *** 339,351 **** display("Computing correction feature matrix... "); ! cfvals = new OpenIntIntHashMap[numTokens]; for (TID=0; TID<numTokens; TID++) { ! cfvals[TID] = new OpenIntIntHashMap(); ! pabi[TID] = new OpenIntDoubleHashMap(); for (int j=0; j<contexts[TID].length; j++) { PID = contexts[TID][j]; predkeys = params[PID].keys(); ! for (int i=0; i<predkeys.size(); i++) { ! OID = predkeys.get(i); if (cfvals[TID].containsKey(OID)) { cfvals[TID].put(OID, cfvals[TID].get(OID) + 1); --- 337,349 ---- display("Computing correction feature matrix... "); ! cfvals = new TIntIntHashMap[numTokens]; for (TID=0; TID<numTokens; TID++) { ! cfvals[TID] = new TIntIntHashMap(); ! pabi[TID] = new TIntDoubleHashMap(); for (int j=0; j<contexts[TID].length; j++) { PID = contexts[TID][j]; predkeys = params[PID].keys(); ! for (int i=0; i<predkeys.length; i++) { ! OID = predkeys[i]; if (cfvals[TID].containsKey(OID)) { cfvals[TID].put(OID, cfvals[TID].get(OID) + 1); *************** *** 356,367 **** } } ! cfvals[TID].trimToSize(); ! pabi[TID].trimToSize(); } for (TID=0; TID<numTokens; TID++) { predkeys = cfvals[TID].keys(); ! for (int i=0; i<predkeys.size(); i++) { ! OID = predkeys.get(i); cfvals[TID].put(OID, constant - cfvals[TID].get(OID)); } --- 354,365 ---- } } ! cfvals[TID].compact(); ! pabi[TID].compact(); } for (TID=0; TID<numTokens; TID++) { predkeys = cfvals[TID].keys(); ! for (int i=0; i<predkeys.length; i++) { ! OID = predkeys[i]; cfvals[TID].put(OID, constant - cfvals[TID].get(OID)); } *************** *** 381,394 **** else { // initialize just the pabi table ! pabi = new OpenIntDoubleHashMap[numTokens]; for (TID=0; TID<numTokens; TID++) { ! pabi[TID] = new OpenIntDoubleHashMap(); for (int j=0; j<contexts[TID].length; j++) { PID = contexts[TID][j]; predkeys = params[PID].keys(); ! for (int i=0; i<predkeys.size(); i++) ! pabi[TID].put(predkeys.get(i), 0.0); } ! pabi[TID].trimToSize(); } } --- 379,392 ---- else { // initialize just the pabi table ! pabi = new TIntDoubleHashMap[numTokens]; for (TID=0; TID<numTokens; TID++) { ! pabi[TID] = new TIntDoubleHashMap(); for (int j=0; j<contexts[TID].length; j++) { PID = contexts[TID][j]; predkeys = params[PID].keys(); ! for (int i=0; i<predkeys.length; i++) ! pabi[TID].put(predkeys[i], 0.0); } ! pabi[TID].compact(); } } *************** *** 434,448 **** CFMOD = 0.0; for (TID=0; TID<numTokens; TID++) { ! pabi[TID].assign(backToZeros); for (int j=0; j<contexts[TID].length; j++) ! params[contexts[TID][j]].forEachPair(addParamsToPABI); PABISUM = 0.0; // PABISUM is computed in the next line's procedure ! pabi[TID].forEachPair(addCorrectionToPABIandExponentiate); ! if (PABISUM > 0.0) pabi[TID].assign(normalizePABI); if (needCorrection) ! pabi[TID].forEachPair(updateCorrectionFeatureModifier); } display("."); --- 432,446 ---- CFMOD = 0.0; for (TID=0; TID<numTokens; TID++) { ! pabi[TID].transformValues(backToZeros); for (int j=0; j<contexts[TID].length; j++) ! params[contexts[TID][j]].forEachEntry(addParamsToPABI); PABISUM = 0.0; // PABISUM is computed in the next line's procedure ! pabi[TID].forEachEntry(addCorrectionToPABIandExponentiate); ! if (PABISUM > 0.0) pabi[TID].transformValues(normalizePABI); if (needCorrection) ! pabi[TID].forEachEntry(updateCorrectionFeatureModifier); } display("."); *************** *** 455,459 **** // globally for the updateModifiers procedure used after it PID = contexts[TID][j]; ! modifiers[PID].forEachPair(updateModifiers); } } --- 453,457 ---- // globally for the updateModifiers procedure used after it PID = contexts[TID][j]; ! modifiers[PID].forEachEntry(updateModifiers); } } *************** *** 462,467 **** // compute the new parameter values for (PID=0; PID<numPreds; PID++) { ! params[PID].forEachPair(updateParams); ! modifiers[PID].assign(backToZeros); // re-initialize to 0.0's } --- 460,465 ---- // compute the new parameter values for (PID=0; PID<numPreds; PID++) { ! params[PID].forEachEntry(updateParams); ! modifiers[PID].transformValues(backToZeros); // re-initialize to 0.0's } |