From: Thomas M. <tsm...@us...> - 2004-06-11 20:51:53
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15112/src/java/opennlp/maxent Modified Files: GISTrainer.java GISModel.java Added Files: TObjectIndexHashMap.java TIntParamHashMap.java Log Message: maxent specific trove extensions for optimization. --- NEW FILE: TObjectIndexHashMap.java --- package opennlp.maxent; import gnu.trove.TObjectHashingStrategy; import gnu.trove.TObjectIntHashMap; public class TObjectIndexHashMap extends TObjectIntHashMap { public TObjectIndexHashMap() { super(); } public TObjectIndexHashMap(TObjectHashingStrategy arg0) { super(arg0); } public TObjectIndexHashMap(int arg0) { super(arg0); } public TObjectIndexHashMap(int arg0, float arg1) { super(arg0, arg1); } public TObjectIndexHashMap(int arg0, float arg1, TObjectHashingStrategy arg2) { super(arg0, arg1, arg2); } public TObjectIndexHashMap(int arg0, TObjectHashingStrategy arg1) { super(arg0, arg1); } public int get(Object key) { int index = index(key); return index < 0 ? (int)-1 : _values[index]; } } --- NEW FILE: TIntParamHashMap.java --- package opennlp.maxent; import gnu.trove.TIntDoubleHashMap; public class TIntParamHashMap extends TIntDoubleHashMap { public TIntParamHashMap() { super(); } public TIntParamHashMap(int initialCapacity) { super(initialCapacity); } public TIntParamHashMap(int initialCapacity, float loadFactor) { super(initialCapacity, loadFactor); } public double get(int key) { int hash, probe, index, length; int[] set; byte[] states; states = _states; set = _set; length = states.length; hash = key & 0x7fffffff; index = hash % length; if (states[index] != FREE && (states[index] == REMOVED || set[index] != key)) { // see Knuth, p. 529 probe = 1 + (hash % (length - 2)); do { index -= probe; if (index < 0) { index += length; } } while (states[index] != FREE && (states[index] == REMOVED || set[index] != key)); } return states[index] == FREE ? 0d : _values[index]; } } Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** GISTrainer.java 10 May 2004 03:11:54 -0000 1.13 --- GISTrainer.java 11 Jun 2004 20:51:37 -0000 1.14 *************** *** 58,64 **** private int OID; - /** A global variable for adding probabilities in an array.*/ - private double SUM; - /** Records the array of predicates seen in each event. */ private int[][] contexts; --- 58,61 ---- *************** *** 86,99 **** /** Stores the observed expected values of the features based on training data. */ ! private TIntDoubleHashMap[] observedExpects; /** Stores the estimated parameter value of each predicate during iteration */ ! private TIntDoubleHashMap[] params; /** Stores the expected values of the features based on the current models */ ! private TIntDoubleHashMap[] modelExpects; - /** A helper object for storing predicate indexes. */ - private int[] predkeys; /** The maximum number of feattures fired in an event. Usually refered to a C.*/ --- 83,94 ---- /** Stores the observed expected values of the features based on training data. */ ! private TIntParamHashMap[] observedExpects; /** Stores the estimated parameter value of each predicate during iteration */ ! private TIntParamHashMap[] params; /** Stores the expected values of the features based on the current models */ ! private TIntParamHashMap[] modelExpects; /** The maximum number of feattures fired in an event. Usually refered to a C.*/ *************** *** 281,287 **** // implementation, this is cancelled out when we compute the next // iteration of a parameter, making the extra divisions wasteful. ! params = new TIntDoubleHashMap[numPreds]; ! modelExpects = new TIntDoubleHashMap[numPreds]; ! observedExpects = new TIntDoubleHashMap[numPreds]; int initialCapacity; --- 276,282 ---- // implementation, this is cancelled out when we compute the next // iteration of a parameter, making the extra divisions wasteful. ! params = new TIntParamHashMap[numPreds]; ! modelExpects = new TIntParamHashMap[numPreds]; ! observedExpects = new TIntParamHashMap[numPreds]; int initialCapacity; *************** *** 298,304 **** } for (PID = 0; PID < numPreds; PID++) { ! params[PID] = new TIntDoubleHashMap(initialCapacity, loadFactor); ! modelExpects[PID] = new TIntDoubleHashMap(initialCapacity, loadFactor); ! observedExpects[PID] = new TIntDoubleHashMap(initialCapacity, loadFactor); for (OID = 0; OID < numOutcomes; OID++) { if (predCount[PID][OID] > 0) { --- 293,299 ---- } for (PID = 0; PID < numPreds; PID++) { ! params[PID] = new TIntParamHashMap(initialCapacity, loadFactor); ! modelExpects[PID] = new TIntParamHashMap(initialCapacity, loadFactor); ! observedExpects[PID] = new TIntParamHashMap(initialCapacity, loadFactor); for (OID = 0; OID < numOutcomes; OID++) { if (predCount[PID][OID] > 0) { *************** *** 406,410 **** int[] activeOutcomes; for (int i = 0; i < context.length; i++) { ! TIntDoubleHashMap predParams = params[context[i]]; activeOutcomes = predParams.keys(); for (int j = 0; j < activeOutcomes.length; j++) { --- 401,405 ---- int[] activeOutcomes; for (int i = 0; i < context.length; i++) { ! TIntParamHashMap predParams = params[context[i]]; activeOutcomes = predParams.keys(); for (int j = 0; j < activeOutcomes.length; j++) { Index: GISModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISModel.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** GISModel.java 9 Dec 2003 23:13:53 -0000 1.12 --- GISModel.java 11 Jun 2004 20:51:44 -0000 1.13 *************** *** 1,4 **** /////////////////////////////////////////////////////////////////////////////// ! // Copyright (C) 2001 Jason Baldridge and Gann Bierner // // This library is free software; you can redistribute it and/or --- 1,4 ---- /////////////////////////////////////////////////////////////////////////////// ! // Copyright (C) 2004 Jason Baldridge, Gann Bierner, and Tom Morton // // This library is free software; you can redistribute it and/or *************** *** 18,22 **** package opennlp.maxent; - import gnu.trove.*; import java.text.DecimalFormat; --- 18,21 ---- *************** *** 29,38 **** */ public final class GISModel implements MaxentModel { ! private final TIntDoubleHashMap[] params; ! private final TObjectIntHashMap pmap; private final String[] ocNames; private final double correctionConstant; private final double correctionParam; ! private final int numOutcomes; private final double iprob; --- 28,41 ---- */ public final class GISModel implements MaxentModel { ! /** Mapping between outcomes and paramater values for each context. ! * The integer representation of the context can be found using <code>pmap</code>.*/ ! private final TIntParamHashMap[] params; ! /** Maping between predicates/contexts and an integer representing them. */ ! private final TObjectIndexHashMap pmap; ! /** The names of the outcomes. */ private final String[] ocNames; private final double correctionConstant; private final double correctionParam; ! /** The number of outcomes. */ private final int numOutcomes; private final double iprob; *************** *** 42,46 **** private int[] numfeats; ! public GISModel (TIntDoubleHashMap[] _params, String[] predLabels, String[] _ocNames, --- 45,49 ---- private int[] numfeats; ! public GISModel (TIntParamHashMap[] _params, String[] predLabels, String[] _ocNames, *************** *** 48,52 **** double _correctionParam) { ! pmap = new TObjectIntHashMap(predLabels.length); for (int i=0; i<predLabels.length; i++) pmap.put(predLabels[i], i); --- 51,55 ---- double _correctionParam) { ! pmap = new TObjectIndexHashMap(predLabels.length); for (int i=0; i<predLabels.length; i++) pmap.put(predLabels[i], i); *************** *** 90,119 **** * ids, and the actual string representation of the * outcomes can be obtained from the method ! * getOutcome(int i). */ public final double[] eval(String[] context, double[] outsums) { ! int[] activeOutcomes; ! for (int oid=0; oid<numOutcomes; oid++) { outsums[oid] = iprob; numfeats[oid] = 0; } for (int i=0; i<context.length; i++) { ! if (pmap.containsKey(context[i])) { ! TIntDoubleHashMap predParams = ! params[pmap.get(context[i])]; ! activeOutcomes = predParams.keys(); ! for (int j=0; j<activeOutcomes.length; j++) { ! int oid = activeOutcomes[j]; ! numfeats[oid]++; ! outsums[oid] += fval * predParams.get(oid); ! } ! } } double normal = 0.0; for (int oid=0; oid<numOutcomes; oid++) { ! outsums[oid] = Math.exp(outsums[oid] ! + ((1.0 - ! (numfeats[oid]/correctionConstant)) * correctionParam)); normal += outsums[oid]; --- 93,121 ---- * ids, and the actual string representation of the * outcomes can be obtained from the method ! * getOutcome(int i). */ public final double[] eval(String[] context, double[] outsums) { ! int[] activeOutcomes; ! for (int oid=0; oid<numOutcomes; oid++) { outsums[oid] = iprob; numfeats[oid] = 0; } for (int i=0; i<context.length; i++) { ! int contextIndex = pmap.get(context[i]); ! if (contextIndex >= 0) { ! TIntParamHashMap predParams = params[contextIndex]; ! activeOutcomes = predParams.keys(); ! for (int j=0; j<activeOutcomes.length; j++) { ! int oid = activeOutcomes[j]; ! numfeats[oid]++; ! outsums[oid] += predParams.get(oid); ! } ! } } double normal = 0.0; for (int oid=0; oid<numOutcomes; oid++) { ! outsums[oid] = Math.exp((outsums[oid]*fval) ! + ((1.0 -(numfeats[oid]/correctionConstant)) * correctionParam)); normal += outsums[oid]; |