From: Thomas M. <tsm...@us...> - 2003-01-08 15:44:50
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs1:/tmp/cvs-serv8817 Modified Files: GISTrainer.java Log Message: Fixed some bugs with the updating of the correction parameter. Namely, the expected value needed to check whether a particular context was available with the outcome seen in training and, if not, add a term to the expected value of the correction constant. The smoothing initial value wasn't in the log domain. The loglikelihood update as returned by nextIteration wasn't in the right place, so the loglikelihood value it returned was incorrect. Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** GISTrainer.java 2 Jan 2003 14:46:30 -0000 1.8 --- GISTrainer.java 8 Jan 2003 15:44:47 -0000 1.9 *************** *** 216,249 **** //printTable(contexts); ! // a boolean to track if all events have same number of active features ! boolean needCorrection = false; ! ! // determine the correction constant and its inverse, and check to see ! // whether we need the correction features constant = contexts[0].length; for (TID=1; TID<contexts.length; TID++) { ! if (contexts[TID].length < constant) { ! needCorrection = true; ! } ! else if (contexts[TID].length > constant) { ! needCorrection = true; ! constant = contexts[TID].length; } } - int cfvalSum = 0; - for (TID=0; TID<numTokens; TID++) - cfvalSum += (constant - contexts[TID].length) - * numTimesEventsSeen[TID]; - if (cfvalSum == 0) { - cfObservedExpect = Math.log(NEAR_ZERO);//nearly zero so log is defined - } - else { - cfObservedExpect = Math.log(cfvalSum); - } display("done.\n"); - constantInverse = 1.0/constant; outcomeLabels = di.outcomeLabels; numOutcomes = outcomeLabels.length; --- 216,231 ---- //printTable(contexts); ! 
// determine the correction constant and its inverse constant = contexts[0].length; for (TID=1; TID<contexts.length; TID++) { ! if (contexts[TID].length > constant) { ! constant = contexts[TID].length; } } + constantInverse = 1.0/constant; display("done.\n"); outcomeLabels = di.outcomeLabels; numOutcomes = outcomeLabels.length; *************** *** 265,271 **** //printTable(predCount); - di = null; // don't need it anymore // A fake "observation" to cover features which are not detected in // the data. The default is to assume that we observed "1/10th" of a --- 247,253 ---- //printTable(predCount); di = null; // don't need it anymore + // A fake "observation" to cover features which are not detected in // the data. The default is to assume that we observed "1/10th" of a *************** *** 307,311 **** params[PID].put(OID, 0.0); modifiers[PID].put(OID, 0.0); ! observedExpects[PID].put(OID, smoothingObservation); } } --- 289,293 ---- params[PID].put(OID, 0.0); modifiers[PID].put(OID, 0.0); ! observedExpects[PID].put(OID, logSmoothingObservation); } } *************** *** 314,317 **** --- 296,319 ---- observedExpects[PID].compact(); } + + // compute the expected value of correction + int cfvalSum = 0; + for (TID=0; TID<numTokens; TID++) { + for (int j=0; j<contexts[TID].length; j++) { + PID = contexts[TID][j]; + if (!modifiers[PID].containsKey(outcomes[TID])) { + cfvalSum+=numTimesEventsSeen[TID]; + } + } + cfvalSum += (constant - contexts[TID].length) + * numTimesEventsSeen[TID]; + } + if (cfvalSum == 0) { + cfObservedExpect = Math.log(NEAR_ZERO);//nearly zero so log is defined + } + else { + cfObservedExpect = Math.log(cfvalSum); + } + correctionParam = 0.0; predCount = null; // don't need it anymore *************** *** 338,343 **** /* Estimate and return the model parameters. */ private void findParameters(int iterations) { ! double prevLL = 0.0; ! 
double currLL = 0.0; display("Performing " + iterations + " iterations.\n"); for (int i=1; i<=iterations; i++) { --- 340,345 ---- /* Estimate and return the model parameters. */ private void findParameters(int iterations) { ! double prevLL = 0.0; ! double currLL = 0.0; display("Performing " + iterations + " iterations.\n"); for (int i=1; i<=iterations; i++) { *************** *** 347,357 **** currLL=nextIteration(); if (i > 1) { ! if (prevLL > currLL) { ! System.err.println("Model Diverging: loglikelihood decreased"); ! break; ! } ! if (currLL-prevLL < LLThreshold) { ! break; ! } } prevLL=currLL; --- 349,359 ---- currLL=nextIteration(); if (i > 1) { ! if (prevLL > currLL) { ! System.err.println("Model Diverging: loglikelihood decreased"); ! break; ! } ! if (currLL-prevLL < LLThreshold) { ! break; ! } } prevLL=currLL; *************** *** 415,420 **** double loglikelihood = 0.0; CFMOD=0.0; for (TID=0; TID<numTokens; TID++) { ! // modeldistribution and PID are globals used in // the updateModifiers procedure. They need to be set. eval(contexts[TID],modelDistribution); --- 417,423 ---- double loglikelihood = 0.0; CFMOD=0.0; + int numEvents=0; for (TID=0; TID<numTokens; TID++) { ! // TID, modeldistribution and PID are globals used in // the updateModifiers procedure. They need to be set. eval(contexts[TID],modelDistribution); *************** *** 427,433 **** } } - loglikelihood+=Math.log(modelDistribution[outcomes[TID]]); } ! CFMOD+=constant-contexts[TID].length; } display("."); --- 430,438 ---- } } } ! CFMOD+=(constant-contexts[TID].length)*numTimesEventsSeen[TID]; ! ! loglikelihood+=Math.log(modelDistribution[outcomes[TID]])*numTimesEventsSeen[TID]; ! numEvents+=numTimesEventsSeen[TID]; } display("."); |