[Maxent-commit] CVS: maxent/src/java/opennlp/maxent GISTrainer.java,1.8,1.9

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent
In directory sc8-pr-cvs1:/tmp/cvs-serv8817

Modified Files:
	GISTrainer.java 
Log Message:

Fixed some bugs with the updating of the correction parameter.
  Namely, the expected-value computation needed to check whether a particular
  context was available with the outcome seen in training and, if not, add a
  term to the expected value of the correction constant.
Smoothing initial value wasn't in the log domain.
The loglikelihood update in nextIteration wasn't in the
  right place, so the loglikelihood value it returned was incorrect.

Index: GISTrainer.java
===================================================================
RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** GISTrainer.java	2 Jan 2003 14:46:30 -0000	1.8
--- GISTrainer.java	8 Jan 2003 15:44:47 -0000	1.9
***************
*** 216,249 ****
          //printTable(contexts);

! 	// a boolean to track if all events have same number of active features
!         boolean needCorrection = false; 
! 
!         // determine the correction constant and its inverse, and check to see
!         // whether we need the correction features
          constant = contexts[0].length;
          for (TID=1; TID<contexts.length; TID++) {
!             if (contexts[TID].length < constant) {
!                 needCorrection = true;
!             }
!             else if (contexts[TID].length > constant) {
!                 needCorrection = true;
!                 constant = contexts[TID].length;
              }
          }

- 	int cfvalSum = 0;
- 	for (TID=0; TID<numTokens; TID++)
- 	  cfvalSum += (constant - contexts[TID].length)
- 	    * numTimesEventsSeen[TID];
- 	if (cfvalSum == 0) {
- 	  cfObservedExpect = Math.log(NEAR_ZERO);//nearly zero so log is defined
- 	}
- 	else {
- 	  cfObservedExpect = Math.log(cfvalSum);
- 	}

  	display("done.\n");

-         constantInverse = 1.0/constant;
          outcomeLabels = di.outcomeLabels;
          numOutcomes = outcomeLabels.length;
--- 216,231 ----
          //printTable(contexts);

!         // determine the correction constant and its inverse
          constant = contexts[0].length;
          for (TID=1; TID<contexts.length; TID++) {
!             if (contexts[TID].length > constant) {
! 	      constant = contexts[TID].length;
              }
          }
+         constantInverse = 1.0/constant;

  	display("done.\n");

          outcomeLabels = di.outcomeLabels;
          numOutcomes = outcomeLabels.length;
***************
*** 265,271 ****

          //printTable(predCount);
- 
          di = null; // don't need it anymore

  	// A fake "observation" to cover features which are not detected in
  	// the data.  The default is to assume that we observed "1/10th" of a
--- 247,253 ----

          //printTable(predCount);
          di = null; // don't need it anymore

+ 
  	// A fake "observation" to cover features which are not detected in
  	// the data.  The default is to assume that we observed "1/10th" of a
***************
*** 307,311 ****
                      params[PID].put(OID, 0.0);
                      modifiers[PID].put(OID, 0.0);
!                     observedExpects[PID].put(OID, smoothingObservation);
  		}
              }
--- 289,293 ----
                      params[PID].put(OID, 0.0);
                      modifiers[PID].put(OID, 0.0);
!                     observedExpects[PID].put(OID, logSmoothingObservation);
  		}
              }
***************
*** 314,317 ****
--- 296,319 ----
              observedExpects[PID].compact();
          }
+ 
+ 	// compute the expected value of correction
+ 	int cfvalSum = 0;
+ 	for (TID=0; TID<numTokens; TID++) {
+ 	  for (int j=0; j<contexts[TID].length; j++) {
+ 	    PID = contexts[TID][j];
+ 	    if (!modifiers[PID].containsKey(outcomes[TID])) {
+ 	      cfvalSum+=numTimesEventsSeen[TID];
+ 	    }
+ 	  }
+ 	  cfvalSum += (constant - contexts[TID].length)
+ 	    * numTimesEventsSeen[TID];
+ 	}
+ 	if (cfvalSum == 0) {
+ 	  cfObservedExpect = Math.log(NEAR_ZERO);//nearly zero so log is defined
+ 	}
+ 	else {
+ 	  cfObservedExpect = Math.log(cfvalSum);
+ 	}
+ 
  	correctionParam = 0.0;
          predCount = null; // don't need it anymore
***************
*** 338,343 ****
      /* Estimate and return the model parameters. */
      private void findParameters(int iterations) {
!       double prevLL = 0.0;
!       double currLL = 0.0;
          display("Performing " + iterations + " iterations.\n");
          for (int i=1; i<=iterations; i++) {
--- 340,345 ----
      /* Estimate and return the model parameters. */
      private void findParameters(int iterations) {
!         double prevLL = 0.0;
!         double currLL = 0.0;
          display("Performing " + iterations + " iterations.\n");
          for (int i=1; i<=iterations; i++) {
***************
*** 347,357 ****
              currLL=nextIteration();
  	    if (i > 1) {
! 	      if (prevLL > currLL) {
! 		System.err.println("Model Diverging: loglikelihood decreased");
! 		break;
! 	      }
! 	      if (currLL-prevLL < LLThreshold) {
! 		break;
! 	      }
  	    }
  	    prevLL=currLL;
--- 349,359 ----
              currLL=nextIteration();
  	    if (i > 1) {
! 	        if (prevLL > currLL) {
! 		    System.err.println("Model Diverging: loglikelihood decreased");
! 		    break;
! 	        }
! 	        if (currLL-prevLL < LLThreshold) {
! 		    break;
! 	        }
  	    }
  	    prevLL=currLL;
***************
*** 415,420 ****
          double loglikelihood = 0.0; 
          CFMOD=0.0;
          for (TID=0; TID<numTokens; TID++) {
! 	  // modeldistribution and PID are globals used in 
  	  // the updateModifiers procedure.  They need to be set.
  	  eval(contexts[TID],modelDistribution);
--- 417,423 ----
          double loglikelihood = 0.0; 
          CFMOD=0.0;
+ 	int numEvents=0;
          for (TID=0; TID<numTokens; TID++) {
! 	  // TID, modeldistribution and PID are globals used in 
  	  // the updateModifiers procedure.  They need to be set.
  	  eval(contexts[TID],modelDistribution);
***************
*** 427,433 ****
  	      }
  	    }
- 	    loglikelihood+=Math.log(modelDistribution[outcomes[TID]]);
  	  }
! 	  CFMOD+=constant-contexts[TID].length;
          }
          display(".");
--- 430,438 ----
  	      }
  	    }
  	  }
! 	  CFMOD+=(constant-contexts[TID].length)*numTimesEventsSeen[TID];
! 
! 	  loglikelihood+=Math.log(modelDistribution[outcomes[TID]])*numTimesEventsSeen[TID];
! 	  numEvents+=numTimesEventsSeen[TID];
          }
          display(".");