You can subscribe to this list here.
2001 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(5) |
Sep
|
Oct
(14) |
Nov
(37) |
Dec
(13) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2002 |
Jan
(14) |
Feb
|
Mar
|
Apr
(15) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(3) |
Dec
(2) |
2003 |
Jan
(4) |
Feb
|
Mar
(1) |
Apr
(2) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(4) |
2004 |
Jan
(1) |
Feb
(3) |
Mar
|
Apr
|
May
(4) |
Jun
(3) |
Jul
(1) |
Aug
(6) |
Sep
|
Oct
|
Nov
|
Dec
|
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(17) |
Nov
(3) |
Dec
|
2006 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(23) |
Dec
|
2007 |
Jan
|
Feb
|
Mar
(7) |
Apr
(17) |
May
(1) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2008 |
Jan
|
Feb
|
Mar
|
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
(3) |
Sep
(20) |
Oct
|
Nov
(15) |
Dec
(2) |
2009 |
Jan
(38) |
Feb
(4) |
Mar
(20) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(4) |
Jul
|
Aug
(17) |
Sep
(26) |
Oct
|
Nov
(2) |
Dec
|
From: Thomas M. <tsm...@us...> - 2008-08-22 01:16:53
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv8623/src/java/opennlp/maxent Modified Files: GISTrainer.java Log Message: fixed issue with real-valued features. Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** GISTrainer.java 12 Apr 2007 17:21:59 -0000 1.27 --- GISTrainer.java 22 Aug 2008 01:16:50 -0000 1.28 *************** *** 73,81 **** private int[][] contexts; ! /** The value associates with each context. If null then context values are assumes to be 1. */ private float[][] values; ! ! /** Records the array of outcomes seen in each event. */ ! private int[] outcomes; /** List of outcomes for each event i, in context[i]. */ private int[] outcomeList; --- 73,79 ---- private int[][] contexts; ! /** The value associated with each context. If null then context values are assumes to be 1. */ private float[][] values; ! /** List of outcomes for each event i, in context[i]. */ private int[] outcomeList; *************** *** 212,216 **** contexts = di.getContexts(); values = di.getValues(); - outcomes = di.getOutcomeList(); this.cutoff = cutoff; predicateCounts = di.getPredCounts(); --- 210,213 ---- *************** *** 242,249 **** // set up feature arrays ! int[][] predCount = new int[numPreds][numOutcomes]; for (int ti = 0; ti < numUniqueEvents; ti++) { for (int j = 0; j < contexts[ti].length; j++) { ! predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]; } } --- 239,251 ---- // set up feature arrays ! float[][] predCount = new float[numPreds][numOutcomes]; for (int ti = 0; ti < numUniqueEvents; ti++) { for (int j = 0; j < contexts[ti].length; j++) { ! if (values == null || values[ti] == null) { ! predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]; ! } ! else { ! 
predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]*values[ti][j]; ! } } } *************** *** 304,308 **** modelExpects[pi].setParameter(aoi, 0.0); if (predCount[pi][oi] > 0) { ! observedExpects[pi].setParameter(aoi, predCount[pi][oi]); } else if (useSimpleSmoothing) { --- 306,310 ---- modelExpects[pi].setParameter(aoi, 0.0); if (predCount[pi][oi] > 0) { ! observedExpects[pi].setParameter(aoi, predCount[pi][oi]); } else if (useSimpleSmoothing) { *************** *** 318,322 **** for (int j = 0; j < contexts[ti].length; j++) { int pi = contexts[ti][j]; ! if (!modelExpects[pi].contains(outcomes[ti])) { cfvalSum += numTimesEventsSeen[ti]; } --- 320,324 ---- for (int j = 0; j < contexts[ti].length; j++) { int pi = contexts[ti][j]; ! if (!modelExpects[pi].contains(outcomeList[ti])) { cfvalSum += numTimesEventsSeen[ti]; } *************** *** 443,447 **** CFMOD += (evalParams.correctionConstant - contexts[ei].length) * numTimesEventsSeen[ei]; ! loglikelihood += Math.log(modelDistribution[outcomes[ei]]) * numTimesEventsSeen[ei]; numEvents += numTimesEventsSeen[ei]; if (printMessages) { --- 445,449 ---- CFMOD += (evalParams.correctionConstant - contexts[ei].length) * numTimesEventsSeen[ei]; ! loglikelihood += Math.log(modelDistribution[outcomeList[ei]]) * numTimesEventsSeen[ei]; numEvents += numTimesEventsSeen[ei]; if (printMessages) { *************** *** 452,456 **** } } ! if (max == outcomes[ei]) { numCorrect += numTimesEventsSeen[ei]; } --- 454,458 ---- } } ! if (max == outcomeList[ei]) { numCorrect += numTimesEventsSeen[ei]; } |
From: farzaneh k. <kaz...@ya...> - 2008-04-03 11:56:24
|
Hello, I would like to know: does the number of GIS iterations depend on the features of the MaxEnt model, or on the size of the training data? Could you point me to some literature that explains how the number of iterations should be selected? Best regards, Farzan --------------------------------- You rock. That's why Blockbuster's offering you one month of Blockbuster Total Access, No Cost. |
From: Thomas M. <tsm...@us...> - 2007-05-16 13:37:04
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent/io In directory sc8-pr-cvs16:/tmp/cvs-serv23265/src/java/opennlp/maxent/io Added Files: PooledGISModelReader.java Log Message: Reader which pools context strings. --- NEW FILE: PooledGISModelReader.java --- /////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2007 Thomas Morton // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ////////////////////////////////////////////////////////////////////////////// package opennlp.maxent.io; import java.io.File; import java.io.IOException; /** * This class works exactly like the SuffisSensitiveGISModelReader except that it * attempts to pool all context strings. This is useful when loading models which * share many context strings. * */ public class PooledGISModelReader extends SuffixSensitiveGISModelReader { /** * A reader for GIS models which inspects the filename and invokes the * appropriate GISModelReader depending on the filename's suffixes. 
* * <p>The following assumption are made about suffixes: * <li>.gz --> the file is gzipped (must be the last suffix) * <li>.txt --> the file is plain text * <li>.bin --> the file is binary * @param f * @throws IOException */ public PooledGISModelReader(File f) throws IOException { super(f); } protected String readUTF() throws IOException { return super.readUTF().intern(); } } |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:26:22
|
Update of /cvsroot/maxent/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv13823 Modified Files: CHANGES Log Message: updated for 2.5 release. Index: CHANGES =================================================================== RCS file: /cvsroot/maxent/maxent/CHANGES,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** CHANGES 13 Oct 2005 19:59:23 -0000 1.21 --- CHANGES 13 Apr 2007 16:26:12 -0000 1.22 *************** *** 1,2 **** --- 1,18 ---- + 2.5.0 + ----- + Added support for real-valued features with RealBasicEventStream, + RealValueFileEventStream, OnePassRealValueDataIndexer, and + TwoPassRealValueDataIndexer classes. + + Added support for priors with the Prior interface. By default the + package has been using a uniform prior when no other information is + known, now you can create other priors. + + Set TwoPassDataIndexer to use UTF8 encoding. + + Made GISModel thread safe for calls to the eval() method. + + Refactored GISModel and GISTrainer to use common eval() method. + 2.4.0 ----- |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:25:52
|
Update of /cvsroot/maxent/maxent/samples/sports In directory sc8-pr-cvs16:/tmp/cvs-serv13399/samples/sports Added Files: realTeam.test realTeam.dat Log Message: data for real-valued example. --- NEW FILE: realTeam.test --- home pdiff=0.6875 ptwins=0.5 ? home pdiff=1.0625 ptwins=0.5 ? away pdiff=0.8125 ptwins=0.5 ? away pdiff=0.6875 ptwins=0.6 ? home pdiff=0.9375 ptwins=0.5 ? home pdiff=0.6875 ptwins=0.3333 ? away pdiff=1.0625 ptwins=0.6666 ? home pdiff=0.8125 ptwins=0.6666 ? home pdiff=0.9375 ptwins=0.3333 ? home pdiff=0.6875 ptwins=0.5 ? --- NEW FILE: realTeam.dat --- away pdiff=0.6875 ptwins=0.5 lose away pdiff=1.0625 ptwins=0.5 win home pdiff=0.8125 ptwins=0.5 lose home pdiff=0.9375 ptwins=0.5 win away pdiff=0.6875 ptwins=0.6666 lose home pdiff=1.0625 ptwins=0.3333 win away pdiff=0.8125 ptwins=0.6666 win home pdiff=0.9375 ptwins=0.3333 win home pdiff=0.6875 ptwins=0.75 win away pdiff=1.0625 ptwins=0.25 tie away pdiff=0.8125 ptwins=0.5 tie away pdiff=0.9375 ptwins=0.25 tie home pdiff=0.6875 ptwins=0.6 tie home pdiff=1.0625 ptwins=0.25 tie away pdiff=0.8125 ptwins=0.5 lose home pdiff=0.9375 ptwins=0.25 lose away pdiff=0.6875 ptwins=0.6 lose home pdiff=1.0625 ptwins=0.25 lose home pdiff=0.8125 ptwins=0.6 win home pdiff=0.9375 ptwins=0.4 lose away pdiff=0.6875 ptwins=0.6666 lose home pdiff=1.0625 ptwins=0.4 lose away pdiff=0.8125 ptwins=0.5 lose home pdiff=0.9375 ptwins=0.5 tie away pdiff=0.6875 ptwins=0.7142 win away pdiff=1.0625 ptwins=0.5 win home pdiff=0.8125 ptwins=0.5714 win away pdiff=0.9375 ptwins=0.5 lose home pdiff=0.6875 ptwins=0.625 win home pdiff=1.0625 ptwins=0.4285 lose away pdiff=0.8125 ptwins=0.5 lose home pdiff=0.9375 ptwins=0.5714 win home pdiff=0.6875 ptwins=0.5555 lose away pdiff=1.0625 ptwins=0.5 lose away pdiff=0.8125 ptwins=0.5555 lose away pdiff=0.9375 ptwins=0.5 tie home pdiff=0.6875 ptwins=0.6 win home pdiff=1.0625 ptwins=0.5555 win away pdiff=0.8125 ptwins=0.6 tie home pdiff=0.9375 ptwins=0.5 win home pdiff=0.6875 ptwins=0.5454 win home 
pdiff=1.0625 ptwins=0.5 win home pdiff=0.8125 ptwins=0.6 win home pdiff=0.9375 ptwins=0.4444 lose away pdiff=0.6875 ptwins=0.5 lose home pdiff=1.0625 ptwins=0.4545 tie home pdiff=0.8125 ptwins=0.5454 tie away pdiff=0.9375 ptwins=0.5 lose away pdiff=0.6875 ptwins=0.5384 tie away pdiff=1.0625 ptwins=0.4545 lose home pdiff=0.8125 ptwins=0.5454 lose home pdiff=0.9375 ptwins=0.5454 win home pdiff=0.6875 ptwins=0.5384 lose away pdiff=1.0625 ptwins=0.5 lose home pdiff=0.8125 ptwins=0.5833 win home pdiff=0.9375 ptwins=0.5 lose away pdiff=0.6875 ptwins=0.5714 lose away pdiff=1.0625 ptwins=0.5384 win away pdiff=0.8125 ptwins=0.5384 lose away pdiff=0.9375 ptwins=0.5384 win home pdiff=0.6875 ptwins=0.6 tie home pdiff=1.0625 ptwins=0.5 tie away pdiff=0.8125 ptwins=0.5714 win home pdiff=0.9375 ptwins=0.5 win home pdiff=0.6875 ptwins=0.6 lose away pdiff=1.0625 ptwins=0.5 lose home pdiff=0.8125 ptwins=0.5333 win home pdiff=0.9375 ptwins=0.4666 win home pdiff=0.6875 ptwins=0.625 lose away pdiff=1.0625 ptwins=0.5333 tie away pdiff=0.8125 ptwins=0.5 lose home pdiff=0.9375 ptwins=0.4375 win away pdiff=0.6875 ptwins=0.6470 win home pdiff=1.0625 ptwins=0.5333 lose home pdiff=0.8125 ptwins=0.5294 tie away pdiff=0.9375 ptwins=0.4117 lose away pdiff=0.6875 ptwins=0.6111 tie away pdiff=1.0625 ptwins=0.5625 lose home pdiff=0.8125 ptwins=0.5294 lose away pdiff=0.9375 ptwins=0.4444 lose away pdiff=0.6875 ptwins=0.6111 lose home pdiff=1.0625 ptwins=0.5882 tie home pdiff=0.8125 ptwins=0.5555 win away pdiff=0.9375 ptwins=0.4736 tie home pdiff=0.6875 ptwins=0.6315 win home pdiff=1.0625 ptwins=0.5882 tie home pdiff=0.8125 ptwins=0.5263 lose home pdiff=0.9375 ptwins=0.4736 win home pdiff=0.6875 ptwins=0.6 lose home pdiff=1.0625 ptwins=0.5882 tie away pdiff=0.8125 ptwins=0.55 tie home pdiff=0.9375 ptwins=0.45 win home pdiff=0.6875 ptwins=0.6190 lose home pdiff=1.0625 ptwins=0.5882 tie away pdiff=0.8125 ptwins=0.55 lose away pdiff=0.9375 ptwins=0.4285 lose away pdiff=0.6875 ptwins=0.6363 lose home 
pdiff=1.0625 ptwins=0.5882 lose home pdiff=0.8125 ptwins=0.5714 lose away pdiff=0.9375 ptwins=0.4545 lose |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:24:58
|
Update of /cvsroot/maxent/maxent/samples/sports In directory sc8-pr-cvs16:/tmp/cvs-serv12959/samples/sports Modified Files: README Log Message: added real-valued example. Index: README =================================================================== RCS file: /cvsroot/maxent/maxent/samples/sports/README,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** README 21 Mar 2007 19:00:16 -0000 1.6 --- README 13 Apr 2007 16:24:55 -0000 1.7 *************** *** 91,99 **** Go ahead and modify the data to experiment with how the results can vary depending on the input to training. There isn't much data, so ! its not a full-fledge example of maxent, but it should still give the general idea. Also, add more contexts in the test files to see what the model will produce with different features active. - In all the previous examples, the features we're binary values, meaning that the feature was either on or off. You can also use features which --- 91,98 ---- Go ahead and modify the data to experiment with how the results can vary depending on the input to training. There isn't much data, so ! its not a full-fledged example of maxent, but it should still give the general idea. Also, add more contexts in the test files to see what the model will produce with different features active. In all the previous examples, the features we're binary values, meaning that the feature was either on or off. You can also use features which *************** *** 107,115 **** Features which don't contains are not in this format are considered to ! have a value of 1. Note feature values MUST BE POSITIVE. ! On a side note, though the features appear in almost the same orderings in the data files, this is not important. You can list them in whatever order you like. --- 106,145 ---- Features which don't contains are not in this format are considered to ! have a value of 1. Note feature values MUST BE POSITIVE. Using real-valued ! 
features has some additional overhead so you'll need to let the model know ! that it should look for these features. For these examples, you can use ! the "-real" option. + > java CreateModel -real realTeam.dat + The model will converge in 15 iterations. ! You can then test the models on the test data: ! ! > java Predict -real realTeam.test ! ! You see output like: ! -------------------------------------------------- ! For context: home pdiff=0.6875 ptwins=0.5 ! lose[0.3279] win[0.4311] tie[0.2410] ! ! For context: home pdiff=1.0625 ptwins=0.5 ! lose[0.3414] win[0.4301] tie[0.2284] ! ! For context: away pdiff=0.8125 ptwins=0.5 ! lose[0.5590] win[0.1864] tie[0.2546] ! ! For context: away pdiff=0.6875 ptwins=0.6 ! lose[0.5578] win[0.1866] tie[0.2556] ! -------------------------------------------------- ! ! You can see that the values of the features as well as their presence or ! absence (such as the home or away features) impact the probabilities assigned ! to each outcome. ! ! The use of the "-real" option to indicate real-valued data. In general you'll ! need to use the classes: RealBasicEventStream, RealValueFileEventStream, OnePassRealValueDataIndexer, and TwoPassRealValueDataIndexer. ! ! For all models, though the features appear in almost the same orderings in the data files, this is not important. You can list them in whatever order you like. *************** *** 119,125 **** post them to the maxent open discussion forum: ! https://sourceforge.net/forum/forum.php?forum_id=18384 ! ! or send mail to Tom Morton <tsm...@us...>. Posting to the ! forum is preferable. --- 149,152 ---- post them to the maxent open discussion forum: ! http://sourceforge.net/forum/forum.php?forum_id=18384 |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:24:34
|
Update of /cvsroot/maxent/maxent/samples/sports In directory sc8-pr-cvs16:/tmp/cvs-serv12865/samples/sports Modified Files: CreateModel.java Predict.java Log Message: updates to support real-valued examples. Index: CreateModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/samples/sports/CreateModel.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** CreateModel.java 24 Oct 2005 12:29:20 -0000 1.5 --- CreateModel.java 13 Apr 2007 16:24:06 -0000 1.6 *************** *** 17,23 **** ////////////////////////////////////////////////////////////////////////////// ! import opennlp.maxent.*; ! import opennlp.maxent.io.*; ! import java.io.*; /** --- 17,32 ---- ////////////////////////////////////////////////////////////////////////////// ! import java.io.File; ! import java.io.FileReader; ! ! import opennlp.maxent.BasicEventStream; ! import opennlp.maxent.EventStream; ! import opennlp.maxent.GIS; ! import opennlp.maxent.GISModel; ! import opennlp.maxent.OnePassRealValueDataIndexer; ! import opennlp.maxent.PlainTextByLineDataStream; ! import opennlp.maxent.RealBasicEventStream; ! import opennlp.maxent.io.GISModelWriter; ! import opennlp.maxent.io.SuffixSensitiveGISModelWriter; /** *************** *** 39,42 **** --- 48,56 ---- public static double SMOOTHING_OBSERVATION = 0.1; + private static void usage() { + System.err.println("java CreateModel [-real] dataFile"); + System.exit(1); + } + /** * Main method. Call as follows: *************** *** 45,49 **** */ public static void main (String[] args) { ! String dataFileName = new String(args[0]); String modelFileName = dataFileName.substring(0,dataFileName.lastIndexOf('.')) --- 59,75 ---- */ public static void main (String[] args) { ! int ai = 0; ! boolean real = false; ! while (args[ai].startsWith("-")) { ! if (args[ai].equals("-real")) { ! real = true; ! } ! else { ! System.err.println("Unknown option: "+args[ai]); ! usage(); ! } ! ai++; ! 
} ! String dataFileName = new String(args[ai]); String modelFileName = dataFileName.substring(0,dataFileName.lastIndexOf('.')) *************** *** 51,58 **** try { FileReader datafr = new FileReader(new File(dataFileName)); ! EventStream es = ! new BasicEventStream(new PlainTextByLineDataStream(datafr)); GIS.SMOOTHING_OBSERVATION = SMOOTHING_OBSERVATION; ! GISModel model = GIS.trainModel(es,USE_SMOOTHING); File outputFile = new File(modelFileName); --- 77,95 ---- try { FileReader datafr = new FileReader(new File(dataFileName)); ! EventStream es; ! if (!real) { ! es = new BasicEventStream(new PlainTextByLineDataStream(datafr)); ! } ! else { ! es = new RealBasicEventStream(new PlainTextByLineDataStream(datafr)); ! } GIS.SMOOTHING_OBSERVATION = SMOOTHING_OBSERVATION; ! GISModel model; ! if (!real) { ! model = GIS.trainModel(es,USE_SMOOTHING); ! } ! else { ! model = GIS.trainModel(100, new OnePassRealValueDataIndexer(es,0), USE_SMOOTHING); ! } File outputFile = new File(modelFileName); *************** *** 63,66 **** --- 100,104 ---- System.out.print("Unable to create model due to exception: "); System.out.println(e); + e.printStackTrace(); } } Index: Predict.java =================================================================== RCS file: /cvsroot/maxent/maxent/samples/sports/Predict.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Predict.java 20 Nov 2001 17:07:17 -0000 1.2 --- Predict.java 13 Apr 2007 16:24:06 -0000 1.3 *************** *** 36,44 **** private void eval (String predicates) { ! double[] ocs = _model.eval(_cg.getContext(predicates)); ! System.out.println("For context: " + predicates ! + "\n" + _model.getAllOutcomes(ocs) + "\n"); } /** --- 36,59 ---- private void eval (String predicates) { ! eval(predicates,false); ! } ! ! private void eval (String predicates, boolean real) { ! String[] contexts = predicates.split(" "); ! double[] ocs; ! if (!real) { ! ocs = _model.eval(contexts); ! } ! else { ! 
float[] values = RealValueFileEventStream.parseContexts(contexts); ! ocs = _model.eval(contexts,values); ! } ! System.out.println("For context: " + predicates+ "\n" + _model.getAllOutcomes(ocs) + "\n"); } + + private static void usage() { + + } /** *************** *** 49,60 **** public static void main(String[] args) { String dataFileName, modelFileName; if (args.length > 0) { ! dataFileName = args[0]; ! if (args.length > 1) ! modelFileName = args[1]; ! else ! modelFileName = ! dataFileName.substring(0,dataFileName.lastIndexOf('.')) ! + "Model.txt"; } else { --- 64,86 ---- public static void main(String[] args) { String dataFileName, modelFileName; + boolean real = false; + int ai = 0; if (args.length > 0) { ! while (args[ai].startsWith("-")) { ! if (args[ai].equals("-real")) { ! real = true; ! } ! else { ! usage(); ! } ! ai++; ! } ! dataFileName = args[ai++]; ! if (args.length > ai) { ! modelFileName = args[ai++]; ! } ! else { ! modelFileName = dataFileName.substring(0,dataFileName.lastIndexOf('.')) + "Model.txt"; ! } } else { *************** *** 82,98 **** DataStream ds = new PlainTextByLineDataStream( ! new FileReader(new File(args[0]))); while (ds.hasNext()) { String s = (String)ds.nextToken(); ! predictor.eval(s.substring(0, s.lastIndexOf(' '))); } return; } catch (Exception e) { ! System.out.println("Unable to read from specified file: " ! + args[0]); ! System.out.println(); ! } } --- 108,123 ---- DataStream ds = new PlainTextByLineDataStream( ! new FileReader(new File(dataFileName))); while (ds.hasNext()) { String s = (String)ds.nextToken(); ! predictor.eval(s.substring(0, s.lastIndexOf(' ')),real); } return; } catch (Exception e) { ! System.out.println("Unable to read from specified file: "+modelFileName); ! System.out.println(); ! e.printStackTrace(); } } |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:15:42
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv8631/src/java/opennlp/maxent Modified Files: RealValueFileEventStream.java Log Message: made common method to parse real-valued contexts. Index: RealValueFileEventStream.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/RealValueFileEventStream.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RealValueFileEventStream.java 4 Apr 2007 13:40:47 -0000 1.3 --- RealValueFileEventStream.java 13 Apr 2007 16:15:38 -0000 1.4 *************** *** 17,29 **** } ! public Event nextEvent() { ! StringTokenizer st = new StringTokenizer(line); ! String outcome = st.nextToken(); ! int count = st.countTokens(); ! String[] contexts = new String[count]; ! float[] values = new float[count]; boolean hasRealValue = false; ! for (int ci = 0; ci < count; ci++) { ! contexts[ci] = st.nextToken(); int ei = contexts[ci].lastIndexOf("="); if (ei > 0 && ei+1 < contexts[ci].length()) { --- 17,31 ---- } ! /** ! * Parses the specified contexts and re-populates context array with features and returns the values ! * for these features. ! * If all values are unspecified, then null is returned. ! * @param contexts The contexts with real values specified. ! * @return The value for each context or null if all values are unspecified. ! */ ! public static float[] parseContexts(String[] contexts) { boolean hasRealValue = false; ! float[] values = new float[contexts.length]; ! 
for (int ci = 0; ci < contexts.length; ci++) { int ei = contexts[ci].lastIndexOf("="); if (ei > 0 && ei+1 < contexts[ci].length()) { *************** *** 52,55 **** --- 54,65 ---- values = null; } + return values; + } + + public Event nextEvent() { + int si = line.indexOf(' '); + String outcome = line.substring(0,si); + String[] contexts = line.substring(si+1).split(" "); + float[] values = parseContexts(contexts); return (new Event(outcome, contexts, values)); } |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:14:42
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv8215/src/java/opennlp/maxent Modified Files: RealBasicEventStream.java Log Message: simplified code and fixed bug. Index: RealBasicEventStream.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/RealBasicEventStream.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RealBasicEventStream.java 21 Mar 2007 19:04:37 -0000 1.1 --- RealBasicEventStream.java 13 Apr 2007 16:14:39 -0000 1.2 *************** *** 38,73 **** return null; else { ! String[] contexts = obs.substring(lastSpace+1).split("\\s+"); ! float[] values = new float[contexts.length]; ! boolean hasRealValue = false; ! for (int ci=0;ci<contexts.length;ci++) { ! int ei = contexts[ci].lastIndexOf("="); ! if (ei > 0 && ei+1 < contexts[ci].length()) { ! values[ci] = Float.parseFloat(contexts[ci].substring(ei+1)); ! if (values[ci] < 0) { ! // TODO: Throw corrpurt data exception ! return null; ! } ! contexts[ci] = contexts[ci].substring(0,ei); ! hasRealValue = true; ! } ! else { ! values[ci] = 1; ! } ! } ! if (!hasRealValue) { ! values = null; ! } return new Event(obs.substring(lastSpace+1),contexts,values); } } - /** - * @param args - */ - public static void main(String[] args) { - // TODO Auto-generated method stub - - } - } --- 38,46 ---- return null; else { ! String[] contexts = obs.substring(0,lastSpace).split("\\s+"); ! float[] values = RealValueFileEventStream.parseContexts(contexts); return new Event(obs.substring(lastSpace+1),contexts,values); } } } |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:14:11
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv8162/src/java/opennlp/maxent Modified Files: MaxentModel.java Log Message: extended interface to support real-values. Index: MaxentModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/MaxentModel.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** MaxentModel.java 9 Dec 2003 23:13:53 -0000 1.4 --- MaxentModel.java 13 Apr 2007 16:14:07 -0000 1.5 *************** *** 44,51 **** * @param probs An array which is populated with the probabilities for each of the different * outcomes, all of which sum to 1. ! * @return an array of the probabilities for each of the different ! * outcomes, all of which sum to 1. The <code>probs</code> is returned if it is appropiately sized. **/ public double[] eval(String[] context, double probs[]); /** --- 44,59 ---- * @param probs An array which is populated with the probabilities for each of the different * outcomes, all of which sum to 1. ! * @return an array of the probabilities for each of the different outcomes, all of which sum to 1. **/ public double[] eval(String[] context, double probs[]); + + /** + * Evaluates a contexts with the specified context values. + * @param context A list of String names of the contextual predicates + * which are to be evaluated together. + * @param values The values associated with each context. + * @return an array of the probabilities for each of the different outcomes, all of which sum to 1. + */ + public double[] eval(String[] context, float[] values); /** |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:13:39
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv7778/src/java/opennlp/maxent Modified Files: GIS.java Log Message: added additional training interface. Index: GIS.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GIS.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** GIS.java 15 Nov 2006 21:42:04 -0000 1.8 --- GIS.java 13 Apr 2007 16:13:35 -0000 1.9 *************** *** 116,120 **** */ public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) { ! return trainModel(iterations,indexer,false,smoothing,null,0); } --- 116,120 ---- */ public static GISModel trainModel(int iterations, DataIndexer indexer, boolean smoothing) { ! return trainModel(iterations,indexer,true,smoothing,null,0); } |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:13:22
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv7384/src/java/opennlp/maxent Modified Files: ComparableEvent.java Log Message: modified toString to show values when present. Index: ComparableEvent.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/ComparableEvent.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** ComparableEvent.java 21 Mar 2007 19:04:37 -0000 1.4 --- ComparableEvent.java 13 Apr 2007 16:13:00 -0000 1.5 *************** *** 76,82 **** public String toString() { ! String s = ""; ! for (int i=0; i<predIndexes.length; i++) s+= " "+predIndexes[i]; ! return s; } --- 76,87 ---- public String toString() { ! StringBuffer s = new StringBuffer().append(outcome).append(":"); ! for (int i=0; i<predIndexes.length; i++) { ! s.append(" ").append(predIndexes[i]); ! if (values != null) { ! s.append("=").append(values[i]); ! } ! } ! return s.toString(); } |
From: Thomas M. <tsm...@us...> - 2007-04-13 16:12:36
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv7329/src/java/opennlp/maxent Modified Files: BasicContextGenerator.java Log Message: simplified code. Index: BasicContextGenerator.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/BasicContextGenerator.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** BasicContextGenerator.java 10 May 2004 03:11:54 -0000 1.3 --- BasicContextGenerator.java 13 Apr 2007 16:12:28 -0000 1.4 *************** *** 34,53 **** public class BasicContextGenerator implements ContextGenerator { ! /** ! * Builds up the list of contextual predicates given a String. ! */ ! public String[] getContext(Object o) { ! String s = (String)o; ! int prevIndex = -1; ! int index = s.indexOf(' '); ! List cuts = new ArrayList(); ! while (index != -1) { ! cuts.add(s.substring(prevIndex+1, index)); ! prevIndex = index; ! index = s.indexOf(' ', ++index); ! } ! cuts.add(s.substring(prevIndex+1, s.length())); ! return (String[])cuts.toArray(new String[cuts.size()]); ! } } --- 34,44 ---- public class BasicContextGenerator implements ContextGenerator { ! /** ! * Builds up the list of contextual predicates given a String. ! */ ! public String[] getContext(Object o) { ! String s = (String) o; ! return (String[]) s.split(" "); ! } } |
From: Thomas M. <tsm...@us...> - 2007-04-12 17:22:48
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv4812/src/java/opennlp/maxent Modified Files: GISModel.java Log Message: fixed bug with initializing priors. Index: GISModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISModel.java,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** GISModel.java 11 Apr 2007 15:58:26 -0000 1.21 --- GISModel.java 12 Apr 2007 17:22:31 -0000 1.22 *************** *** 69,72 **** --- 69,73 ---- this.evalParams = new EvalParameters(params,correctionParam,correctionConstant,ocNames.length); this.prior = prior; + prior.setLabels(ocNames, predLabels); } |
From: Thomas M. <tsm...@us...> - 2007-04-12 17:22:07
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv4382/src/java/opennlp/maxent Modified Files: GISTrainer.java Log Message: removed unused variable. Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** GISTrainer.java 21 Mar 2007 19:04:37 -0000 1.26 --- GISTrainer.java 12 Apr 2007 17:21:59 -0000 1.27 *************** *** 128,132 **** int[] numfeats; /** Initial probability for all outcomes. */ - double iprob; EvalParameters evalParams; --- 128,131 ---- *************** *** 233,237 **** outcomeList = di.getOutcomeList(); numOutcomes = outcomeLabels.length; - iprob = Math.log(1.0 / numOutcomes); predLabels = di.getPredLabels(); --- 232,235 ---- |
From: Thomas M. <tsm...@us...> - 2007-04-11 16:44:09
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv21202/src/java/opennlp/maxent Modified Files: FileEventStream.java TwoPassDataIndexer.java Log Message: changed encoding to UTF8. Index: FileEventStream.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/FileEventStream.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** FileEventStream.java 15 Mar 2007 04:51:02 -0000 1.4 --- FileEventStream.java 11 Apr 2007 16:44:05 -0000 1.5 *************** *** 56,64 **** this(fileName,null); } ! ! public FileEventStream(File file) throws IOException { ! this(file,null); ! } ! /** * Creates a new file event stream from the specified file. --- 56,60 ---- this(fileName,null); } ! /** * Creates a new file event stream from the specified file. *************** *** 66,76 **** * @throws IOException When the specified file can not be read. */ ! public FileEventStream(File file, String encoding) throws IOException { ! if (encoding == null) { ! reader = new BufferedReader(new FileReader(file)); ! } ! else { ! reader = new BufferedReader(new InputStreamReader(new FileInputStream(file),encoding)); ! } } --- 62,67 ---- * @throws IOException When the specified file can not be read. */ ! public FileEventStream(File file) throws IOException { ! 
reader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF8")); } Index: TwoPassDataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/TwoPassDataIndexer.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** TwoPassDataIndexer.java 11 Apr 2007 16:22:28 -0000 1.10 --- TwoPassDataIndexer.java 11 Apr 2007 16:44:05 -0000 1.11 *************** *** 24,28 **** import java.io.File; import java.io.FileOutputStream; - import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; --- 24,27 ---- *************** *** 56,62 **** } - public TwoPassDataIndexer(EventStream eventStream, int cutoff) throws IOException { - this(eventStream,cutoff,null); - } /** * Two argument constructor for DataIndexer. --- 55,58 ---- *************** *** 67,71 **** * observed in order to be included in the model. */ ! public TwoPassDataIndexer(EventStream eventStream, int cutoff,String encoding) throws IOException { TObjectIntHashMap predicateIndex; List eventsToCompare; --- 63,67 ---- * observed in order to be included in the model. */ ! public TwoPassDataIndexer(EventStream eventStream, int cutoff) throws IOException { TObjectIntHashMap predicateIndex; List eventsToCompare; *************** *** 78,88 **** File tmp = File.createTempFile("events", null); tmp.deleteOnExit(); ! Writer osw; ! if (encoding != null) { ! osw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmp),encoding)); ! } ! else { ! osw = new BufferedWriter(new FileWriter(tmp)); ! } int numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff); System.out.println("done. " + numEvents + " events"); --- 74,78 ---- File tmp = File.createTempFile("events", null); tmp.deleteOnExit(); ! 
Writer osw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmp),"UTF8")); int numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff); System.out.println("done. " + numEvents + " events"); *************** *** 90,94 **** System.out.print("\tIndexing... "); ! eventsToCompare = index(numEvents, new FileEventStream(tmp,encoding), predicateIndex); // done with predicates predicateIndex = null; --- 80,84 ---- System.out.print("\tIndexing... "); ! eventsToCompare = index(numEvents, new FileEventStream(tmp), predicateIndex); // done with predicates predicateIndex = null; |
From: Thomas M. <tsm...@us...> - 2007-04-11 16:22:31
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv12104/src/java/opennlp/maxent Modified Files: TwoPassDataIndexer.java Log Message: changed the throw IOException. Index: TwoPassDataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/TwoPassDataIndexer.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** TwoPassDataIndexer.java 11 Apr 2007 15:58:50 -0000 1.9 --- TwoPassDataIndexer.java 11 Apr 2007 16:22:28 -0000 1.10 *************** *** 52,60 **** * seen in the training data. */ ! public TwoPassDataIndexer(EventStream eventStream) { this(eventStream, 0); } ! public TwoPassDataIndexer(EventStream eventStream, int cutoff) { this(eventStream,cutoff,null); } --- 52,60 ---- * seen in the training data. */ ! public TwoPassDataIndexer(EventStream eventStream) throws IOException { this(eventStream, 0); } ! public TwoPassDataIndexer(EventStream eventStream, int cutoff) throws IOException { this(eventStream,cutoff,null); } *************** *** 67,71 **** * observed in order to be included in the model. */ ! public TwoPassDataIndexer(EventStream eventStream, int cutoff,String encoding) { TObjectIntHashMap predicateIndex; List eventsToCompare; --- 67,71 ---- * observed in order to be included in the model. */ ! public TwoPassDataIndexer(EventStream eventStream, int cutoff,String encoding) throws IOException { TObjectIntHashMap predicateIndex; List eventsToCompare; |
From: Thomas M. <tsm...@us...> - 2007-04-11 15:58:53
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv907/src/java/opennlp/maxent Modified Files: TwoPassDataIndexer.java Log Message: added buffered writer. Index: TwoPassDataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/TwoPassDataIndexer.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** TwoPassDataIndexer.java 15 Mar 2007 04:51:02 -0000 1.8 --- TwoPassDataIndexer.java 11 Apr 2007 15:58:50 -0000 1.9 *************** *** 21,24 **** --- 21,25 ---- import gnu.trove.TObjectIntHashMap; + import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; *************** *** 77,86 **** File tmp = File.createTempFile("events", null); tmp.deleteOnExit(); ! OutputStreamWriter osw; if (encoding != null) { ! osw = new OutputStreamWriter(new FileOutputStream(tmp),encoding); } else { ! osw = new FileWriter(tmp); } int numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff); --- 78,87 ---- File tmp = File.createTempFile("events", null); tmp.deleteOnExit(); ! Writer osw; if (encoding != null) { ! osw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmp),encoding)); } else { ! osw = new BufferedWriter(new FileWriter(tmp)); } int numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff); |
From: Thomas M. <tsm...@us...> - 2007-04-11 15:58:30
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs16:/tmp/cvs-serv870/src/java/opennlp/maxent Modified Files: EvalParameters.java GISModel.java Log Message: made numfeats local for thread safety. Index: EvalParameters.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/EvalParameters.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** EvalParameters.java 15 Mar 2007 04:42:01 -0000 1.1 --- EvalParameters.java 11 Apr 2007 15:58:25 -0000 1.2 *************** *** 26,34 **** /** Log of 1/C; initial value of probabilities. */ final double iprob; ! ! /** Stores the number of features that get fired for each outcome in an event. ! * This is over-written for each event evaluation, but declared once for efficiency.*/ ! int[] numfeats; ! /** * Creates a set of paramters which can be evaulated with the eval method. --- 26,30 ---- /** Log of 1/C; initial value of probabilities. */ final double iprob; ! /** * Creates a set of paramters which can be evaulated with the eval method. *************** *** 42,46 **** this.correctionParam = correctionParam; this.numOutcomes = numOutcomes; - this.numfeats = new int[numOutcomes]; this.correctionConstant = correctionConstant; this.constantInverse = 1.0 / correctionConstant; --- 38,41 ---- Index: GISModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISModel.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** GISModel.java 21 Mar 2007 19:04:37 -0000 1.20 --- GISModel.java 11 Apr 2007 15:58:26 -0000 1.21 *************** *** 124,130 **** public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) { Context[] params = model.params; ! for (int oid = 0; oid < model.numOutcomes; oid++) { ! model.numfeats[oid] = 0; ! 
} int[] activeOutcomes; double[] activeParameters; --- 124,128 ---- public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) { Context[] params = model.params; ! int numfeats[] = new int[model.numOutcomes]; int[] activeOutcomes; double[] activeParameters; *************** *** 140,144 **** for (int ai = 0; ai < activeOutcomes.length; ai++) { int oid = activeOutcomes[ai]; ! model.numfeats[oid]++; prior[oid] += activeParameters[ai] * value; } --- 138,142 ---- for (int ai = 0; ai < activeOutcomes.length; ai++) { int oid = activeOutcomes[ai]; ! numfeats[oid]++; prior[oid] += activeParameters[ai] * value; } *************** *** 149,153 **** for (int oid = 0; oid < model.numOutcomes; oid++) { if (model.correctionParam != 0) { ! prior[oid] = Math.exp(prior[oid]*model.constantInverse+((1.0 - ((double) model.numfeats[oid] / model.correctionConstant)) * model.correctionParam)); } else { --- 147,151 ---- for (int oid = 0; oid < model.numOutcomes; oid++) { if (model.correctionParam != 0) { ! prior[oid] = Math.exp(prior[oid]*model.constantInverse+((1.0 - ((double) numfeats[oid] / model.correctionConstant)) * model.correctionParam)); } else { |
From: Thomas M. <tsm...@us...> - 2007-04-04 13:40:53
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv1750/src/java/opennlp/maxent Modified Files: RealValueFileEventStream.java Log Message: added some error checking. Index: RealValueFileEventStream.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/RealValueFileEventStream.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** RealValueFileEventStream.java 21 Mar 2007 19:04:36 -0000 1.2 --- RealValueFileEventStream.java 4 Apr 2007 13:40:47 -0000 1.3 *************** *** 28,38 **** int ei = contexts[ci].lastIndexOf("="); if (ei > 0 && ei+1 < contexts[ci].length()) { ! values[ci] = Float.parseFloat(contexts[ci].substring(ei+1)); ! if (values[ci] < 0) { ! return null; ! //TODO: Throw corrpurt data exception } - contexts[ci] = contexts[ci].substring(0,ei); - hasRealValue = true; } else { --- 28,47 ---- int ei = contexts[ci].lastIndexOf("="); if (ei > 0 && ei+1 < contexts[ci].length()) { ! boolean gotReal = true; ! try { ! values[ci] = Float.parseFloat(contexts[ci].substring(ei+1)); ! } ! catch (NumberFormatException e) { ! gotReal = false; ! System.err.println("Unable to determine value in context:"+contexts[ci]); ! values[ci] = 1; ! } ! if (gotReal) { ! if (values[ci] < 0) { ! throw new RuntimeException("Negitive values are not allowed: "+contexts[ci]); ! } ! contexts[ci] = contexts[ci].substring(0,ei); ! hasRealValue = true; } } else { |
From: Thomas M. <tsm...@us...> - 2007-03-21 19:04:41
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv22057/src/java/opennlp/maxent Modified Files: RealValueFileEventStream.java UniformPrior.java GISTrainer.java ComparableEvent.java Prior.java GISModel.java Added Files: RealBasicEventStream.java Log Message: updates for real-valued features. --- NEW FILE: RealBasicEventStream.java --- package opennlp.maxent; public class RealBasicEventStream implements EventStream { ContextGenerator cg = new BasicContextGenerator(); DataStream ds; Event next; public RealBasicEventStream(DataStream ds) { this.ds = ds; if (this.ds.hasNext()) next = createEvent((String)this.ds.nextToken()); } public Event nextEvent() { while (next == null && this.ds.hasNext()) next = createEvent((String)this.ds.nextToken()); Event current = next; if (this.ds.hasNext()) { next = createEvent((String)this.ds.nextToken()); } else { next = null; } return current; } public boolean hasNext() { while (next == null && ds.hasNext()) next = createEvent((String)ds.nextToken()); return next != null; } private Event createEvent(String obs) { int lastSpace = obs.lastIndexOf(' '); if (lastSpace == -1) return null; else { String[] contexts = obs.substring(lastSpace+1).split("\\s+"); float[] values = new float[contexts.length]; boolean hasRealValue = false; for (int ci=0;ci<contexts.length;ci++) { int ei = contexts[ci].lastIndexOf("="); if (ei > 0 && ei+1 < contexts[ci].length()) { values[ci] = Float.parseFloat(contexts[ci].substring(ei+1)); if (values[ci] < 0) { // TODO: Throw corrpurt data exception return null; } contexts[ci] = contexts[ci].substring(0,ei); hasRealValue = true; } else { values[ci] = 1; } } if (!hasRealValue) { values = null; } return new Event(obs.substring(lastSpace+1),contexts,values); } } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub } } Index: RealValueFileEventStream.java 
=================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/RealValueFileEventStream.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RealValueFileEventStream.java 15 Mar 2007 04:51:26 -0000 1.1 --- RealValueFileEventStream.java 21 Mar 2007 19:04:36 -0000 1.2 *************** *** 21,32 **** String outcome = st.nextToken(); int count = st.countTokens(); ! String[] context = new String[count]; float[] values = new float[count]; boolean hasRealValue = false; for (int ci = 0; ci < count; ci++) { ! context[ci] = st.nextToken(); ! int ei = context[ci].lastIndexOf("="); ! if (ei > 0 && ei+1 < context[ci].length()) { ! values[ci] = Float.parseFloat(context[ci].substring(ei+1)); hasRealValue = true; } --- 21,37 ---- String outcome = st.nextToken(); int count = st.countTokens(); ! String[] contexts = new String[count]; float[] values = new float[count]; boolean hasRealValue = false; for (int ci = 0; ci < count; ci++) { ! contexts[ci] = st.nextToken(); ! int ei = contexts[ci].lastIndexOf("="); ! if (ei > 0 && ei+1 < contexts[ci].length()) { ! values[ci] = Float.parseFloat(contexts[ci].substring(ei+1)); ! if (values[ci] < 0) { ! return null; ! //TODO: Throw corrpurt data exception ! } ! contexts[ci] = contexts[ci].substring(0,ei); hasRealValue = true; } *************** *** 38,42 **** values = null; } ! return (new Event(outcome, context, values)); } --- 43,47 ---- values = null; } ! return (new Event(outcome, contexts, values)); } *************** *** 61,65 **** cutoff = Integer.parseInt(args[ai++]); } ! GISModel model = GIS.trainModel(es,iterations,cutoff); new SuffixSensitiveGISModelWriter(model, new File(eventFile+".bin.gz")).persist(); } --- 66,70 ---- cutoff = Integer.parseInt(args[ai++]); } ! 
GISModel model = GIS.trainModel(iterations,new OnePassRealValueDataIndexer(es,cutoff)); new SuffixSensitiveGISModelWriter(model, new File(eventFile+".bin.gz")).persist(); } Index: UniformPrior.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/UniformPrior.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** UniformPrior.java 15 Nov 2006 21:40:01 -0000 1.2 --- UniformPrior.java 21 Mar 2007 19:04:37 -0000 1.3 *************** *** 11,19 **** private double r; ! public void logPrior(double[] dist, int[] context) { for (int oi=0;oi<numOutcomes;oi++) { dist[oi] = r; } } public void setLabels(String[] outcomeLabels, String[] contextLabels) { --- 11,23 ---- private double r; ! public void logPrior(double[] dist, int[] context, float[] values) { for (int oi=0;oi<numOutcomes;oi++) { dist[oi] = r; } } + + public void logPrior(double[] dist, int[] context) { + logPrior(dist,context,null); + } public void setLabels(String[] outcomeLabels, String[] contextLabels) { Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** GISTrainer.java 15 Mar 2007 04:51:26 -0000 1.25 --- GISTrainer.java 21 Mar 2007 19:04:37 -0000 1.26 *************** *** 417,422 **** int numCorrect = 0; for (int ei = 0; ei < numUniqueEvents; ei++) { ! prior.logPrior(modelDistribution,contexts[ei]); ! GISModel.eval(contexts[ei], values[ei], modelDistribution, evalParams); for (int j = 0; j < contexts[ei].length; j++) { int pi = contexts[ei][j]; --- 417,428 ---- int numCorrect = 0; for (int ei = 0; ei < numUniqueEvents; ei++) { ! if (values != null) { ! prior.logPrior(modelDistribution,contexts[ei],values[ei]); ! GISModel.eval(contexts[ei], values[ei], modelDistribution, evalParams); ! } ! else { ! 
prior.logPrior(modelDistribution,contexts[ei]); ! GISModel.eval(contexts[ei], modelDistribution, evalParams); ! } for (int j = 0; j < contexts[ei].length; j++) { int pi = contexts[ei][j]; Index: ComparableEvent.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/ComparableEvent.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** ComparableEvent.java 15 Mar 2007 04:51:26 -0000 1.3 --- ComparableEvent.java 21 Mar 2007 19:04:37 -0000 1.4 *************** *** 37,41 **** public ComparableEvent(int oc, int[] pids, float[] values) { outcome = oc; ! Arrays.sort(pids); predIndexes = pids; } --- 37,47 ---- public ComparableEvent(int oc, int[] pids, float[] values) { outcome = oc; ! if (values == null) { ! Arrays.sort(pids); ! } ! else { ! sort(pids,values); ! } ! this.values = values; //needs to be sorted like pids predIndexes = pids; } *************** *** 47,51 **** public int compareTo(Object o) { ComparableEvent ce = (ComparableEvent)o; - if (outcome < ce.outcome) return -1; else if (outcome > ce.outcome) return 1; --- 53,56 ---- *************** *** 57,60 **** --- 62,69 ---- if (predIndexes[i] < ce.predIndexes[i]) return -1; else if (predIndexes[i] > ce.predIndexes[i]) return 1; + if (values != null) { + if (values[i] < ce.values[i]) return -1; + else if (values[i] > ce.values[i]) return 1; + } } *************** *** 71,74 **** --- 80,100 ---- return s; } + + private void sort(int[] pids, float[] values) { + for (int mi=0;mi<pids.length;mi++) { + int min = mi; + for (int pi=mi+1;pi<pids.length;pi++) { + if (pids[min] > pids[pi]) { + min = pi; + } + } + int pid = pids[mi]; + pids[mi] = pids[min]; + pids[min] = pid; + float val = values[mi]; + values[mi] = values[min]; + values[min] = val; + } + } } Index: Prior.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/Prior.java,v retrieving 
revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Prior.java 15 Nov 2006 21:39:38 -0000 1.2 --- Prior.java 21 Mar 2007 19:04:37 -0000 1.3 *************** *** 15,18 **** --- 15,27 ---- */ public void logPrior(double[] dist, int[] context); + + /** + * Populates the specified array with the the log of the distribution for the specified context. + * The returned array will be overwritten and needs to be re-initialized with every call to this method. + * @param dist An array to be populated with the log of the prior distribution. + * @param context The indices of the contextual predicates for an event. + * @param values The values associated with the context. + */ + public void logPrior(double[] dist, int[] context, float[] values); /** Index: GISModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISModel.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** GISModel.java 15 Mar 2007 04:51:26 -0000 1.19 --- GISModel.java 21 Mar 2007 19:04:37 -0000 1.20 *************** *** 87,90 **** --- 87,94 ---- } + public final double[] eval(String[] context, float[] values) { + return(eval(context,values,new double[evalParams.numOutcomes])); + } + /** * Use this model to evaluate a context and return an array of the *************** *** 109,112 **** --- 113,117 ---- * @param context The integer values of the predicates which have been observed at * the present decision point. + * @param values The values for each of the parameters. * @param prior The prior distribution for the specified context. * @param model The set of parametes used in this computation. *************** *** 158,161 **** --- 163,170 ---- } + public final double[] eval(String[] context, double[] outsums) { + return eval(context,null,outsums); + } + /** * Use this model to evaluate a context and return an array of the *************** *** 171,181 **** * getOutcome(int i). */ ! 
public final double[] eval(String[] context, double[] outsums) { int[] scontexts = new int[context.length]; for (int i=0; i<context.length; i++) { scontexts[i] = pmap.get(context[i]); } ! prior.logPrior(outsums, scontexts); ! return GISModel.eval(scontexts,outsums,evalParams); } --- 180,190 ---- * getOutcome(int i). */ ! public final double[] eval(String[] context, float[] values, double[] outsums) { int[] scontexts = new int[context.length]; for (int i=0; i<context.length; i++) { scontexts[i] = pmap.get(context[i]); } ! prior.logPrior(outsums, scontexts,values); ! return GISModel.eval(scontexts,values,outsums,evalParams); } |
From: Thomas M. <tsm...@us...> - 2007-03-21 19:02:46
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv21206/src/java/opennlp/maxent Modified Files: Event.java Log Message: removed import Index: Event.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/Event.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Event.java 15 Mar 2007 04:51:26 -0000 1.4 --- Event.java 21 Mar 2007 19:02:43 -0000 1.5 *************** *** 18,22 **** package opennlp.maxent; - import java.util.Arrays; /** --- 18,21 ---- |
From: Thomas M. <tsm...@us...> - 2007-03-21 19:02:26
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv21119/src/java/opennlp/maxent Modified Files: BasicEventStream.java Log Message: reformatted. Index: BasicEventStream.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/BasicEventStream.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** BasicEventStream.java 10 May 2004 03:11:54 -0000 1.2 --- BasicEventStream.java 21 Mar 2007 19:02:10 -0000 1.3 *************** *** 1,19 **** /////////////////////////////////////////////////////////////////////////////// ! // Copyright (C) 2001 Jason Baldridge // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! // GNU General Public License for more details. // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this program; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ! ////////////////////////////////////////////////////////////////////////////// package opennlp.maxent; --- 1,19 ---- /////////////////////////////////////////////////////////////////////////////// ! //Copyright (C) 2001 Jason Baldridge // ! //This library is free software; you can redistribute it and/or ! //modify it under the terms of the GNU Lesser General Public ! //License as published by the Free Software Foundation; either ! //version 2.1 of the License, or (at your option) any later version. // ! 
//This library is distributed in the hope that it will be useful, ! //but WITHOUT ANY WARRANTY; without even the implied warranty of ! //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! //GNU General Public License for more details. // ! //You should have received a copy of the GNU Lesser General Public ! //License along with this program; if not, write to the Free Software ! //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ! ////////////////////////////////////////////////////////////////////////////// package opennlp.maxent; *************** *** 29,84 **** * @author Jason Baldridge * @version $Revision$, $Date$ ! */ public class BasicEventStream implements EventStream { ! ContextGenerator _cg = new BasicContextGenerator(); ! DataStream _ds; ! Event _next; ! ! public BasicEventStream (DataStream ds) { ! _ds = ds; ! if (_ds.hasNext()) ! _next = createEvent((String)_ds.nextToken()); ! } ! ! /** ! * Returns the next Event object held in this EventStream. Each call to nextEvent advances the EventStream. ! * ! * @return the Event object which is next in this EventStream ! */ ! public Event nextEvent () { ! while (_next == null && _ds.hasNext()) ! _next = createEvent((String)_ds.nextToken()); ! ! Event current = _next; ! if (_ds.hasNext()) { ! _next = createEvent((String)_ds.nextToken()); ! } ! else { ! _next = null; ! } ! return current; ! } ! ! /** ! * Test whether there are any Events remaining in this EventStream. ! * ! * @return true if this EventStream has more Events ! */ ! public boolean hasNext () { ! while (_next == null && _ds.hasNext()) ! _next = createEvent((String)_ds.nextToken()); ! return _next != null; } ! ! private Event createEvent(String obs) { ! int lastSpace = obs.lastIndexOf(' '); ! if (lastSpace == -1) ! return null; ! else ! return new Event(obs.substring(lastSpace+1), ! _cg.getContext(obs.substring(0, lastSpace))); } ! ! } --- 29,84 ---- * @author Jason Baldridge * @version $Revision$, $Date$ ! 
*/ public class BasicEventStream implements EventStream { ! ContextGenerator cg = new BasicContextGenerator(); ! DataStream ds; ! Event next; ! ! public BasicEventStream (DataStream ds) { ! this.ds = ds; ! if (this.ds.hasNext()) ! next = createEvent((String)this.ds.nextToken()); ! } ! ! /** ! * Returns the next Event object held in this EventStream. Each call to nextEvent advances the EventStream. ! * ! * @return the Event object which is next in this EventStream ! */ ! public Event nextEvent () { ! while (next == null && this.ds.hasNext()) ! next = createEvent((String)this.ds.nextToken()); ! ! Event current = next; ! if (this.ds.hasNext()) { ! next = createEvent((String)this.ds.nextToken()); } ! else { ! next = null; } ! return current; ! } ! ! /** ! * Test whether there are any Events remaining in this EventStream. ! * ! * @return true if this EventStream has more Events ! */ ! public boolean hasNext () { ! while (next == null && ds.hasNext()) ! next = createEvent((String)ds.nextToken()); ! return next != null; ! } ! ! private Event createEvent(String obs) { ! int lastSpace = obs.lastIndexOf(' '); ! if (lastSpace == -1) ! return null; ! else ! return new Event(obs.substring(lastSpace+1), ! cg.getContext(obs.substring(0, lastSpace))); ! } ! ! } |
From: Thomas M. <tsm...@us...> - 2007-03-21 19:00:25
|
Update of /cvsroot/maxent/maxent/samples/sports In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv20264/samples/sports Modified Files: README Log Message: start of updates for next release. Index: README =================================================================== RCS file: /cvsroot/maxent/maxent/samples/sports/README,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** README 24 Oct 2005 12:29:20 -0000 1.5 --- README 21 Mar 2007 19:00:16 -0000 1.6 *************** *** 1,4 **** This is a simple example of a use of maximum entropy and the OpenNLP ! Maxent toolkit. (It was designed to work with Maxent v2.4.0.) There are two example data sets provided, one for whether a game should be played indoors or outdoors and another for whether Arsenal or --- 1,4 ---- This is a simple example of a use of maximum entropy and the OpenNLP ! Maxent toolkit. (It was designed to work with Maxent v2.5.0.) There are two example data sets provided, one for whether a game should be played indoors or outdoors and another for whether Arsenal or *************** *** 10,18 **** own maxent implementation, though the context generator is about as simple as it gets. For more complex examples, look at the classes in ! the opennlp.grok.preprocess package, available at http://grok.sf.net. To play with this sample application, do the following: ! Be sure that opennlp.maxent and trove.jar (found in the lib directory) are in your classpath. --- 10,18 ---- own maxent implementation, though the context generator is about as simple as it gets. For more complex examples, look at the classes in ! the opennlp.tools packages, available at http://opennlp.sourceforge.net. To play with this sample application, do the following: ! Be sure that maxent-2.5.0.jar and trove.jar (found in the lib directory) are in your classpath. *************** *** 21,35 **** > javac *.java - or - - > jikes *.java - - (If you have it installed on your system, jikes is faster!) 
- Note: the following will avoid the need to setup you classpath in your environment (be sure to fix the maxent jar for the correct version number): ! > javac -classpath .:../../lib/trove.jar:../../output/maxent-2.4.0.jar *.java Now, build the models: --- 21,29 ---- > javac *.java Note: the following will avoid the need to setup you classpath in your environment (be sure to fix the maxent jar for the correct version number): ! > javac -classpath .:../../lib/trove.jar:../../output/maxent-2.5.0.jar *.java Now, build the models: *************** *** 85,89 **** chance at winning it than Arsenal. ! (For those who don't know, Beckham, Scholes, and Neville or ManU players and Ferguson is the coach, while Henry, Kanu, and Parlour are Arsenal players with Wengler as their coach. By "Beckham=false" I --- 79,83 ---- chance at winning it than Arsenal. ! (For those who don't know, Beckham, Scholes, and Neville are/were ManU players and Ferguson is the coach, while Henry, Kanu, and Parlour are Arsenal players with Wengler as their coach. By "Beckham=false" I *************** *** 101,116 **** the model will produce with different features active. On a side note, though the features appear in almost the same orderings in the data files, this is not important. You can list them in whatever order you like. - You can also play around with the smoothing option by setting the - boolean value USE_SMOOTHING to "true" in CreateModel.java. This makes - a difference in performance on the gameLocation decision. In - particular, when the only feature available is "Rainy" for a - decision. So, try training the model with smoothing and without - smoothing, and then testing it on "gameLocation.test" for both models - to see the difference for the input "Rainy". - If you have any suggestions, interesting modifications, or data sets for other examples to add to this sample maxent application, please --- 95,118 ---- the model will produce with different features active. 
+ + In all the previous examples, the features were binary values, meaning + that the feature was either on or off. You can also use features which + have real values (like 0.07). The features are formatted with the value + specified after an equals sign such as the "pdiff" and "ptwins" features + below. + + away pdiff=0.9375 ptwins=0.25 tie + away pdiff=0.6875 ptwins=0.6666 lose + home pdiff=1.0625 ptwins=0.3333 win + + Features which are not in this format are considered to + have a value of 1. Note that feature values MUST BE POSITIVE. + + + On a side note, though the features appear in almost the same orderings in the data files, this is not important. You can list them in whatever order you like. If you have any suggestions, interesting modifications, or data sets for other examples to add to this sample maxent application, please |
From: Thomas M. <tsm...@us...> - 2007-03-15 04:51:30
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv6811/src/java/opennlp/maxent Modified Files: AbstractDataIndexer.java Event.java GISTrainer.java ComparableEvent.java OnePassDataIndexer.java DataIndexer.java GISModel.java Added Files: RealValueFileEventStream.java OnePassRealValueDataIndexer.java Log Message: added support for real-valued features. --- NEW FILE: RealValueFileEventStream.java --- package opennlp.maxent; import java.io.File; import java.io.IOException; import java.util.StringTokenizer; import opennlp.maxent.io.SuffixSensitiveGISModelWriter; public class RealValueFileEventStream extends FileEventStream { public RealValueFileEventStream(String fileName) throws IOException { super(fileName); } public RealValueFileEventStream(File file) throws IOException { super(file); } public Event nextEvent() { StringTokenizer st = new StringTokenizer(line); String outcome = st.nextToken(); int count = st.countTokens(); String[] context = new String[count]; float[] values = new float[count]; boolean hasRealValue = false; for (int ci = 0; ci < count; ci++) { context[ci] = st.nextToken(); int ei = context[ci].lastIndexOf("="); if (ei > 0 && ei+1 < context[ci].length()) { values[ci] = Float.parseFloat(context[ci].substring(ei+1)); hasRealValue = true; } else { values[ci] = 1; } } if (!hasRealValue) { values = null; } return (new Event(outcome, context, values)); } /** * Trains and writes a model based on the events in the specified event file. * the name of the model created is based on the event file name. * @param args eventfile [iterations cuttoff] * @throws IOException when the eventfile can not be read or the model file can not be written. 
*/ public static void main(String[] args) throws IOException { if (args.length == 0) { System.err.println("Usage: RealValueFileEventStream eventfile [iterations cutoff]"); System.exit(1); } int ai=0; String eventFile = args[ai++]; EventStream es = new RealValueFileEventStream(eventFile); int iterations = 100; int cutoff = 5; if (ai < args.length) { iterations = Integer.parseInt(args[ai++]); cutoff = Integer.parseInt(args[ai++]); } GISModel model = GIS.trainModel(es,iterations,cutoff); new SuffixSensitiveGISModelWriter(model, new File(eventFile+".bin.gz")).persist(); } } --- NEW FILE: OnePassRealValueDataIndexer.java --- package opennlp.maxent; import gnu.trove.TIntArrayList; import gnu.trove.TLinkedList; import gnu.trove.TObjectIntHashMap; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * An indexer for maxent model data which handles cutoffs for uncommon * contextual predicates and provides a unique integer index for each of the * predicates and maintains event values. * @author Tom Morton */ public class OnePassRealValueDataIndexer extends OnePassDataIndexer { float[][] values; /** * Two argument constructor for DataIndexer. * @param eventStream An Event[] which contains the a list of all the Events * seen in the training data. * @param cutoff The minimum number of times a predicate must have been * observed in order to be included in the model. */ public OnePassRealValueDataIndexer(EventStream eventStream, int cutoff) { super(eventStream,cutoff); } public float[][] getValues() { return values; } protected int sortAndMerge(List eventsToCompare) { int numUniqueEvents = super.sortAndMerge(eventsToCompare); values = new float[numUniqueEvents][]; int numEvents = eventsToCompare.size(); for (int i = 0, j = 0; i < numEvents; i++) { ComparableEvent evt = (ComparableEvent) eventsToCompare.get(i); if (null == evt) { continue; // this was a dupe, skip over it. 
} values[j] = evt.values; } return numUniqueEvents; } protected List index(TLinkedList events, TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); int numEvents = events.size(); int outcomeCount = 0; List eventsToCompare = new ArrayList(numEvents); TIntArrayList indexedContext = new TIntArrayList(); for (int eventIndex=0; eventIndex<numEvents; eventIndex++) { Event ev = (Event)events.removeFirst(); String[] econtext = ev.getContext(); ComparableEvent ce; int ocID; String oc = ev.getOutcome(); if (omap.containsKey(oc)) { ocID = omap.get(oc); } else { ocID = outcomeCount++; omap.put(oc, ocID); } for (int i=0; i<econtext.length; i++) { String pred = econtext[i]; if (predicateIndex.containsKey(pred)) { indexedContext.add(predicateIndex.get(pred)); } } // drop events with no active features if (indexedContext.size() > 0) { ce = new ComparableEvent(ocID, indexedContext.toNativeArray(), ev.getValues()); eventsToCompare.add(ce); } else { System.err.println("Dropped event "+ev.getOutcome()+":"+Arrays.asList(ev.getContext())); } // recycle the TIntArrayList indexedContext.resetQuick(); } outcomeLabels = toIndexedStringArray(omap); predLabels = toIndexedStringArray(predicateIndex); return eventsToCompare; } } Index: AbstractDataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/AbstractDataIndexer.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** AbstractDataIndexer.java 21 Nov 2006 21:25:46 -0000 1.4 --- AbstractDataIndexer.java 15 Mar 2007 04:51:26 -0000 1.5 *************** *** 71,82 **** /** ! * Sorts and uniques the array of comparable events. This method ! * will alter the eventsToCompare array -- it does an in place * sort, followed by an in place edit to remove duplicates. * * @param eventsToCompare a <code>ComparableEvent[]</code> value * @since maxent 1.2.6 */ ! 
protected void sortAndMerge(List eventsToCompare) { Collections.sort(eventsToCompare); int numEvents = eventsToCompare.size(); --- 71,83 ---- /** ! * Sorts and uniques the array of comparable events and return the number of unique events. ! * This method will alter the eventsToCompare array -- it does an in place * sort, followed by an in place edit to remove duplicates. * * @param eventsToCompare a <code>ComparableEvent[]</code> value + * @return The number of unique events in the specified list. * @since maxent 1.2.6 */ ! protected int sortAndMerge(List eventsToCompare) { Collections.sort(eventsToCompare); int numEvents = eventsToCompare.size(); *************** *** 84,88 **** if (numEvents <= 1) { ! return; // nothing to do; edge case (see assertion) } --- 85,89 ---- if (numEvents <= 1) { ! return numUniqueEvents; // nothing to do; edge case (see assertion) } *************** *** 117,120 **** --- 118,122 ---- ++j; } + return numUniqueEvents; } *************** *** 157,159 **** --- 159,167 ---- return array; } + + public float[][] getValues() { + return null; + } + + } Index: Event.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/Event.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Event.java 9 Dec 2003 23:13:08 -0000 1.3 --- Event.java 15 Mar 2007 04:51:26 -0000 1.4 *************** *** 30,44 **** private String outcome; private String[] context; ! public Event(String oc, String[] c) { ! outcome = oc; ! context = c; } ! public String getOutcome() { return outcome; } ! public String[] getContext() { return context; } public String toString() { ! return outcome+" "+Arrays.asList(context); } --- 30,74 ---- private String outcome; private String[] context; + private float[] values; ! public Event(String outcome, String[] context) { ! this(outcome,context,null); } ! public Event(String outcome, String[] context, float[] values) { ! 
this.outcome = outcome; ! this.context = context; ! this.values = values; ! } ! ! public String getOutcome() { ! return outcome; ! } ! ! public String[] getContext() { ! return context; ! } ! ! public float[] getValues() { ! return values; ! } public String toString() { ! StringBuffer sb = new StringBuffer(); ! sb.append(outcome).append(" ["); ! if (context.length > 0) { ! sb.append(context[0]); ! if (values != null) { ! sb.append("="+values[0]); ! } ! } ! for (int ci=1;ci<context.length;ci++) { ! sb.append(" ").append(context[ci]); ! if (values != null) { ! sb.append("="+values[ci]); ! } ! } ! sb.append("]"); ! return sb.toString(); } Index: GISTrainer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISTrainer.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** GISTrainer.java 21 Nov 2006 23:00:55 -0000 1.24 --- GISTrainer.java 15 Mar 2007 04:51:26 -0000 1.25 *************** *** 72,75 **** --- 72,78 ---- /** Records the array of predicates seen in each event. */ private int[][] contexts; + + /** The value associates with each context. If null then context values are assumes to be 1. */ + private float[][] values; /** Records the array of outcomes seen in each event. */ *************** *** 209,212 **** --- 212,216 ---- display("Incorporating indexed data for training... \n"); contexts = di.getContexts(); + values = di.getValues(); outcomes = di.getOutcomeList(); this.cutoff = cutoff; *************** *** 414,418 **** for (int ei = 0; ei < numUniqueEvents; ei++) { prior.logPrior(modelDistribution,contexts[ei]); ! GISModel.eval(contexts[ei], modelDistribution, evalParams); for (int j = 0; j < contexts[ei].length; j++) { int pi = contexts[ei][j]; --- 418,422 ---- for (int ei = 0; ei < numUniqueEvents; ei++) { prior.logPrior(modelDistribution,contexts[ei]); ! 
GISModel.eval(contexts[ei], values[ei], modelDistribution, evalParams); for (int j = 0; j < contexts[ei].length; j++) { int pi = contexts[ei][j]; Index: ComparableEvent.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/ComparableEvent.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** ComparableEvent.java 27 Dec 2001 19:20:26 -0000 1.2 --- ComparableEvent.java 15 Mar 2007 04:51:26 -0000 1.3 *************** *** 33,41 **** // has been seen. ! public ComparableEvent(int oc, int[] pids) { outcome = oc; Arrays.sort(pids); predIndexes = pids; } public int compareTo(Object o) { --- 33,47 ---- // has been seen. ! public float[] values; ! ! public ComparableEvent(int oc, int[] pids, float[] values) { outcome = oc; Arrays.sort(pids); predIndexes = pids; } + + public ComparableEvent(int oc, int[] pids) { + this(oc,pids,null); + } public int compareTo(Object o) { Index: OnePassDataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/OnePassDataIndexer.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** OnePassDataIndexer.java 21 Nov 2006 21:29:59 -0000 1.4 --- OnePassDataIndexer.java 15 Mar 2007 04:51:26 -0000 1.5 *************** *** 18,29 **** package opennlp.maxent; ! import gnu.trove.*; ! import java.util.*; /** * An indexer for maxent model data which handles cutoffs for uncommon * contextual predicates and provides a unique integer index for each of the ! * predicates. The data structures built in the constructor of this class are ! * used by the GIS trainer. * * @author Jason Baldridge --- 18,36 ---- package opennlp.maxent; ! import gnu.trove.TIntArrayList; ! import gnu.trove.TLinkedList; ! import gnu.trove.TObjectIntHashMap; ! ! import java.util.ArrayList; ! import java.util.Arrays; ! import java.util.HashSet; ! import java.util.Iterator; ! 
import java.util.List; ! import java.util.Set; /** * An indexer for maxent model data which handles cutoffs for uncommon * contextual predicates and provides a unique integer index for each of the ! * predicates. * * @author Jason Baldridge *************** *** 111,115 **** } ! private List index(TLinkedList events, TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); --- 118,122 ---- } ! protected List index(TLinkedList events, TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); *************** *** 157,159 **** --- 164,167 ---- return eventsToCompare; } + } Index: DataIndexer.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/DataIndexer.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** DataIndexer.java 15 Nov 2006 21:36:59 -0000 1.14 --- DataIndexer.java 15 Mar 2007 04:51:26 -0000 1.15 *************** *** 58,60 **** --- 58,66 ---- */ public String[] getOutcomeLabels(); + + /** + * Returns the values associated with each event context or null if integer values are to be used. + * @return the values associated with each event context. + */ + public float[][] getValues(); } \ No newline at end of file Index: GISModel.java =================================================================== RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/GISModel.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** GISModel.java 15 Nov 2006 21:41:42 -0000 1.18 --- GISModel.java 15 Mar 2007 04:51:26 -0000 1.19 *************** *** 101,104 **** --- 101,121 ---- */ public static double[] eval(int[] context, double[] prior, EvalParameters model) { + return eval(context,null,prior,model); + } + + /** + * Use this model to evaluate a context and return an array of the + * likelihood of each outcome given the specified context and the specified parameters. 
+ * @param context The integer values of the predicates which have been observed at + * the present decision point. + * @param prior The prior distribution for the specified context. + * @param model The set of parametes used in this computation. + * @return The normalized probabilities for the outcomes given the + * context. The indexes of the double[] are the outcome + * ids, and the actual string representation of the + * outcomes can be obtained from the method + * getOutcome(int i). + */ + public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) { Context[] params = model.params; for (int oid = 0; oid < model.numOutcomes; oid++) { *************** *** 106,119 **** } int[] activeOutcomes; ! double[] activeParameters; ! for (int i = 0; i < context.length; i++) { ! if (context[i] >= 0) { ! Context predParams = params[context[i]]; activeOutcomes = predParams.getOutcomes(); activeParameters = predParams.getParameters(); ! for (int j = 0; j < activeOutcomes.length; j++) { ! int oid = activeOutcomes[j]; model.numfeats[oid]++; ! prior[oid] += activeParameters[j]; } } --- 123,140 ---- } int[] activeOutcomes; ! double[] activeParameters; ! double value = 1; ! for (int ci = 0; ci < context.length; ci++) { ! if (context[ci] >= 0) { ! Context predParams = params[context[ci]]; activeOutcomes = predParams.getOutcomes(); activeParameters = predParams.getParameters(); ! if (values != null) { ! value = values[ci]; ! } ! for (int ai = 0; ai < activeOutcomes.length; ai++) { ! int oid = activeOutcomes[ai]; model.numfeats[oid]++; ! prior[oid] += activeParameters[ai] * value; } } *************** *** 284,334 **** } } ! } ! ! /** ! * This class encapsulates the varibales used in producing probabilities from a model ! * and facilitaes passing these variables to the eval method. Variables are declared ! * non-private so that they may be accessed and updated without a method call for efficiency ! * reasons. ! * @author Tom Morton ! * ! */ ! 
class EvalParameters { ! ! /** Mapping between outcomes and paramater values for each context. ! * The integer representation of the context can be found using <code>pmap</code>.*/ ! Context[] params; ! /** The number of outcomes being predicted. */ ! final int numOutcomes; ! /** The maximum number of feattures fired in an event. Usually refered to a C. ! * This is used to normalize the number of features which occur in an event. */ ! double correctionConstant; ! ! /** Stores inverse of the correction constant, 1/C. */ ! final double constantInverse; ! /** The correction parameter of the model. */ ! double correctionParam; ! /** Log of 1/C; initial value of probabilities. */ ! final double iprob; ! ! /** Stores the number of features that get fired for each outcome in an event. ! * This is over-written for each event evaluation, but declared once for efficiency.*/ ! int[] numfeats; ! ! /** ! * Creates a set of paramters which can be evaulated with the eval method. ! * @param params The parameters of the model. ! * @param correctionParam The correction paramter. ! * @param correctionConstant The correction constant. ! * @param numOutcomes The number of outcomes. ! */ ! public EvalParameters(Context[] params, double correctionParam, double correctionConstant, int numOutcomes) { ! this.params = params; ! this.correctionParam = correctionParam; ! this.numOutcomes = numOutcomes; ! this.numfeats = new int[numOutcomes]; ! this.correctionConstant = correctionConstant; ! this.constantInverse = 1.0 / correctionConstant; ! this.iprob = Math.log(1.0/numOutcomes); ! } ! } --- 305,307 ---- } } ! } \ No newline at end of file |