From: Daniel H. <dh...@us...> - 2008-04-10 07:54:21
|
Update of /cvsroot/jboost/jboost/src/jboost/booster In directory sc8-pr-cvs17.sourceforge.net:/tmp/cvs-serv20362/src/jboost/booster Modified Files: AdaBoost.java Booster.java BrownBoost.java YabaBoost.java AbstractBooster.java Log Message: Merged from jboost_unstable. Index: AbstractBooster.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/AbstractBooster.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** AbstractBooster.java 25 Mar 2008 01:00:27 -0000 1.8 --- AbstractBooster.java 10 Apr 2008 07:54:17 -0000 1.9 *************** *** 3,8 **** import java.io.Serializable; import jboost.controller.Configuration; - import jboost.controller.ControllerConfiguration; - import jboost.monitor.Monitor; /** --- 3,6 ---- *************** *** 21,235 **** public abstract class AbstractBooster implements Booster, Serializable { ! protected static final String PREFIX= "booster_"; ! ! /** ! * Factory method to build a booster instance according to ! * given configuration. Uses reflection to do this. ! * ! * @param c set of options for the booster ! * @param num_labels the number of m_labels in the data ! * @param isMultiLabel true if multilabled data ! * @return Booster ! */ ! public static Booster getInstance(Configuration c, int num_labels, ! boolean isMultiLabel) ! throws ClassNotFoundException, InstantiationException, ! IllegalAccessException, Exception { ! ! AbstractBooster result = null; ! ! // Get the booster type from configuration and ! // create a class of that type. ! String boosterType= c.getString(PREFIX + "type", ! "jboost.booster.AdaBoost"); ! System.out.println("Booster type: " + boosterType); ! Class boosterClass = Class.forName(boosterType); ! result = (AbstractBooster) boosterClass.newInstance(); ! result.init(c); ! ! // Get the runtime of the booster (if applicable). ! // If the booster is a discrete iterative scheme, the ! 
// number of iterations is dealt with elsewhere. ! if (result instanceof jboost.booster.BrownBoost) { ! double eps = 0.001; ! double runtime = Double.parseDouble(c.getString("boostingRuntime", ! "0.0")); ! if (runtime <= eps) { ! String str = "Need to specify runtime for m_booster " + result ! + ". Runtime must be larger than " + eps + "."; ! Monitor.log(str); ! throw new Exception(str); ! } ! ! jboost.booster.BrownBoost brown = (jboost.booster.BrownBoost) result; ! brown.setRuntime(runtime); ! result = brown; ! if (result instanceof jboost.booster.YabaBoost) { ! jboost.booster.YabaBoost yaba = (jboost.booster.YabaBoost) result; ! double c1=0, c2=0, theta=0, nc=0; ! double rpos=0, c1pos=0, c2pos=0, thetapos=0, ncpos=0; ! double rneg=0, c1neg=0, c2neg=0, thetaneg=0, ncneg=0; ! ControllerConfiguration conf = (ControllerConfiguration)c; ! try { ! c1 = Double.parseDouble(c.getString("c1", "Z1.0")); ! c2 = Double.parseDouble(c.getString("c2", "Z1.0")); ! theta = Double.parseDouble(c.getString("theta", "Z0.15")); ! nc = Double.parseDouble(c.getString("nc", "Z0.15")); ! yaba.setParams(c1,c2,theta,nc); ! if (conf.getCostSensitive()) { ! /* ! System.out.println(" runtime: " + rpos + "," + rneg + ! " c1: " + c1pos + "," + c1neg + ! " c2: " + c2pos + "," + c2neg + ! " theta: " + thetapos + "," + thetaneg + ! " nc: " + ncpos + "," + ncneg ! ); ! */ ! rpos = Double.parseDouble(c.getString("pos_c","Z")); ! rneg = Double.parseDouble(c.getString("neg_c","Z")); ! c1pos = Double.parseDouble(c.getString("pos_c1","Z")); ! c1neg = Double.parseDouble(c.getString("neg_c1","Z")); ! /* ! System.out.println(" runtime: " + rpos + "," + rneg + ! " c1: " + c1pos + "," + c1neg + ! " c2: " + c2pos + "," + c2neg + ! " theta: " + thetapos + "," + thetaneg + ! " nc: " + ncpos + "," + ncneg ! ); ! */ ! c2pos = Double.parseDouble(c.getString("pos_c2","Z")); ! c2neg = Double.parseDouble(c.getString("neg_c2","Z")); ! thetapos = Double.parseDouble(c.getString("pos_theta","Z")); ! 
thetaneg = Double.parseDouble(c.getString("neg_theta","Z")); ! ncpos = Double.parseDouble(c.getString("pos_nc","Z")); ! ncneg = Double.parseDouble(c.getString("neg_nc","Z")); ! /* ! System.out.println(" runtime: " + rpos + "," + rneg + ! " c1: " + c1pos + "," + c1neg + ! " c2: " + c2pos + "," + c2neg + ! " theta: " + thetapos + "," + thetaneg + ! " nc: " + ncpos + "," + ncneg ! ); ! */ ! yaba.setCostSensitiveParams(rpos, c1pos, c2pos, thetapos, ncpos, ! rneg, c1neg, c2neg, thetaneg, ncneg); ! } ! } catch (NumberFormatException e) { ! System.err.println("Need to supply appropriate parameters!"); ! System.err.println("For YabaBoost normal, we need r, c1, c2, nc, and theta!"); ! System.err.println("For YabaBoost cost sensitive, we need neg_c, pos_c, neg_c1, pos_c1, neg_c2, pos_c2, neg_theta, pos_theta, neg_nc, pos_nc"); ! throw new InstantiationException("Need more params for yaba"); ! } ! result = yaba; ! } ! } ! // If we have a multilable or multiclass problem, we need to wrap it. ! if (num_labels > 2 || isMultiLabel) { ! result= new MulticlassWrapMH(result, num_labels, isMultiLabel); ! } ! // If we are debugging, then wrap in paranoia ! boolean paranoid= c.getBool(PREFIX + "paranoid", false); ! if (paranoid) { ! result= new DebugWrap(result); ! } ! return result; } ! public int getNumExamples(){ ! return 0; } ! public String getParamString() { ! return "No parameters defined"; ! } - /** - * Create and return a new Bag which initially contains the - * elements in the list. - * - * @param list initial items to add to the Bag - */ - public Bag newBag(int[] list) { - Bag bag= newBag(); - bag.addExampleList(list); - return bag; - } ! /** ! * Clone a bag ! * ! * @param orig the bag to clone ! * @return new bag ! */ ! public Bag newBag(Bag orig) { ! Bag newbag= newBag(); ! newbag.copyBag(orig); ! return newbag; ! } - /** - * Find the best binary split for a sorted list of example indices - * with given split points. - * @param l an array of example indices, sorted. 
- * @param sp an array with true in position i when a split between - * positions i-1 and i should be checked - * @param b0 - a bag with all points below the best split (upon return) - * @param b1 - a bag with all points at or above the best split (upon return) - * @return the index in l where the best split occurred (possibly - * 0 if the best split puts all points on one side) - */ - public int findBestSplit(Bag b0, Bag b1, int[] l, boolean[] sp) { - Bag[] bags= new Bag[2]; ! bags[0]= newBag(); // init an empty bag ! bags[1]= newBag(l); // init a full bag ! b0.reset(); ! b1.copyBag(bags[1]); ! if (l.length == 0) ! return 0; ! double bestLoss= getLoss(bags); ! int bestIndex= 0; ! double loss; ! for (int i= 0; i < l.length - 1; i++) { ! bags[1].subtractExample(l[i]); ! bags[0].addExample(l[i]); ! if (sp[i + 1]) { // if this is a potential split point ! if ((loss= getLoss(bags)) < bestLoss) { ! bestLoss= loss; ! bestIndex= i + 1; ! b0.copyBag(bags[0]); ! b1.copyBag(bags[1]); ! } ! } } ! return bestIndex; } ! /** ! * Compute the loss associated with an array of bags where small ! * loss is considered "better". We assume that loss is additive ! * for a set of bags. ! * ! * @param bags array of bags whose losses will be added up and returned ! * @return loss the sum of the losses for all the bags ! */ ! public double getLoss(Bag[] bags) { ! double loss = 0; ! for (int i=0; i < bags.length; i++) { ! loss += bags[i].getLoss(); ! } ! return loss; } } --- 19,157 ---- public abstract class AbstractBooster implements Booster, Serializable { ! protected static final String PREFIX= "booster_"; + /** + * Factory method to build a booster instance according to + * given configuration. Uses reflection to do this. 
+ * + * @param c set of options for the booster + * @param num_labels the number of m_labels in the data + * @param isMultiLabel true if multilabled data + * @return Booster + */ + public static Booster getInstance(Configuration c, int num_labels, + boolean isMultiLabel) + throws ClassNotFoundException, InstantiationException, + IllegalAccessException, Exception { + AbstractBooster result = null; ! // Get the booster type from configuration and ! // create a class of that type. ! String boosterType= c.getString(PREFIX + "type", ! "jboost.booster.AdaBoost"); ! System.out.println("Booster type: " + boosterType); ! Class boosterClass = Class.forName(boosterType); ! result = (AbstractBooster) boosterClass.newInstance(); ! result.init(c); ! // Get the runtime of the booster (if applicable). ! // If the booster is a discrete iterative scheme, the ! // number of iterations is dealt with elsewhere. ! // If we have a multilable or multiclass problem, we need to wrap it. ! if (num_labels > 2 || isMultiLabel) { ! result= new MulticlassWrapMH(result, num_labels, isMultiLabel); } ! // If we are debugging, then wrap in paranoia ! boolean paranoid= c.getBool(PREFIX + "paranoid", false); ! if (paranoid) { ! result= new DebugWrap(result); } + return result; + } ! public int getNumExamples(){ ! return 0; ! } + public String getParamString() { + return "No parameters defined"; + } ! /** ! * Create and return a new Bag which initially contains the ! * elements in the list. ! * ! * @param list initial items to add to the Bag ! */ ! public Bag newBag(int[] list) { ! Bag bag= newBag(); ! bag.addExampleList(list); ! return bag; ! } + /** + * Clone a bag + * + * @param orig the bag to clone + * @return new bag + */ + public Bag newBag(Bag orig) { + Bag newbag= newBag(); + newbag.copyBag(orig); + return newbag; + } ! /** ! * Find the best binary split for a sorted list of example indices ! * with given split points. ! * @param l an array of example indices, sorted. ! 
* @param sp an array with true in position i when a split between ! * positions i-1 and i should be checked ! * @param b0 - a bag with all points below the best split (upon return) ! * @param b1 - a bag with all points at or above the best split (upon return) ! * @return the index in l where the best split occurred (possibly ! * 0 if the best split puts all points on one side) ! */ ! public int findBestSplit(Bag b0, Bag b1, int[] l, boolean[] sp) { ! Bag[] bags= new Bag[2]; ! bags[0]= newBag(); // init an empty bag ! bags[1]= newBag(l); // init a full bag ! b0.reset(); ! b1.copyBag(bags[1]); ! if (l.length == 0) ! return 0; ! double bestLoss= getLoss(bags); ! int bestIndex= 0; ! double loss; ! ! for (int i= 0; i < l.length - 1; i++) { ! bags[1].subtractExample(l[i]); ! bags[0].addExample(l[i]); ! if (sp[i + 1]) { // if this is a potential split point ! if ((loss= getLoss(bags)) < bestLoss) { ! bestLoss= loss; ! bestIndex= i + 1; ! b0.copyBag(bags[0]); ! b1.copyBag(bags[1]); } ! } } + return bestIndex; + } ! /** ! * Compute the loss associated with an array of bags where small ! * loss is considered "better". We assume that loss is additive ! * for a set of bags. ! * ! * @param bags array of bags whose losses will be added up and returned ! * @return loss the sum of the losses for all the bags ! */ ! public double getLoss(Bag[] bags) { ! double loss = 0; ! for (int i=0; i < bags.length; i++) { ! 
loss += bags[i].getLoss(); } + return loss; + } } Index: YabaBoost.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/YabaBoost.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** YabaBoost.java 25 Mar 2008 01:00:28 -0000 1.8 --- YabaBoost.java 10 Apr 2008 07:54:17 -0000 1.9 *************** *** 1,21 **** package jboost.booster; - import java.io.BufferedWriter; - import java.io.FileWriter; import java.io.*; - import java.io.IOException; - import java.io.PrintWriter; - - import java.text.DecimalFormat; - import java.text.NumberFormat; [...1566 lines suppressed...] ! } ! public Bag newBag(Bag bag) { ! return new YabaBag((YabaBag) bag); ! } ! /** ! * Returns the prediction associated with a bag representing a subset of the ! * data. ! */ ! protected Prediction getPrediction(Bag b) { ! return ((YabaBag) b).calcPrediction(); ! } ! protected MixedBinaryPrediction getZeroPred() { ! return new MixedBinaryPrediction(0,0); ! } Index: AdaBoost.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/AdaBoost.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** AdaBoost.java 25 Mar 2008 01:00:27 -0000 1.6 --- AdaBoost.java 10 Apr 2008 07:54:16 -0000 1.7 *************** *** 28,32 **** protected double[] m_margins; /** permanent storage for old m_margins */ ! protected double[] m_oldMargins; /** permanent storage for example m_weights */ protected double[] m_weights; --- 28,32 ---- protected double[] m_margins; /** permanent storage for old m_margins */ ! protected double[] m_oldMargins; // XXX DJH: this is never used in AdaBoost (e.g. not cleared in clear()) /** permanent storage for example m_weights */ protected double[] m_weights; *************** *** 73,77 **** /** temporary location for storing the examples as they are read in */ ! 
protected List m_tmpList; --- 73,77 ---- /** temporary location for storing the examples as they are read in */ ! protected List<TmpData> m_tmpList; *************** *** 90,94 **** */ AdaBoost(double smooth) { ! m_tmpList= new ArrayList(); m_numExamples= 0; m_smooth= smooth; --- 90,94 ---- */ AdaBoost(double smooth) { ! m_tmpList= new ArrayList<TmpData>(); m_numExamples= 0; m_smooth= smooth; *************** *** 104,113 **** } ! /** ! * Add an example to the data set of this booster ! * @param index ! * @param label ! * @param weight ! */ public void addExample(int index, Label label, double weight) { int l= label.getSingleValue(); --- 104,113 ---- } ! /** ! * Add an example to the data set of this booster ! * @param index ! * @param label ! * @param weight ! */ public void addExample(int index, Label label, double weight) { int l= label.getSingleValue(); *************** *** 119,128 **** if (l==POSITIVE_LABEL) m_numPosExamples++; } else { ! failed= "AdaBoost.addExample received index " + index + ", when it expected index " + ! m_numExamples; } } else { ! failed= "Adaboost.addExample expected a label which is either 0 or 1. It received " + ! l; } --- 119,128 ---- if (l==POSITIVE_LABEL) m_numPosExamples++; } else { ! // XXX DJH: determine class name at runtime ! failed= getClass().getName() + ".addExample received index " + index + ", when it expected index " + m_numExamples; } } else { ! // XXX DJH: determine class name at runtime ! failed= getClass().getName() + ".addExample expected a label which is either 0 or 1. It received " + l; } *************** *** 181,185 **** m_negExamples[m_negIndex++] = index; else { ! System.err.println("Label of example is unknown to adaboost"); System.exit(2); } --- 181,186 ---- m_negExamples[m_negIndex++] = index; else { ! // XXX DJH: determine class name at runtime ! System.err.println("Label of example is unknown to " + this.getClass().getName()); System.exit(2); } *************** *** 248,278 **** ! /** ! 
* Returns a string with all the weights, margins, etc ! */ ! public String getExampleData() { ! StringBuffer ret = new StringBuffer(""); ! ret.append(getParamString()); ! for (int i=0; i<m_margins.length; i++){ ! ret.append(String.format("[%d];[%.4f];[%.4f];[%.4f];\n", ! m_labels[i], m_margins[i], m_weights[i], ! m_potentials[i])); ! } ! return ret.toString(); } ! public String getParamString() { ! String ret = String.format("None (AdaBoost)"); ! return ret; ! } /** output AdaBoost contents as a human-readable string */ public String toString() { String s= ! "Adaboost. No of examples = " ! + m_numExamples ! + ", m_epsilon = " ! + m_epsilon; s += "\nindex\tmargin\tweight\told weight\tlabel\n"; NumberFormat f= new DecimalFormat("0.00"); --- 249,282 ---- ! /** ! * Returns a string with all the weights, margins, etc ! */ ! public String getExampleData() { ! StringBuffer ret = new StringBuffer(""); ! ret.append(getParamString()); ! for (int i=0; i<m_margins.length; i++){ ! ret.append(String.format("[%d];[%.4f];[%.4f];[%.4f];\n", ! m_labels[i], m_margins[i], m_weights[i], ! m_potentials[i])); } + return ret.toString(); + } ! public String getParamString() { ! // XXX DJH: determine class name at runtime ! String ret = String.format("None (" + getClass().getName() + ")"); ! return ret; ! } /** output AdaBoost contents as a human-readable string */ public String toString() { + // XXX DJH: determine class name at runtime String s= ! getClass().getName() + ! ". No of examples = " ! + m_numExamples ! + ", m_epsilon = " ! + m_epsilon; s += "\nindex\tmargin\tweight\told weight\tlabel\n"; NumberFormat f= new DecimalFormat("0.00"); *************** *** 289,293 **** + f.format(m_sampleWeights[i]) + "\t\t" + m_labels[i] ! + "\n"; } return s; --- 293,297 ---- + f.format(m_sampleWeights[i]) + "\t\t" + m_labels[i] ! + "\n"; } return s; *************** *** 353,357 **** * @return +1 if label matches hyp, -1 if label doesn't match hyp, 0 if no hyp */ ! 
public double getStep(short simple_label, double hyp_pred) { double step = getLabel(simple_label)*hyp_pred; double EPS = 0.000001; --- 357,362 ---- * @return +1 if label matches hyp, -1 if label doesn't match hyp, 0 if no hyp */ ! // XXX DJH: changed from 'public' to 'protected' ! protected double getStep(short simple_label, double hyp_pred) { double step = getLabel(simple_label)*hyp_pred; double EPS = 0.000001; *************** *** 360,364 **** } ! public double getLabel(short simple_label) { return sign(-simple_label+0.5); } --- 365,370 ---- } ! // XXX DJH: changed from 'public' to 'protected' ! protected double getLabel(short simple_label) { return sign(-simple_label+0.5); } *************** *** 369,373 **** double total_weight = 0.0; ! // Keep track of which hypotheses had hypotheses associated with them. boolean[] examplesWithHyp = new boolean[m_margins.length]; m_hypPredictions = new double[m_margins.length]; --- 375,379 ---- double total_weight = 0.0; ! // Keep track of which examples had hypotheses associated with them. boolean[] examplesWithHyp = new boolean[m_margins.length]; m_hypPredictions = new double[m_margins.length]; *************** *** 378,384 **** int example = index[j]; if (this instanceof BrownBoost) ! m_hypPredictions[example] = ((BrownBag)b).calcPrediction(1.0,1.0).getClassScores()[0]; ! else ! m_hypPredictions[example] = b.calcPrediction().getClassScores()[0]; } } --- 384,390 ---- int example = index[j]; if (this instanceof BrownBoost) ! m_hypPredictions[example] = ((BrownBag)b).calcPrediction(1.0,1.0).getClassScores()[0]; ! else ! 
m_hypPredictions[example] = b.calcPrediction().getClassScores()[0]; } } *************** *** 386,390 **** int numExamplesWithHyps = 0; - double weight; // Get all examples that have a hypothesis associated with them for (int i= 0; i < exampleIndex.length; i++) { --- 392,395 ---- *************** *** 442,446 **** } }*/ - double gamma = getHypErr(bags, exampleIndex); return getPredictions(bags); } --- 447,450 ---- *************** *** 452,456 **** */ public double calculateWeight(double margin) { ! return Math.exp(-1 * margin); } --- 456,460 ---- */ public double calculateWeight(double margin) { ! return Math.exp(-1 * margin); } *************** *** 579,583 **** double EPS = 0.0000001; if (m_w[0] < EPS && m_w[1] < EPS) { ! return true; } return false; --- 583,587 ---- double EPS = 0.0000001; if (m_w[0] < EPS && m_w[1] < EPS) { ! return true; } return false; *************** *** 665,672 **** BinaryPrediction p = new BinaryPrediction( ! m_w[1] == m_w[0] ! ? 0.0 ! : // handle case that w0=w1=0 ! 0.5 * Math.log((m_w[1] + smoothFactor) / (m_w[0] + smoothFactor))); return p; } --- 669,676 ---- BinaryPrediction p = new BinaryPrediction( ! m_w[1] == m_w[0] ! ? 0.0 ! : // handle case that w0=w1=0 ! 0.5 * Math.log((m_w[1] + smoothFactor) / (m_w[0] + smoothFactor))); return p; } Index: Booster.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/Booster.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Booster.java 25 Mar 2008 01:00:27 -0000 1.4 --- Booster.java 10 Apr 2008 07:54:17 -0000 1.5 *************** *** 71,79 **** ! public abstract double[][] getWeights(); ! public abstract double[][] getPotentials(); ! public abstract double getTotalWeight(); ! public int getNumExamples(); ! public abstract String getParamString(); --- 71,79 ---- ! public abstract double[][] getWeights(); ! public abstract double[][] getPotentials(); ! public abstract double getTotalWeight(); ! 
public int getNumExamples(); ! public abstract String getParamString(); *************** *** 93,96 **** --- 93,99 ---- * partition of the data. */ + // XXX DJH: What is 'partition'? Similar to 'elements' below? + // XXX YF: I think you are probably correct in your interpretation, need to check the implementation, verify, and update the + // XXX Description of this method and change "examples" to "elements" in the javadoc for "update". public abstract Prediction[] getPredictions(Bag[] b, int[][] partition); Index: BrownBoost.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/BrownBoost.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BrownBoost.java 25 Mar 2008 01:00:27 -0000 1.11 --- BrownBoost.java 10 Apr 2008 07:54:17 -0000 1.12 *************** *** 3,13 **** import java.text.DecimalFormat; import java.text.NumberFormat; - import java.util.ArrayList; - import java.util.List; - import jboost.booster.AdaBoost.BinaryBag; import jboost.controller.Configuration; ! import jboost.examples.Label; ! import jboost.NotSupportedException; [...1462 lines suppressed...] ! super(); ! } ! /** constructor that copies an existing bag */ ! protected BrownBag(BrownBag bag) { ! super(bag); ! } ! /** Output the weights in the bag */ ! public String toString() { ! String s= "BrownBag.\t w0=" + m_w[0] + "\t w1=" + m_w[1] + "\n"; ! return s; ! } ! } /* End BrownBag */ |