From: Aaron A. <aa...@us...> - 2007-10-23 22:45:44
|
Update of /cvsroot/jboost/jboost/src/jboost/booster In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv4913/booster Modified Files: AbstractBooster.java AdaBoost.java BrownBoost.java YabaBoost.java Log Message: Cost sensitive boosting, new data output format Index: AbstractBooster.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/AbstractBooster.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** AbstractBooster.java 18 Sep 2007 03:25:58 -0000 1.5 --- AbstractBooster.java 23 Oct 2007 22:45:40 -0000 1.6 *************** *** 67,75 **** if (result instanceof jboost.booster.YabaBoost) { ! double c1 = Double.parseDouble(c.getString("c1", "1.0")); ! double c2 = Double.parseDouble(c.getString("c2", "1.0")); ! double theta = Double.parseDouble(c.getString("theta", "0.15")); jboost.booster.YabaBoost yaba = (jboost.booster.YabaBoost) result; yaba.setParams(c1,c2,theta); result = yaba; } --- 67,90 ---- if (result instanceof jboost.booster.YabaBoost) { ! double c1=0, c2=0, theta=0; ! double rpos=0, c1pos=0, c2pos=0, thetapos=0; ! double rneg=0, c1neg=0, c2neg=0, thetaneg=0; ! try { ! c1 = Double.parseDouble(c.getString("c1", "Z1.0")); ! c2 = Double.parseDouble(c.getString("c2", "Z1.0")); ! theta = Double.parseDouble(c.getString("theta", "Z0.15")); ! //c1 = Double.parseDouble(c.getString("pos_r", "Z1.0")); ! //c1 = Double.parseDouble(c.getString("pos_c1", "Z1.0")); ! //c2 = Double.parseDouble(c.getString("pos_c2", "Z1.0")); ! //theta = Double.parseDouble(c.getString("pos_theta", "Z0.15")); ! } catch (NumberFormatException e) { ! String s = "Need to supply r, c1, c2, and theta!"; ! System.err.println(s); ! throw new InstantiationException(s); ! } jboost.booster.YabaBoost yaba = (jboost.booster.YabaBoost) result; yaba.setParams(c1,c2,theta); + yaba.setCostSensitiveParams(rpos, c1pos, c2pos, thetapos, + rneg, c1neg, c2neg, thetaneg); result = yaba; } Index: YabaBoost.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/YabaBoost.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** YabaBoost.java 2 Oct 2007 02:32:07 -0000 1.5 --- YabaBoost.java 23 Oct 2007 22:45:40 -0000 1.6 *************** *** 34,44 **** /** - * This is true if and only if we are cost sensitive boosting. If - * this is set, then m_{pos,neg}c{1,2} and m_{pos,neg}Theta must - * be set. - */ - protected boolean m_isCostSensitive; - - /** * Parameter for yaba that allows for decrease of potential * by translation of potential curve. --- 34,37 ---- *************** *** 135,140 **** ! public void setCostSensativeParams(double pc1, double pc2, double ptheta, ! double nc1, double nc2, double ntheta) { m_posc1 = pc1; m_posc2 = pc2; --- 128,134 ---- ! public void setCostSensitiveParams(double pc, double pc1, double pc2, double ptheta, ! double nc, double nc1, double nc2, double ntheta) { ! m_posc = pc; m_posc1 = pc1; m_posc2 = pc2; *************** *** 142,145 **** --- 136,140 ---- m_posOrigTheta = ptheta; + m_negc = pc; m_negc1 = pc1; m_negc2 = pc2; *************** *** 183,187 **** * @see jboost.booster.BrownBoost#calc_constraints(int[][], double, double) */ ! protected ErfVars calc_constraints(double alpha, double t) { ErfVars vars = new ErfVars(); --- 178,182 ---- * @see jboost.booster.BrownBoost#calc_constraints(int[][], double, double) */ ! protected ErfVars calc_constraints(double alpha, double t, int[] examples) { ErfVars vars = new ErfVars(); *************** *** 202,207 **** double margin, orig_margin, step, new_margin, new_weight, new_pot, orig_pot; int example; ! for (int i= 0; i < m_hypPredictions.length; i++) { ! example = i; margin = m_margins[example]; orig_margin = margin; --- 197,202 ---- double margin, orig_margin, step, new_margin, new_weight, new_pot, orig_pot; int example; ! for (int i= 0; i < examples.length; i++) { ! example = examples[i]; margin = m_margins[example]; orig_margin = margin; *************** *** 229,236 **** //System.out.println("aj: " + aj + ", dj: " + dj + ", dj^2/sd^2: " + (dj*dj/(sd*sd)) + ", bj: " + bj); //System.out.println("N(mu,sigma):" + mu + "," + sd + ", B:" + vars.B + ", E:" + vars.E); ! } vars.B /= totalWeight; ! vars.E /= m_hypPredictions.length; ! vars.Potential /= m_hypPredictions.length; return vars; } --- 224,231 ---- //System.out.println("aj: " + aj + ", dj: " + dj + ", dj^2/sd^2: " + (dj*dj/(sd*sd)) + ", bj: " + bj); //System.out.println("N(mu,sigma):" + mu + "," + sd + ", B:" + vars.B + ", E:" + vars.E); ! } vars.B /= totalWeight; ! vars.E /= examples.length; ! vars.Potential /= examples.length; return vars; } *************** *** 283,288 **** * @return alpha - An appropriate value of alpha that satisfies constraints. */ ! protected double solve_constraints(double hyp_err) ! { /* * If the game has a small amount of time remaining, quit now. --- 278,282 ---- * @return alpha - An appropriate value of alpha that satisfies constraints. */ ! protected double solve_constraints(double hyp_err, int[] examples) { /* * If the game has a small amount of time remaining, quit now. *************** *** 313,317 **** double t_step = 0.1; double t=0.3; ! double alpha=0; /* --- 307,311 ---- double t_step = 0.1; double t=0.3; ! double alpha=0.1; /* *************** *** 386,390 **** // calculate constraints for values of alpha and t ! vars = calc_constraints (alpha, t); // reverse alpha search direction --- 380,384 ---- // calculate constraints for values of alpha and t ! vars = calc_constraints (alpha, t, examples); // reverse alpha search direction *************** *** 533,537 **** ! if (m_hypPredictions.length != m_margins.length) { System.err.println("WARNING: m_hypPredictions is not the same length as the margins"); } --- 527,531 ---- ! if (m_hypPredictions.length != m_margins.length || m_hypPredictions.length != m_numExamples) { System.err.println("WARNING: m_hypPredictions is not the same length as the margins"); } *************** *** 618,622 **** public String getParamString() { ! String ret = String.format("s=%.4f,c=%.4f,c1=%.4f,c2=%.4f,theta=%.4f,confidence=%b", m_s, m_c, m_c1, m_c2, m_theta, USE_CONFIDENCE); return ret; } --- 612,616 ---- public String getParamString() { ! String ret = String.format("YabaBoost s=%.4f c=%.4f c1=%.4f c2=%.4f theta=%.4f confidence=%b ", m_s, m_c, m_c1, m_c2, m_theta, USE_CONFIDENCE); return ret; } Index: AdaBoost.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/AdaBoost.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AdaBoost.java 13 Oct 2007 04:32:28 -0000 1.3 --- AdaBoost.java 23 Oct 2007 22:45:40 -0000 1.4 *************** *** 34,37 **** --- 34,46 ---- protected double[] m_potentials; + /** */ + protected int[] m_posExamples; + /** */ + protected int[] m_negExamples; + /** */ + protected int m_numPosExamples; + /** */ + protected int m_numNegExamples; + /** sampling weights for the examples */ protected double[] m_sampleWeights; *************** *** 92,95 **** --- 101,105 ---- m_numExamples++; m_tmpList.add(new TmpData(index, (short) l, weight)); + if (l==1) m_numPosExamples++; } else { failed= "AdaBoost.addExample received index " + index + ", when it expected index " + *************** *** 139,143 **** --- 149,157 ---- m_sampleWeights= new double[m_numExamples]; m_epsilon= m_smooth / m_numExamples; + m_posExamples = new int[m_numPosExamples]; + m_numNegExamples = m_numExamples - m_numPosExamples; + m_negExamples = new int[m_numNegExamples]; + int m_posIndex=0, m_negIndex=0; for (int i= 0; i < m_tmpList.size(); i++) { TmpData a= (TmpData) m_tmpList.get(i); *************** *** 146,149 **** --- 160,167 ---- m_weights[index]= m_oldWeights[index]= defaultWeight; m_labels[index]= a.getLabel(); + if (a.getLabel()==1) + m_posExamples[m_posIndex++] = index; + else + m_negExamples[m_negIndex++] = index; m_sampleWeights[index]= a.getWeight(); } Index: BrownBoost.java =================================================================== RCS file: /cvsroot/jboost/jboost/src/jboost/booster/BrownBoost.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** BrownBoost.java 2 Oct 2007 02:32:07 -0000 1.7 --- BrownBoost.java 23 Oct 2007 22:45:40 -0000 1.8 *************** *** 20,23 **** --- 20,28 ---- /** The total running time of the BrownBoost game. */ protected double m_c; + + /** In case we ever want to make BrownBoost cost sensitive. Also useful for Yaba. */ + protected double m_posc; + protected double m_negc; + /** The time remaining in the BrownBoost game. */ protected double m_s; *************** *** 28,31 **** --- 33,45 ---- /** The last value of alpha */ protected double m_lastAlpha; + + + /** + * This is true if and only if we are cost sensitive boosting. If + * this is set, then m_{pos,neg}c{1,2} and m_{pos,neg}Theta must + * be set in YabaBoost and m_c{pos,neg} must be set in BrownBoost. + */ + protected boolean m_isCostSensitive; + /** *************** *** 46,49 **** --- 60,68 ---- protected double m_totalPotential; + /** + * Records the potentials. Similar to m_margins and m_weights. + */ + protected int[] m_examples; + /** * Default constructor just calls AdaBoost to *************** *** 57,62 **** --- 76,83 ---- public void finalizeData() { super.finalizeData(); + m_examples = new int[m_numExamples]; for (int i=0; i<m_numExamples; i++) { m_potentials[i] = calculatePotential(0,m_c); + m_examples[i] = i; } } *************** *** 248,252 **** ! protected double solve_constraints(double hyp_err) { if( m_s < 0.001){ --- 269,273 ---- ! protected double solve_constraints(double hyp_err, int[] examples) { if( m_s < 0.001){ *************** *** 265,320 **** double new_t = 0.0; - /* - int k = 0; - while(k < 200){ - k += 1; - - vars = calc_constraints(alpha, t); - - double det = 2*(vars.V*vars.W - vars.U*vars.B); - double EPS = 0.0001; - - // If correlation and difference in potential are small, - // then we're done! - if (vars.B < EPS && vars.E < EPS){ - System.out.println("Solved constrains by minimizing B and E."); - break; - } - - if (Math.abs(det) < EPS){ - System.out.println("DETERMINANT IS TOO SMALL -- SUGGESTS JACOBIAN IS ALMOST SINGULAR!!!"); - } - - new_alpha = alpha + (c*vars.W*vars.B + Math.sqrt(Math.PI*c)*vars.U*vars.E) / det; - new_t = t + (c*vars.B*vars.B + Math.sqrt(Math.PI*c)*vars.V*vars.E) / det; - - // if newton raphson succeeded, exit loop - EPS = 0.001; - if (vars.E < EPS && vars.W < EPS){ - System.out.println("Solved constrains by minimizing E and W."); - break; - } - - EPS = 0.0001; - if (Math.abs(alpha-new_alpha) < EPS){ - System.out.println("Solved constraints by similar old and new alpha."); - break; - } - - alpha = new_alpha; - t = new_t; - } - - if (k>=100){ - System.err.println("WE DIDN'T ACTUAL FINISH ANYTHING!!! WE JUST TIMED OUT!!!"); - } - - double nr_alpha = alpha; - double nr_t = t; - System.out.println("Newton-Raphson gives alpha=" + nr_alpha + ", t=" + nr_t); - */ - - - // try binary search // find the maximal value for t for which there exists a value --- 286,289 ---- *************** *** 416,434 **** } - /* - public String surfingData() { - StringBuffer ret = new StringBuffer(""); - ret.append(String.format("BrownBoost Params: %.4f %.4f\n", m_c, m_s)); - for (int i=0; i<m_margins.length; i++){ - ret.append(String.format("%.4f\t%.4f\t%.4f\n", m_margins[i], m_weights[i], m_potentials[i])); - } - return ret.toString(); - } - */ - - - public String getParamString() { ! String ret = String.format("BrownBoost Params: %.4f %.4f", m_c, m_s); return ret; } --- 385,390 ---- } public String getParamString() { ! String ret = String.format("BrownBoost r=%.4f s=%.4f ", m_c, m_s); return ret; } *************** *** 529,537 **** /** compute the binary prediction associated with this bag */ public BinaryPrediction calcPrediction(double alpha) { ! BinaryPrediction ret = new BinaryPrediction(m_w[1] > m_w[0] ? 1.0 : -1.0 ); ret.scale(alpha); return ret; } /** Place holder to ensure that this function is not used in BrownBoost. */ --- 485,507 ---- /** compute the binary prediction associated with this bag */ public BinaryPrediction calcPrediction(double alpha) { ! BinaryPrediction ret; ! ret = new BinaryPrediction(m_w[1] > m_w[0] ? 1.0 : -1.0 ); ret.scale(alpha); return ret; } + /** compute the binary prediction associated with this bag */ + public BinaryPrediction calcPrediction(double posAlpha, double negAlpha) { + BinaryPrediction ret; + if (m_w[1] > m_w[0]) { + ret = new BinaryPrediction(1.0); + ret.scale(posAlpha); + } else { + ret = new BinaryPrediction(-1.0); + ret.scale(negAlpha); + } + return ret; + } + /** Place holder to ensure that this function is not used in BrownBoost. */ *************** *** 677,684 **** // initial guess for alpha. double gamma = getHypErr(bags, exampleIndex); ! double alpha = solve_constraints(gamma); ! ! for (int i= 0; i < bags.length; i++) { ! p[i]= ((BrownBag) bags[i]).calcPrediction(alpha); } return p; --- 647,666 ---- // initial guess for alpha. double gamma = getHypErr(bags, exampleIndex); ! ! if (m_isCostSensitive) { ! System.out.println("Solving positive example constraints"); ! double posAlpha = solve_constraints(gamma, m_posExamples); ! System.out.println("Solving negative example constraints"); ! double negAlpha = solve_constraints(gamma, m_negExamples); ! for (int i= 0; i < bags.length; i++) { ! p[i]= ((BrownBag) bags[i]).calcPrediction(posAlpha, negAlpha); ! System.out.println("p[" + i + "] = " + p[i]); ! } ! } else { ! double alpha = solve_constraints(gamma, m_examples); ! for (int i= 0; i < bags.length; i++) { ! p[i]= ((BrownBag) bags[i]).calcPrediction(alpha); ! System.out.println("p[" + i + "] = " + p[i]); ! } } return p; |