From: Aaron A. <aa...@us...> - 2007-05-16 21:16:32
|
Update of /cvsroot/jboost/jboost/scripts In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv29800/scripts Modified Files: margin.py Added Files: error.py Log Message: Putting in error.py. This file is Updated build to be more platform independent Controller now does numRounds+1 iterations Monitor now conforms to error.py output format jboost.config has been moved to demo. Aaron Index: margin.py =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/margin.py,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** margin.py 16 May 2007 04:06:02 -0000 1.1.1.1 --- margin.py 16 May 2007 21:16:29 -0000 1.2 *************** *** 31,34 **** --- 31,76 ---- + def write_files(examples, datafile, sample, margins, falsepos, truepos, falseneg, trueneg): + + f= open(datafile + '.margin.report', 'w') + f.write('total\t tp\t tn\t fp\t fn\n') + f.write(str(len(margins)) + \ + '\t ' + str(len(truepos)) + \ + '\t ' + str(len(trueneg)) + \ + '\t ' + str(len(falsepos)) + \ + '\t ' + str(len(falseneg)) + \ + '\n' + ) + f.write('Sensitivity: ' + str(float(len(truepos))/(len(truepos)+len(falseneg))) + '\n'); + f.write('False Pos Rate: ' + str(float(len(falsepos))/(len(trueneg)+len(falsepos))) + '\n'); + + + list= [[margins[w],examples[w]] for w in falseneg.keys()] + sortByColumns(list, [0], True) + writeList(list, datafile + '.false.neg') + + if (sample!=None): + sample= [list[x] for x in range(0, len(list), 50)] + f.write('Sampling '+ str(len(sample)) + ' false negative examples.\n') + writeList(sample, datafile + '.false.neg.samples') + + + + list= [[margins[w],examples[w]] for w in falsepos.keys()] + sortByColumns(list, [0], True) + writeList(list, datafile+'.false.pos') + + if (sample!=None): + sample= [list[x] for x in range(0, len(list), 5)] + f.write('Sampling '+ str(len(sample)) + ' false positive examples.\n') + writeList(sample, datafile + '.false.pos.samples') + + + f.close() + + os.system('cat 
' + datafile +'.margin.report') + + + *************** *** 112,118 **** --- 154,167 ---- margins= map(computeMargin, scores, labels) + + if len(examples) != len(margins): + print "Don't have margin for each example" + sys.exit(2) print 'Matching margins with data' + + pos_label, neg_label = getLabels() + falsepos = dict([(w,[margins[w],examples[w],labels[w]]) for w in range(len(margins)) *************** *** 131,170 **** ! ! f= open(datafile + '.margin.report', 'w') ! f.write('total\t tp\t tn\t fp\t fn\n') ! f.write(str(len(examples)) + \ ! '\t ' + str(len(truepos)) + \ ! '\t ' + str(len(trueneg)) + \ ! '\t ' + str(len(falsepos)) + \ ! '\t ' + str(len(falseneg)) + \ ! '\n' ! ) ! f.write('Sensitivity: ' + str(float(len(truepos))/(len(truepos)+len(falseneg))) + '\n'); ! f.write('False Pos Rate: ' + str(float(len(falsepos))/(len(trueneg)+len(falsepos))) + '\n'); ! ! ! list= [[margins[w],examples[w]] for w in falseneg.keys()] ! sortByColumns(list, [0], True) ! writeList(list, datafile + '.false.neg') ! ! if (sample!=None): ! sample= [list[x] for x in range(0, len(list), 50)] ! f.write('Sampling '+ str(len(sample)) + ' false negative examples.\n') ! writeList(sample, datafile + '.false.neg.samples') ! ! list= [[margins[w],examples[w]] for w in falsepos.keys()] ! sortByColumns(list, [0], True) ! writeList(list, datafile+'.false.pos') ! ! if (sample!=None): ! sample= [list[x] for x in range(0, len(list), 5)] ! f.write('Sampling '+ str(len(sample)) + ' false positive examples.\n') ! writeList(sample, datafile + '.false.pos.samples') - f.close() - - os.system('cat ' + datafile +'.margin.report') --- 180,186 ---- ! write_files(examples, datafile, sample, margins, falsepos, truepos, falseneg, trueneg) --- NEW FILE: error.py --- #! 
#!/usr/bin/env python
"""Extract the per-iteration error table from a jboost info file for gnuplot.

The script reads the file given by --info, collects the rows that follow the
'iter / bound / train / test' header line (stopping at the first line that
starts with 'End time'), and writes two files into the current directory:

  error.dat      one line per iteration: iter, train, test [, bound]
  error.gnuplot  gnuplot commands that plot error.dat into error.png

The script only writes these files; it does not invoke gnuplot itself.
"""

import string
import getopt
import sys
import os

# Header line (after stripping) that introduces the error table in the info file.
_TABLE_HEADER = 'iter \tbound \ttrain \ttest'
# Any line starting with this text terminates the scan of the info file.
_END_MARKER = 'End time'
# Column names, in the order the tab-separated fields appear on each row.
_COLUMNS = ('iter', 'bound', 'train', 'test')


def usage():
    """Print the command-line help for this script."""
    print('Usage: error.py ')
    print('--info=info_file scores file as output by jboost')
    print('--logaxis should the axis be log-scaled (default: false)')
    print('--bound show the bound on training error')


def _parse_info_lines(lines):
    """Return the error-table rows found in *lines* as a list of dicts.

    Each dict maps 'iter', 'bound', 'train' and 'test' to the raw field text.
    Rows are collected only after the table header has been seen; scanning
    stops at the first line that starts with 'End time'.
    """
    rows = []
    in_table = False
    for raw in lines:
        line = raw.strip()
        if line.startswith(_END_MARKER):
            break
        if in_table:
            if not line:
                # A blank line inside the table carries no data.
                continue
            fields = line.split('\t')
            if len(fields) < len(_COLUMNS):
                # Report and skip instead of dying with an IndexError.
                sys.stderr.write('Skipping malformed line: ' + line + '\n')
                continue
            # zip() ignores any extra trailing fields.
            rows.append(dict(zip(_COLUMNS, fields)))
        elif line == _TABLE_HEADER:
            in_table = True
    return rows


def _format_data_lines(rows, showbound):
    """Return the text lines of error.dat for the parsed *rows*.

    Columns are iter, train, test and, when *showbound* is true, bound.
    Every field is followed by a single space.
    """
    wanted = ['iter', 'train', 'test']
    if showbound:
        wanted.append('bound')
    return [''.join(row[name] + ' ' for name in wanted) + '\n' for row in rows]


def _gnuplot_commands(logaxis, showbound):
    """Return the text lines of the gnuplot script that renders error.png."""
    commands = [
        'set terminal png notransparent medium\n',
        'set output \'error.png\'\n',
        'set xlabel "Iteration"\n',
        'set ylabel "Error"\n',
        'set title ""\n',
    ]
    if logaxis:
        commands.append('set logscale x 10\n')

    series = [
        '"error.dat" using 1:2 title \'train\' with lines',
        '"error.dat" using 1:3 title \'test\' with lines',
    ]
    if showbound:
        # The bound is the fourth column of error.dat, present only with --bound.
        series.append('"error.dat" using 1:4 title \'bound\' with lines')
    commands.append('plot ' + ', '.join(series) + '\n')
    return commands


def _write_lines(path, lines):
    """Write *lines* to *path*, closing the file even if writing fails."""
    with open(path, 'w') as out:
        out.writelines(lines)


def main():
    """Parse the command line, read the info file, write error.dat/error.gnuplot.

    Exits with status 2 (after printing the usage text) when the options are
    invalid or --info is missing.
    """
    # Usage: see usage()
    try:
        opts, args = getopt.getopt(sys.argv[1:], '',
                                   ['info=', 'logaxis', 'bound'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    infofilename = logaxis = showbound = None
    for opt, arg in opts:
        if opt == '--info':
            infofilename = arg
        elif opt == '--logaxis':
            logaxis = True
        elif opt == '--bound':
            showbound = True

    if infofilename is None:
        usage()
        sys.exit(2)

    print('Reading info file')
    with open(infofilename, 'r') as info:
        lines = info.readlines()

    rows = _parse_info_lines(lines)
    _write_lines('error.dat', _format_data_lines(rows, showbound))
    _write_lines('error.gnuplot', _gnuplot_commands(logaxis, showbound))


if __name__ == "__main__":
    main()