From: Aaron A. <aa...@us...> - 2007-05-17 07:55:20
|
Update of /cvsroot/jboost/jboost/scripts In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv1959/scripts Modified Files: atree2dot2ps.pl error.py margin.py nfold.py Log Message: Can now print out multiple margin lines. Can also print out error curves. atree2dot2ps.pl now outputs multiple formats (PNG, GIF, PS). nfold.py now invokes the shell wrapper script jboost. -Aaron Index: margin.py =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/margin.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** margin.py 16 May 2007 21:16:29 -0000 1.2 --- margin.py 17 May 2007 07:55:16 -0000 1.3 *************** *** 6,9 **** --- 6,10 ---- import pickle import os + import re def usage(): *************** *** 11,14 **** --- 12,16 ---- print '--scores=filename scores file as output by jboost' print '--data=filename train/test filename' + print '--spec=filename spec filename' print '--iteration=NUMBER the iteration number to inspect' print '--sample are we interested in a sample of false pos/neg' *************** *** 74,78 **** ! def main(): # Usage: see usage() --- 76,199 ---- ! def write_gnuplot(labels, scores_list, iters, datafile): ! ! margin_list = [] ! for i in range(len(iters)): ! scores = scores_list[i] ! margins = get_margins(labels, scores); ! margins.sort() ! margin_list.append(margins) ! ! lines = [] ! for w in range(len(margins)): ! line = "" ! line += str(float(w)/len(margins)) ! for i in range(len(margin_list)): ! margins = margin_list[i] ! marg_max = max(margins) ! line += ' ' ! line += str(margins[w]/marg_max) ! line += ' \n' ! lines.append(line) ! ! f= open('margin.dat', 'w') ! f.writelines(lines) ! f.close() ! ! ! ! outlines = [] ! ! outlines.append('set terminal png notransparent small\n') ! outlines.append('set output \'margin.png\'\n') ! outlines.append('set xlabel "Margin"\n') ! outlines.append('set ylabel "Cumulative Distribution"\n') ! outlines.append('set title "' + datafile + ' Error"\n') ! out = '' ! 
out += 'plot ' ! for i in range(len(iters)): ! out += ' "margin.dat" using ' + str(i+2) + ':1' ! out += ' title \'Iteration: ' + str(iters[i]) + '\' with lines ,' ! out = out[:-1] ! out += '\n' ! outlines.append(out) ! ! f = open('margin.gnuplot', 'w') ! f.writelines(outlines) ! f.close() ! ! ! ! ! ! def getLabels(specfilename): ! f = open(specfilename, 'r') ! lines = f.readlines() ! f.close() ! ! label1 = '' ! label2 = '' ! ! p = re.compile('[ \t]*labels[ \t]*(\\(.*\\), \\(.*\\))') ! ! for line in lines: ! if line.find('labels', 0, 5) > 0: ! print line ! m = p.match(line) ! ! ! label1 = m.group ! return label1, label2 ! ! ! def get_margins(labels, scores): ! def computeMargin(score, label): ! return score*int(label) ! ! margins= map(computeMargin, scores, labels) ! return margins ! ! ! ! def process_data(examples, labels, scores, datafile, sample): ! ! print 'Examples ' + str(len(examples)) ! print 'Scores ' + str(len(scores)) ! print 'Labels ' + str(len(labels)) ! print 'Generating margins' ! ! margins = get_margins(labels, scores) ! ! if len(examples) != len(margins): ! print "Don't have margin for each example" ! sys.exit(2) ! ! print 'Matching margins with data' ! ! ! #pos_label, neg_label = getLabels(specfile) ! ! falsepos = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] < 0.0 and labels[w].strip() == '-1']) ! truepos = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] > 0.0 and labels[w].strip() == '+1']) ! ! ! falseneg = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] < 0.0 and labels[w].strip() == '+1']) ! trueneg = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] > 0.0 and labels[w].strip() == '-1']) ! ! ! write_files(examples, datafile, sample, margins, falsepos, truepos, falseneg, trueneg) ! ! ! ! ! def main(): # Usage: see usage() *************** *** 82,94 **** try: ! 
opts, args= getopt.getopt(sys.argv[1:], '', ['scores=','data=','labels=', 'iteration=', 'sample']) except getopt.GetoptError: usage() sys.exit(2) ! scoresfile = datafile = labelsfile = sample = iteration = None for opt,arg in opts: if (opt == '--scores'): scoresfile = arg elif (opt == '--data'): datafile = arg --- 203,217 ---- try: ! opts, args= getopt.getopt(sys.argv[1:], '', ['scores=','data=','spec=','labels=', 'iteration=', 'sample']) except getopt.GetoptError: usage() sys.exit(2) ! scoresfile = datafile = specfile = labelsfile = sample = iteration = None for opt,arg in opts: if (opt == '--scores'): scoresfile = arg + if (opt == '--spec'): + specfile = arg elif (opt == '--data'): datafile = arg *************** *** 101,114 **** ! if(scoresfile == None or datafile == None): ! usage() ! sys.exit(2) if(labelsfile == None): ! labelsfile = datafile + '.labels' ! if os.path.exists(labelsfile): ! print 'Using label file:', labelsfile ! # do nothing ! else: command = 'cat ' + datafile + ' | sed \'s/.*\(.1\);/\\1/g\' > ' + labelsfile print command --- 224,238 ---- ! if(scoresfile == None or datafile == None or specfile == None): ! print 'Need score, data, and spec file.' ! usage() ! sys.exit(2) if(labelsfile == None): ! labelsfile = datafile + '.labels' ! #if os.path.exists(labelsfile): ! # print 'Using label file:', labelsfile ! # # do nothing ! #else: command = 'cat ' + datafile + ' | sed \'s/.*\(.1\);/\\1/g\' > ' + labelsfile print command *************** *** 118,123 **** os.system(command) ! ! print 'Reading score values' f= open(scoresfile,'r') --- 242,250 ---- os.system(command) ! print 'Reading data' ! f= open(datafile,'r') ! examples= f.readlines() ! f.close() ! 
print 'Reading score values' f= open(scoresfile,'r') *************** *** 125,184 **** f.close() elements= int((string.split(data[0],'='))[2]) - - if (iteration != None): - iter = int(iteration) - scores= [float(x) for x in data[iter*(elements+1)+1:(iter+1)*(elements+1)]] - else: - scores= [float(x) for x in data[-elements:]] - - - - print 'Reading training data' - f= open(datafile,'r') - examples= f.readlines() - f.close() print 'Reading labels' f= open(labelsfile,'r') ! data= f.readlines() f.close() - labels= data[-elements:] - - print 'Examples ' + str(len(examples)) - print 'Scores ' + str(len(scores)) - print 'Labels ' + str(len(labels)) - print 'Generating margins' - def computeMargin(score, label): - return score*int(label) - - margins= map(computeMargin, scores, labels) - - if len(examples) != len(margins): - print "Don't have margin for each example" - sys.exit(2) - - print 'Matching margins with data' - - - pos_label, neg_label = getLabels() ! falsepos = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] < 0.0 and labels[w].strip() == '-1']) ! truepos = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] > 0.0 and labels[w].strip() == '+1']) ! ! ! falseneg = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] < 0.0 and labels[w].strip() == '+1']) ! trueneg = dict([(w,[margins[w],examples[w],labels[w]]) ! for w in range(len(margins)) ! if margins[w] > 0.0 and labels[w].strip() == '-1']) ! write_files(examples, datafile, sample, margins, falsepos, truepos, falseneg, trueneg) --- 252,275 ---- f.close() elements= int((string.split(data[0],'='))[2]) print 'Reading labels' f= open(labelsfile,'r') ! labels= f.readlines()[-elements:] f.close() ! score_list = [] ! if (iteration != None): ! iters = map(int, iteration.split(',')) ! for iter in iters: ! scores= [float(x) for x in data[iter*(elements+1)+1:(iter+1)*(elements+1)]] ! 
score_list.append(scores) ! else: ! scores= [float(x) for x in data[-elements:]] ! score_list.append(scores) + process_data(examples, labels, score_list[len(score_list)-1], datafile, sample) ! write_gnuplot(labels, score_list, iters, datafile) Index: error.py =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/error.py,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** error.py 16 May 2007 21:16:29 -0000 1.1 --- error.py 17 May 2007 07:55:16 -0000 1.2 *************** *** 77,85 **** outlines = [] ! outlines.append('set terminal png notransparent medium\n') outlines.append('set output \'error.png\'\n') outlines.append('set xlabel "Iteration"\n') outlines.append('set ylabel "Error"\n') ! outlines.append('set title ""\n') if (logaxis): outlines.append('set logscale x 10\n') --- 77,85 ---- outlines = [] ! outlines.append('set terminal png notransparent small\n') outlines.append('set output \'error.png\'\n') outlines.append('set xlabel "Iteration"\n') outlines.append('set ylabel "Error"\n') ! outlines.append('set title "' + infofilename + ' Error"\n') if (logaxis): outlines.append('set logscale x 10\n') Index: atree2dot2ps.pl =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/atree2dot2ps.pl,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** atree2dot2ps.pl 16 May 2007 04:06:02 -0000 1.1.1.1 --- atree2dot2ps.pl 17 May 2007 07:55:16 -0000 1.2 *************** *** 140,144 **** close(OUT); ! system("dot -Tps $filename.$threshold.dot -o $filename.$threshold.ps"); system("ps2pdf $filename.$threshold.ps"); --- 140,146 ---- close(OUT); ! system("dot -Tpng $filename.$threshold.dot -o $filename.$threshold.png"); ! system("dot -Tgif $filename.$threshold.dot -o $filename.$threshold.gif"); ! 
system("dot -Tps2 $filename.$threshold.dot -o $filename.$threshold.ps"); system("ps2pdf $filename.$threshold.ps"); Index: nfold.py =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/nfold.py,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -C2 -d -r1.1.1.1 -r1.2 *** nfold.py 16 May 2007 04:06:02 -0000 1.1.1.1 --- nfold.py 17 May 2007 07:55:16 -0000 1.2 *************** *** 25,30 **** def learner(atreeoption, k, rounds): # XXX: put in description! config= os.getenv('JBOOST_CONFIG') ! command = 'java -Xmx1000M -cp ' + os.getenv('CLASSPATH') + ' jboost.controller.Controller -p 3 -a -2 -S trial'+str(k)+' -n trial.spec -ATreeType '+ atreeoption +' -numRounds ' + str(rounds) if (config != None): command = command + ' -CONFIG ' + config --- 25,37 ---- def learner(atreeoption, k, rounds): # XXX: put in description! + pwd = os.getcwd() + caller = sys.argv[0] + end = caller.rfind('/') + jboost_command = '../jboost' + if(end > 0): + jboost_command = pwd + '/../' + caller[:end+1] + '/../jboost' + config= os.getenv('JBOOST_CONFIG') ! command = jboost_command + ' -p 3 -a -2 -S trial'+str(k)+' -n trial.spec -ATreeType '+ atreeoption +' -numRounds ' + str(rounds) if (config != None): command = command + ' -CONFIG ' + config |