From: Aaron A. <aa...@us...> - 2007-07-10 01:34:20
|
Update of /cvsroot/jboost/jboost/scripts In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv21939/scripts Added Files: surfing.py Log Message: added --- NEW FILE: surfing.py --- #! /usr/bin/env python import string import getopt import sys import pickle import os import re import math def usage(): print 'Usage: margin.py ' print '\t--surfing=filename surfing file as output by jboost (-a -2 switch)' print '\t--scores=filename scores file as output by jboost (-a -2 switch)' print '\t--data=filename train/test filename' print '\t--spec=filename spec filename' print '\t--booster=type type={normal, brown}' print '\t--framerate=num an integer specifying framerate' def erf(z): t = 1.0 / (1.0 + 0.5 * abs(z)); ans = 1 - t * math.exp( -z*z - 1.26551223 + \ t * ( 1.00002368 + \ t * ( 0.37409196 + \ t * ( 0.09678418 + \ t * (-0.18628806 + \ t * ( 0.27886807 + \ t * (-1.13520398 + \ t * ( 1.48851587 + \ t * (-0.82215223 + \ t * ( 0.17087277)))))))))) if z < 0: ans = - ans return ans; def get_weight_line(params, start, end, max_bin): weights = [] s = c= c1 = c2 = theta = 0 if(params[0] == 'brown'): c = float(params[1]) s = float(params[2]) #time remaining elif(params[0] == 'normal'): c = float(params[1]) s = c-float(params[2]) #time played start = -1.0 end = 1.0 c1 = float(params[3]) c2 = float(params[4]) theta = float(params[5]) num_steps = 100 step = (end - start) / num_steps x = start for i in range(0,num_steps): mu = var = 0 if params[0] == 'brown': mu = -s var = c elif params[0] == 'normal': mu = theta - c1 * (math.exp(-s) - math.exp(-c)) var = c2 * (math.exp(-2*s) - math.exp(-2*c)) + 0.05 norm = 1 / math.sqrt(var*math.pi) y = math.exp(-math.pow(x-mu,2)/(var)) * max_bin p = (1 - erf((x-mu)/math.sqrt(var)))/2 weights.append((x,y,p)) x += step return weights def get_margin_hist(margins, is_cum): num_bins = 0 if is_cum: num_bins = 100 else: num_bins = 30 marg_max = max(margins) marg_min = min(margins) bin_size = (marg_max - marg_min) / num_bins b = marg_min i = 0 hist = [] x_axis = [] j = 0 EPS = bin_size / 10000 total_seen = 0 while b <= marg_max: hist.append(0) while i < len(margins) and (margins[i] <= b + bin_size + EPS): i += 1 hist[j] += 1 x_axis.append(b + bin_size) if is_cum: tmp = hist[j] hist[j] += total_seen total_seen += tmp b += bin_size j += 1 s = 0 if is_cum: s = max(hist) else: s = sum(hist) return [(x, float(h) / s) for (x,h) in zip(x_axis,hist)] def paste_gnuplot(num_iterations, framerate): print 'Starting animated gif creation' os.system('convert -rotate 90 -delay 5 -loop 0 surfing*.eps surfing.gif') print 'Finished gif creation' print 'See surfing.gif for animation' line_compare = lambda x,y : cmp(float(string.split(x,'\t')[0]), float(string.split(y,'\t')[0])) def write_gnuplot(lines, iter, datafile, params): lines.sort(line_compare); margins = [ float(string.split(x,'\t')[0]) for x in lines] weights = [ float(string.split(x,'\t')[1]) for x in lines] potentials = [ float(string.split(x,'\t')[2]) for x in lines] marg_max = max(margins); marg_min = min(margins); lines = get_margin_hist(margins, 1); max_bin = max([l[1] for l in lines]) lines = [str(l[0]) + ' ' + str(l[1]) + '\n' for l in lines] f= open('surfing_hist.dat', 'w') f.writelines(lines) f.close() weight_line = get_weight_line(params, marg_min, marg_max, max_bin) # lines = [ str(m) + ' ' + str(p) + ' ' + str(w) + '\n' for (m,w,p) in weight_line] lines = [ str(m) + ' ' + str(p) + '\n' for (m,w,p) in weight_line] f= open('surfing_weight.dat', 'w') f.writelines(lines) f.close() xrange = max([abs(marg_min), abs(marg_max)]) yrange = max_bin epsoutlines = [] epsoutlines.append('set terminal post\n') epsoutlines.append('set output \'surfing%05d.eps\'\n' % (iter)) epsoutlines.append('set title "'+ params[0] + 'boost ' + datafile + ' Surfing: Iteration ' + str(iter) + '" font "Times,20"\n') epsoutlines.append('set key left top\n') epsoutlines.append('set yzeroaxis lt -1\n') if params[0]=='normal': epsoutlines.append('set xrange [-1:1]\n') epsoutlines.append('set yrange [0:1]\n') else: epsoutlines.append('set xrange [%0.2f:%0.2f]\n' % (-xrange, xrange)) epsoutlines.append('set yrange [0:1]\n') epsoutlines.append('set xlabel "Margin" font "Times,20"\n') epsoutlines.append('set ylabel "Cumulative Distribution" font "Times,20"\n') epsoutlines.append('plot "surfing_hist.dat" using 1:2 title "Margin CDF" with lines, ' + \ '"surfing_weight.dat" using 1:2 title "Potential" with lines \n') # if params[0]=='normal': # epsoutlines.append('set parametric\n') # epsoutlines.append('set trange [0:1]\n') # epsoutlines.append('replot ' + str(params[5]) + ', t \n') pngoutlines = epsoutlines[:] pngoutlines[0] = 'set terminal png notransparent small\n' pngoutlines[1] = ('set output \'surfing%05d.png\'\n' % (iter)) gifoutlines = epsoutlines[:] gifoutlines[0] = 'set terminal gif notransparent\n' gifoutlines[1] = ('set output \'surfing%05d.gif\'\n' % (iter)) f = open('surfing.png.gnuplot', 'w') f.writelines(pngoutlines) f.close() f = open('surfing.eps.gnuplot', 'w') f.writelines(epsoutlines) f.close() f = open('surfing.gif.gnuplot', 'w') f.writelines(gifoutlines) f.close() os.system('gnuplot surfing.eps.gnuplot') print 'Finished with iteration ' + str(iter) def main(): # Usage: see usage() # Looks at all the examples that have negative margins # the output can be used to find the examples that are probably mislabeled # and also the examples that might need more features try: opts, args= getopt.getopt(sys.argv[1:], '', ['surfing=', 'scores=','data=','spec=','labels=', 'iteration=', 'framerate=', 'booster=', 'sample']) except getopt.GetoptError, inst: print 'Received an illegal argument:', inst usage() sys.exit(2) surffile = scoresfile = datafile = framerate = specfile = labelsfile = sample = iteration = booster = None for opt,arg in opts: if (opt == '--surfing'): surffile = arg if (opt == '--booster'): booster = arg if (opt == '--framerate'): framerate = int(arg) if (opt == '--scores'): scoresfile = arg if (opt == '--spec'): specfile = arg if (opt == '--data'): datafile = arg if(surffile == None or scoresfile == None or datafile == None or specfile == None): print 'Need score, data, and spec file.' usage() sys.exit(2) if(framerate==None): print 'Need frame rate.' usage() sys.exit(2) if(booster==None or (booster!='normal' and booster!='brown')): print 'Only accepts `normal` and `brown` boost.' usage() sys.exit(2) print 'Reading surfing file' f= open(surffile,'r') data= f.readlines() f.close() num_elements= int((string.split(data[0],'='))[2]) num_iterations = int(string.split(string.split(data[-num_elements-2], ',')[0], '=')[1]) os.system('rm surfing*.eps surfing*.png') for iter in range(0, num_iterations+1, framerate): if (booster=='brown'): total_time = string.split(data[iter*(num_elements+2)+1], ' ')[2] time_left = string.split(data[iter*(num_elements+2)+1], ' ')[3] params = ('brown', total_time, time_left) if (booster=='normal'): total_time = string.split(data[iter*(num_elements+2)+1], ' ')[2] time_left = string.split(data[iter*(num_elements+2)+1], ' ')[3] c1 = 1.818 c2 = 1 theta = 0.1 #c1 = string.split(data[iter*(num_elements+2)+1], ' ')[4] #c2 = string.split(data[iter*(num_elements+2)+1], ' ')[5] #theta = string.split(data[iter*(num_elements+2)+1], ' ')[6] params = ('normal', total_time, time_left, c1, c2, theta) lines = [x for x in data[iter*(num_elements+2)+2:(iter+1)*(num_elements+2)]] write_gnuplot(lines, iter, datafile, params) paste_gnuplot(num_iterations, framerate) if __name__ == "__main__": main() |