From: Aaron A. <aa...@us...> - 2007-12-12 23:51:18
|
Update of /cvsroot/jboost/jboost/scripts In directory sc8-pr-cvs6.sourceforge.net:/tmp/cvs-serv11316 Modified Files: error.py Log Message: Can now show separate error for positive and negative examples Index: error.py =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/error.py,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** error.py 23 Oct 2007 22:50:42 -0000 1.4 --- error.py 12 Dec 2007 23:50:29 -0000 1.5 *************** *** 12,36 **** print '\t--bound show the bound on training error' print '\t--separate separate the positive and negative examples' ! print '\t--scores=score_file the score file (only needed if --separate used)' ! print '\t--margins=marg_file the margin file (only needed if --separate used)' ! def do_separate(score_filename, margin_filename): ! print 'Parsing scores file' ! f = open(score_filename,'r') ! lines = f.readlines() ! f.close() ! ! read_iter = False ! data = [] ! for line in lines: ! line = line.strip() ! if (line[:8] =='End time'): ! break ! if (read_iter): ! data.append(get_score(line)) ! ! --- 12,88 ---- print '\t--bound show the bound on training error' print '\t--separate separate the positive and negative examples' ! print '\t--boost-info=file The boosting.info file for a data set' ! SEPARATOR = ':' ! def get_margin(line): ! m = line.split(SEPARATOR)[1] ! m.replace(']','') ! m.replace(';','') ! m.replace(SEPARATOR,'') ! m.replace(' ','') ! m.replace('\t','') ! return float(m) ! ! def get_score(line): ! m = line.split(SEPARATOR)[2] ! m.replace(']','') ! m.replace(';','') ! m.replace(SEPARATOR,'') ! m.replace(' ','') ! m.replace('\t','') ! return float(m) + def getErrorsSingleIter(data): + tp = 0 + fp = 0 + tn = 0 + fn = 0 + + scores = map(get_score, data) + margins = map(get_margin, data) + for score, margin in zip(scores, margins): + if score < 0 and margin < 0: + fn += 1 + if score < 0 and margin > 0: + tn += 1 + if score > 0 and margin < 0: + fp += 1 + if score > 0 and margin > 0: + tp += 1 + return (tp,fp,tn,fn) + + def getErrors(boostfilename): + f= open(boostfilename,'r') + data= f.readlines() + f.close() + score_elements = int((string.split(data[0],SEPARATOR))[1].split('=')[1]) + num_iterations = int((string.split(data[-score_elements-1],SEPARATOR))[0].split('=')[1]) + print num_iterations + + errors = [] + for iter in range(num_iterations+1): + tp,fp,tn,fn = getErrorsSingleIter([x for x in data[iter*(score_elements+1)+1:(iter+1)*(score_elements+1)]]) + print iter,tp,fp,tn,fn + ret = {} + if tp + fn > 0: + ret['recall'] = ret['sensitivity'] = float(tp) / (tp + fn) + else: + ret['recall'] = ret['sensitivity'] = 0 + if tp + fp > 0: + ret['precision'] = float(tp) / (tp + fp) + else: + ret['precision'] = 0 + if fp + tn > 0: + ret['fpr'] = float(fp) / (fp + tn) + else: + ret['fpr'] = 0 + ret['specificity'] = 1 - ret['fpr'] + ret['neg_err'] = ret['fpr'] + ret['pos_err'] = 1 - ret['sensitivity'] + ret['err'] = float(fp+fn) / (fp+tp+fn+tn) + ret['iter'] = iter + errors.append(ret) + return errors *************** *** 39,53 **** try: ! opts, args= getopt.getopt(sys.argv[1:], '', ['info=','logaxis','bound','separate','scores=','margins=']) except getopt.GetoptError: usage() sys.exit(2) ! info_filename = logaxis = showbound = show_separate = margin_filename = score_filename = None for opt,arg in opts: if (opt == '--info'): info_filename = arg ! if (opt == '--margins'): ! margin_filename = arg if (opt == '--info'): score_filename = arg --- 91,105 ---- try: ! opts, args= getopt.getopt(sys.argv[1:], '', ['info=','logaxis','bound','separate','boost-info=']) except getopt.GetoptError: usage() sys.exit(2) ! info_filename = logaxis = showbound = show_separate = boost_filename = None for opt,arg in opts: if (opt == '--info'): info_filename = arg ! if (opt == '--boost-info'): ! boost_filename = arg if (opt == '--info'): score_filename = arg *************** *** 60,69 **** ! if (show_separate and (margin_filename==None or score_filename==None)): print 'ERROR: Need to specify score and margin file if showing positive and negative elements separately' usage() sys.exit(2) ! if(info_filename == None): print 'ERROR: Need to specify info file' usage() --- 112,121 ---- ! if show_separate and boost_filename==None: print 'ERROR: Need to specify score and margin file if showing positive and negative elements separately' usage() sys.exit(2) ! if info_filename == None: print 'ERROR: Need to specify info file' usage() *************** *** 100,115 **** outlines = [] for line in data: ! out = '' ! out += line['iter'] + ' ' ! out += line['train'] + ' ' ! out += line['test'] + ' ' if (showbound): out += line['bound'] + ' ' outlines.append(out+'\n') - f = open('error.dat', 'w') f.writelines(outlines) f.close() outlines = [] --- 152,173 ---- outlines = [] for line in data: ! out = line['iter'] + ' ' + line['train'] + ' ' + line['test'] + ' ' if (showbound): out += line['bound'] + ' ' outlines.append(out+'\n') f = open('error.dat', 'w') f.writelines(outlines) f.close() + if show_separate: + lines = getErrors(boost_filename) + outlines = [] + for line in lines: + out = str(line['iter']) + ' ' + str(line['neg_err']) + ' ' + str(line['pos_err']) + ' ' + str(line['err']) + outlines.append(out+'\n') + f = open('error_sep.dat', 'w') + f.writelines(outlines) + f.close() + outlines = [] *************** *** 119,129 **** outlines.append('set ylabel "Error"\n') outlines.append('set title "' + info_filename + ' Error"\n') if (logaxis): outlines.append('set logscale x 10\n') out = '' ! out += 'plot "error.dat" using 1:2 title \'train\' with lines' ! out += ', "error.dat" using 1:3 title \'test\' with lines' ! if showbound: ! out += ', "error.dat" using 1:4 title \'bound\' with lines' out += '\n' outlines.append(out) --- 177,197 ---- outlines.append('set ylabel "Error"\n') outlines.append('set title "' + info_filename + ' Error"\n') + if show_separate: + outlines.append('set title "' + info_filename + ' Error (POSITIVE EXAMPLE ERROR MAY BE NEGATIVE EXAMPLE ERROR)"\n') if (logaxis): outlines.append('set logscale x 10\n') + outlines.append('set style line 10 lt 1 lw 1 pt 5 ps 0.65\n') + outlines.append('set style line 11 lt 3 lw 1 pt 1 ps 0.65\n') + outlines.append('set style line 12 lt 5 lw 1 pt 1 ps 0.65\n') out = '' ! if show_separate: ! out += 'plot "error_sep.dat" using 1:2 title \'Negative Example Error\' with lines linestyle 10' ! out += ', "error_sep.dat" using 1:3 title \'Positive Example Error\' with lines linestyle 11' ! out += ', "error_sep.dat" using 1:4 title \'Error\' with lines linestyle 12' ! else: ! out += 'plot "error.dat" using 1:2 title \'train\' with lines linestyle 10' ! out += ', "error.dat" using 1:3 title \'test\' with lines linestyle 11' ! if showbound: ! out += ', "error.dat" using 1:4 title \'bound\' with lines' out += '\n' outlines.append(out) *************** *** 136,148 **** os.system('gnuplot ' + pngfilename) - ##################################### - # Now do scores file for separate option - ##################################### - - if show_separate: - do_separate() - - - if __name__ == "__main__": --- 204,207 ---- |