From: Sunsern C. <sch...@us...> - 2009-03-12 23:42:50
|
Update of /cvsroot/jboost/jboost/scripts In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv17190/scripts Modified Files: VisualizeScores.README VisualizeScores.DEMO.README VisualizeScores.py Log Message: * New VisualizeScores.py Index: VisualizeScores.py =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/VisualizeScores.py,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** VisualizeScores.py 17 Jan 2009 02:00:31 -0000 1.8 --- VisualizeScores.py 12 Mar 2009 23:42:40 -0000 1.9 *************** *** 1,16 **** import sys, os, os.path, re, math, glob - import jarray - from java.util import Vector def usage(): ! print("Usage: VisualizeScores <info-files-path>") ! print("info-file-path is a directory containing files with names like trial0.test.boosting.info, trial2.train.boosting.info ...") ! print("Example: perform the following commands from the jboost root directory") ! print("<setup classpath to point to jython>") ! print("source scripts/setClassPath.sh") ! print("cd demo") ! print("cat spambase.test spambase.train > spambase.data") ! print("../scripts/nfold.py --folds=3 --data=spambase.data --spec=spambase.spec --rounds=43 --tree=ADD_ALL --generate --booster=LogLossBoost") ! print("jython ../scripts/VisualizeScores.py cvdata-09-09-17-00-55/ADD_ALL/trial*") print("") print("IMPORTANT NOTE: jboost should of been run on data and spec files passed through AddRandomIndex.py") --- 1,10 ---- + #!/usr/bin/python + import sys, os, os.path, re, math, glob def usage(): ! print("Usage: VisualizeScores.py <info-files-path>") ! print(" <info-file-path> is a directory containing files with names") ! 
print(" like trial0.test.boosting.info, trial2.train.boosting.info ...") print("") print("IMPORTANT NOTE: jboost should of been run on data and spec files passed through AddRandomIndex.py") *************** *** 26,163 **** sys.exit(2) - header_p = re.compile('iteration=(\d+): elements=(\d+):') - scoreline_p = re.compile('([-+\d.]+):\s+') #([+-1]+):') - - testfiles = glob.glob(globpath + "*.test.boosting.info") trainfiles = glob.glob(globpath + "*.train.boosting.info") - - #testfiles = glob.glob("/Users/yoavfreund/Downloads/dc_boost_active_output/iter?/*.test.boosting.info") - - #testfiles = glob.glob("/Users/yoavfreund/projects/jboost/demo/cvdata-09-09-17-00-55/ADD_ALL/trial*.test.boosting.info") - - print(testfiles) - exampleData = {} - - #find the iteration indices - - infile = open(testfiles[0], 'r') - iterList = [] - iterDict = {} - iterationIndex = 0 - for line in infile: - [(iter, elements)] = header_p.findall(line) - elements = int(elements) - - if iterDict.has_key(iter): - sys.exit("data file " + testfiles[0] + " has two lists corresponding to iteration " + iter) - iterList.append(iter) - iterDict[iter] = iterationIndex - iterationIndex = iterationIndex + 1 - - for count in range(elements): - line = infile.readline() - - from jboost.visualization import DataSet, DataElement, HistogramFrame - - d = DataSet(iterList) - d.setOutputFilename(globpath + "/selectedExamples.txt") - - # read the data into the Dataset data structures. 
- for filename in testfiles: - print(filename) - infile = open(filename, 'r') - iterList = []; - for line in infile: - [(iter, elements)] = header_p.findall(line) - iterationIndex = iterDict[iter] - elements = int(elements) - - for count in range(elements): - line = infile.readline() - #print line - a = scoreline_p.findall(line) - index = int(a[1]) - score = float(a[3]) - label = int(a[ - 1]) - if not exampleData.has_key(index): - exampleData[index] = {"label":label, "scores":{} } - exampleData[index]["scores"][int(iter)] = {"test":score, "train":[]} - e = DataElement(score, index, label) - d.addDataElement(e, iterationIndex) - - d.preProcessDataset() - lowestScore = d.getMin() - highestScore = d.getMax() - print("lowestScore=%f, highestScore=%f\n" % (lowestScore, highestScore)) - - print(len(exampleData)) - - for filename in trainfiles: - print(filename) - infile = open(filename, 'r') - for line in infile: - [(iter, elements)] = header_p.findall(line) - iterationIndex = iterDict[iter] - elements = int(elements) - - for count in range(elements): - line = infile.readline() - #print line - a = scoreline_p.findall(line) - index = int(a[1]) - score = float(a[3]) - label = int(a[ - 1]) - if exampleData.has_key(index): - exampleData[index]["scores"][int(iter)]["train"].append(score) - #else: - #print "missing index=%d\n" % index - - - # changed first index below from 0 to exampleData.keys()[0] : by boyko - iterList = exampleData[exampleData.keys()[0]]["scores"].keys() - iterList.sort() - - fluctBins = 20 - percentage = 0.05 - barHeight = 6 - binWidth = (highestScore - lowestScore) / fluctBins - fluct = {} - for iter in iterList: - fluct[iter] = [ {"trainScores":[]} for i in range(fluctBins)] - - for iterNo in range(len(iterList)): - iter = iterList[iterNo] - print("iteration %d\n" % iter) - for index in exampleData.keys(): - testScore = exampleData[index]["scores"][iter]["test"] - bin = int((testScore - lowestScore) / binWidth) - bin = min(bin, fluctBins - 1) - for 
trainScore in exampleData[index]["scores"][iter]["train"]: - fluct[iter][bin]["trainScores"].append(trainScore) - - y = 0 - for bin in range(fluctBins): - fluct[iter][bin]["trainScores"].sort() - n = len(fluct[iter][bin]["trainScores"]) - if n > 10: - bottom = fluct[iter][bin]["trainScores"][int(n * percentage)] - top = fluct[iter][bin]["trainScores"][int(n * (1 - percentage))] - yMax = y - yMin = y - barHeight - y = y - 1.1 * barHeight - fluct[iter][bin]["bottom"] = bottom - fluct[iter][bin]["top"] = top - fluct[iter][bin]["yMin"] = yMin - fluct[iter][bin]["yMax"] = yMax - - print("[%f,%f] -> [%f,%f] X [%f,%f]\n" % (lowestScore + bin * binWidth, lowestScore + (bin + 1) * binWidth, bottom, top, yMin, yMax)) - d.addFluctItems(iterNo, lowestScore + bin * binWidth, lowestScore + (bin + 1) * binWidth, bottom, top, yMin, yMax) - fluct[iter][bin]["trainScores"] = [] - - - v = HistogramFrame(d) - v.show() if __name__ == "__main__": main() --- 20,40 ---- sys.exit(2) testfiles = glob.glob(globpath + "*.test.boosting.info") trainfiles = glob.glob(globpath + "*.train.boosting.info") + #print testfiles + #print trainfiles + + cmd = "java jboost.visualization.HistogramFrame " + str(len(testfiles)) + " " + for f in testfiles: + cmd = cmd + f + " " + cmd = cmd + str(len(trainfiles)) + " " + for f in trainfiles: + cmd = cmd + f + " " + + os.system(cmd) + + + if __name__ == "__main__": main() Index: VisualizeScores.DEMO.README =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/VisualizeScores.DEMO.README,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** VisualizeScores.DEMO.README 25 Jan 2009 08:52:38 -0000 1.2 --- VisualizeScores.DEMO.README 12 Mar 2009 23:42:40 -0000 1.3 *************** *** 3,7 **** Assumptions ========== ! 0. you have java, jython, and python installed. 1. you have downloaded the jboost dist from sourceforge.net and built jboost from source or you downloaded the pre-release. 
(ie. you have jboost 1.4.1 or greater) --- 3,7 ---- Assumptions ========== ! 0. you have java and python installed. 1. you have downloaded the jboost dist from sourceforge.net and built jboost from source or you downloaded the pre-release. (ie. you have jboost 1.4.1 or greater) *************** *** 15,19 **** 1. add some libs to your (existing) java CLASSPATH: ! > export CLASSPATH=$CLASSPATH:$JBOOST_DIR/lib/jcommon-1.0.8.jar:$JBOOST_DIR/lib/jfreechart-1.0.10.jar:$JBOOST_DIR/lib/swing-layout-1.0.jar 2. cd to the demo directory --- 15,19 ---- 1. add some libs to your (existing) java CLASSPATH: ! > export CLASSPATH=$CLASSPATH:$JBOOST_DIR/lib/jcommon-1.0.8.jar:$JBOOST_DIR/lib/jfreechart-1.0.10.jar 2. cd to the demo directory *************** *** 66,72 **** 5. the results are placed in the directory ./spambase_idx.data.folds_2/cvdata-mm-dd-hh-mm-ss/<TREE-TYPE> ! 6. Now run the visualizer in the scripts. Assuming jython is not on your path, be explicit: ! > ~/jython2.2.1/jython ../scripts/VisualizeScores.py spambase_idx.data.folds_2/cvdata-mm-dd-hh-mm-ss/ADD_ALL/trial (note, this example output shows the cvdata-mm-dd-hh-mm-ss for my test run) --- 66,72 ---- 5. the results are placed in the directory ./spambase_idx.data.folds_2/cvdata-mm-dd-hh-mm-ss/<TREE-TYPE> ! 6. Now run the visualizer in the scripts. ! > ../scripts/VisualizeScores.py spambase_idx.data.folds_2/cvdata-mm-dd-hh-mm-ss/ADD_ALL/trial (note, this example output shows the cvdata-mm-dd-hh-mm-ss for my test run) Index: VisualizeScores.README =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/VisualizeScores.README,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** VisualizeScores.README 25 Jan 2009 08:52:38 -0000 1.2 --- VisualizeScores.README 12 Mar 2009 23:42:40 -0000 1.3 *************** *** 5,9 **** Requirements ============ ! 
* Jython * build of jboost 1.4.1 (NOT JBOOST VERSION 1.4 found on sourceforge) * test | train.boosting.info files which are indexed (see AddRandomIndex.py) --- 5,9 ---- Requirements ============ ! * Python * build of jboost 1.4.1 (NOT JBOOST VERSION 1.4 found on sourceforge) * test | train.boosting.info files which are indexed (see AddRandomIndex.py) *************** *** 15,23 **** Usage ===== ! You must add the files from $JBOOST_DIR/lib that are not on your CLASSPATH to your CLASSPATH. If you followed the install instructions from the JBoost website, this would mean adding $JBOOST_DIR/lib/jcommon-1.0.8.jar, $JBOOST_DIR/lib/jfreechart-1.0.10.jar, and $JBOOST_DIR/lib/swing-layout-1.0.jar to the existing classpath (see example below). ! Once classpath is set, from command line invoke the script from Jython: ! [user@host jython2.2.1]$ ./jython $JBOOST_DIR/scripts/VisualizeScores.py <path to *.boosting.info files> Where <path to *.boosting.info files> is the full path to the .boosting.info files stem you would like to process. For example: /Users/jsmith/boostingdata/cvdata/ADD_ALL/trial will glob trial0.train.boosting.info, trial0.test.boosting.info, trial1.train.boosting.info, etc. --- 15,23 ---- Usage ===== ! You must add the files from $JBOOST_DIR/lib that are not on your CLASSPATH to your CLASSPATH. If you followed the install instructions from the JBoost website, this would mean adding $JBOOST_DIR/lib/jcommon-1.0.8.jar, $JBOOST_DIR/lib/jfreechart-1.0.10.jar to the existing classpath (see example below). ! Once classpath is set, from command line invoke the script: ! $JBOOST_DIR/scripts/VisualizeScores.py <path to *.boosting.info files> Where <path to *.boosting.info files> is the full path to the .boosting.info files stem you would like to process. For example: /Users/jsmith/boostingdata/cvdata/ADD_ALL/trial will glob trial0.train.boosting.info, trial0.test.boosting.info, trial1.train.boosting.info, etc. 
*************** *** 25,37 **** Example ======= - (this example is run from within Jython dist and assumes ): ! [user@host jython2.2.1]$ export JBOOST_DIR=/Users/jsmith/workspace/jboost/ (export for a bash shell. ignore if already done) ! [user@host jython2.2.1]$ export CLASSPATH=$CLASSPATH:$JBOOST_DIR/lib/jcommon-1.0.8.jar:$JBOOST_DIR/lib/jfreechart-1.0.10.jar:$JBOOST_DIR/lib/swing-layout-1.0.jar ! (NOTE: ASSUMES you have already added jboost.jar and concurrent.jar to your classpath) ! [user@host jython2.2.1]$ ./jython $JBOOST_DIR/scripts/VisualizeScores.py /Users/jsmith/boostingdata/<path to specific cvdata>/ADD_ALL/trial After loading, parsing, and analyzing the boosting.info files, the GUI will launch. Boosting iteration shown in upper right. A histogram showing example count by boosting score for each label is predominate and in the center of the window. Sliders below the histogram allow you select lower and upper score ranges. The region within this range on the ROC curve is shown in the upper left of the window. Pressing the button in the lower left (save selected examples) will write to a file the example index and boosting score for each example within the selected range. This file is saved to the directory indicated in step 1 above. --- 25,36 ---- Example ======= ! [user@host ~]$ export JBOOST_DIR=/Users/jsmith/workspace/jboost/ (export for a bash shell. ignore if already done) ! [user@host ~]$ export CLASSPATH=$CLASSPATH:$JBOOST_DIR/lib/jcommon-1.0.8.jar:$JBOOST_DIR/lib/jfreechart-1.0.10.jar ! (NOTE: ASSUME you have already added jboost.jar and concurrent.jar to your classpath) ! [user@host ~]$ $JBOOST_DIR/scripts/VisualizeScores.py /Users/jsmith/boostingdata/<path to specific cvdata>/ADD_ALL/trial After loading, parsing, and analyzing the boosting.info files, the GUI will launch. Boosting iteration shown in upper right. A histogram showing example count by boosting score for each label is predominate and in the center of the window. 
Sliders below the histogram allow you to select lower and upper score ranges. The region within this range on the ROC curve is shown in the upper left of the window. Pressing the button in the lower left (save selected examples) will write to a file the example index and boosting score for each example within the selected range. This file is saved to the directory indicated in step 1 above. |