From: Aaron A. <aa...@us...> - 2008-07-31 22:17:00
|
Update of /cvsroot/jboost/jboost/scripts In directory sc8-pr-cvs17.sourceforge.net:/tmp/cvs-serv19445 Modified Files: atreeplot.R Added Files: marginplot.R parse.R perlre.R Log Message: marginplot is now finished, made atreeplot a proper function instead of a command line script Index: atreeplot.R =================================================================== RCS file: /cvsroot/jboost/jboost/scripts/atreeplot.R,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** atreeplot.R 30 Jul 2008 05:42:06 -0000 1.1 --- atreeplot.R 31 Jul 2008 22:16:52 -0000 1.2 *************** *** 1,121 **** ! tree2graph <- function() { ! ! } ! ! print.usage <- function() { ! show("\t -i (--info) file.info \t File containing runtime information (required) \n") ! show("\t -s (--spec) file.spec \t Spec file (optional, will default to spec file in info file) \n") ! show("\t -t (--tree) file.tree \t File containing the ADTree in text format (required) \n") ! show("\t -d (--dir) directory \t Directory to use (optional, defaults to '.') \n") ! show("\t -l (--labels) \t Flip the labels (-1 becomes +1) (optional)\n") ! show("\t --truncate \t Truncate threshold values to increase readability\n") ! show("\t --threshold num \t A given depth to stop the tree from becoming too large. (optional) \n") ! show("\t -h (--help) \t Print this usage information \n") ! } ! ! parse.args <- function(argv) { ! tree = "" ! flip = FALSE ! depth = 0 ! spec = "" ! for (i in 1:length(argv)) { ! arg = argv[i] ! if (arg=="--tree") { ! tree = argv[i+1] ! } else if (arg=="--flip") { ! flip = TRUE ! } else if (arg=="--depth") { ! depth = as.numeric(argv[i+1]) ! } else if (arg=="--spec") { ! spec = argv[i+1] ! } ! } ! return(list(tree=tree, flip=flip, depth=depth, spec=spec)) ! } ! ! parse.labels <- function(specfile) { ! lines = readLines(specfile) ! labels = list() ! for (i in 1:length(lines)) { ! line = lines[i] ! line = sub("^.*labels\\s*\\((.*)\\).*$", "labels \\1", line, perl=TRUE) ! if (substr(line, 1, 6)=="labels") { ! labels = unlist(strsplit(substr(line,7,9999999),",")) ! } ! } ! labels <- ! return(labels) ! } ! ! ! argv <- commandArgs() ! tmp <- parse.args(argv) ! treefile <- tmp$tree ! specfile <- tmp$spec ! flip.labels <- tmp$flip ! depth <- tmp$depth ! ! if (treefile=="" || specfile=="") { ! print.usage() ! q("no") ! } ! ! show(paste("Tree File:", treefile)) ! show(paste("Spec File:", specfile)) ! ! labels = parse.labels(specfile) ! num.labels = length(labels); ! if (num.labels < 2) { ! show("There don't seem to be enough labels in the spec file...") ! show("Labels found:") ! show(labels) ! q("no") ! } ! show(c("Labels are:", labels)) ! ! lines = readLines(treefile) ! ! ! perl.re <- function(str, expr) { ! ret = c("FALSE") ! #show(c(str, expr)) ! is.present <- grep(expr, c(str,"as"), perl=TRUE) ! #show(is.present) ! if (length(is.present) > 0) { ! # get number of captures ! expr <- unlist(strsplit(expr, NULL)) ! #show(expr) ! out <- "TRUE" ! k <- 0 ! for (i in 1:length(expr)) { ! if (paste(expr[i-1],expr[i],sep="")=="\\(") { next; } ! if (expr[i]=="(") { ! k <- k + 1 ! out = paste(out, paste("\\",k,sep=""), sep="|") ! #show(out) ! } ! } ! expr <- paste(expr, collapse="") ! ret <- unlist(strsplit(sub(expr, out, str), "|")) ! #show(c(expr, out, str)) ! ret <- sub(expr, out, str) ! ret <- unlist(strsplit(ret, "\\|")) ! #show(ret) ! } ! return(ret) ! } ! ! ! ! ! ! ! ! ! ! library(igraph) - g <- graph.empty() - g$layout <- layout.reingold.tilford --- 1,5 ---- ! source("parse.R") ! source("perlre.R") library(igraph) *************** *** 143,146 **** --- 27,31 ---- #prediction 1: BinaryPrediction. p(1)= -0.8696119797754972 + # Example of binary tree #0 [R] prediction = BinaryPrediction. p(1)= 0.11523437500000003 #1 [R.0] Splitter = InequalitySplitter. char_freq_bang < 0.0785 *************** *** 151,295 **** #29 [R.0:1.0:1] prediction = BinaryPrediction. p(1)= -0.19531250000000003 ! index.to.vertex <- list() ! i <- 1 ! depth <- 0 ! width <- 0 ! while (i <= length(lines)) { ! index <- "" ! type <- "" ! label <- "" ! ! line = lines[i] ! #show(sprintf("Line is: %s", line)) ! x <- perl.re(line, "^([0-9]+)\\s*\\[(.*)\\].*Splitter\\s*=\\s*(.*)$") ! if (x[1]=="TRUE") { ! #show("Setting type to splitter") ! type = "splitter"; ! iteration = x[2]; ! index = x[3]; ! label = x[4]; ! #show(sprintf("Label of splitter is: %s", label)) } ! x <- perl.re(line, "^([0-9]+)\\s*\\[(.*)\\].*prediction\\s*=\\s*(.*)$") ! if (x[1]=="TRUE") { ! #show("Setting type to predictor") ! type = "predictor"; ! iteration = x[2] ! index = x[3] ! label = x[4] ! predType = x[4]; ! #show(c(type, iteration, index, label)) ! x <- perl.re(predType, ".*MultiPrediction.*") ! y <- perl.re(predType, ".*BinaryPrediction.*=(.*)") if (x[1]=="TRUE") { ! label = ""; ! for (j in 1:num.labels) { ! i <- i + 1 ! line = lines[i] ! #prediction 0: BinaryPrediction. p(1)= -0.6496414920651304 ! x <- perl.re(line,"^.*prediction.*=\\s*(.*)") ! #show(sprintf("Prediction Binary value is %0.4f", as.numeric(x[2]))) ! if (x[1]=="TRUE") { ! pred = x[2] ! } else { ! break } ! if (flip.labels) { ! pred = paste("-",pred,sep="") } - label = paste(label, labels[j], ": ", pred, "\n", sep=""); - } - } else if (y[1]=="TRUE") { - label = y[2] - if(flip.labels) { - label = as.numeric(label) - label = - label - label = as.character(label) } } - } ! #show(sprintf("Type is: %s", type)) ! #show(sprintf("Index is: %s", index)) ! #show(sprintf("Label is: %s", label)) ! if (type=="splitter") { ! g <- add.vertices(g, 1) ! V(g)$frame.color[length(V(g))] <- "blue" ! V(g)$color[length(V(g))] <- "white" ! V(g)$label[length(V(g))] <- paste(iteration, label, sep=": ") ! V(g)$label.cex <- 1.4 ! V(g)$shape[length(V(g))] <- "rectangle" ! V(g)$label.color <- "black" ! ! this.vertex <- length(V(g))-1 ! index.to.vertex[[index]] = this.vertex ! ! parent.index <- substr(index,1,nchar(index)-2) ! parent.vertex <- index.to.vertex[[parent.index]] ! ! g <- add.edges(g, c(parent.vertex,this.vertex)) ! E(g)$color[length(E(g))] <- "black" ! E(g)$width[length(E(g))] <- 5 ! } ! ! if (type=="predictor") { ! g <- add.vertices(g, 1) ! V(g)$frame.color[length(V(g))] <- "yellow" ! V(g)$color[length(V(g))] <- "white" ! V(g)$label[length(V(g))] <- label ! V(g)$shape[length(V(g))] <- "rectangle" ! V(g)$label.cex <- 1 ! V(g)$label.color <- "black" ! this.vertex <- length(V(g))-1 ! index.to.vertex[[index]] = this.vertex - if (index!="R") { parent.index <- substr(index,1,nchar(index)-2) parent.vertex <- index.to.vertex[[parent.index]] g <- add.edges(g, c(parent.vertex,this.vertex)) ! ans <- substr(index,nchar(index), nchar(index)) ! if (ans=="0") { ! E(g)$color[length(E(g))] <- "red" ! E(g)$label[length(E(g))] <- "False" ! } else if (ans=="1") { ! E(g)$color[length(E(g))] <- "green" ! E(g)$label[length(E(g))] <- "True" ! } else { ! error(paste("The splitter has an invalid return value!\n 1 or 0 expected, recieved", ans)) ! } ! E(g)$label.cex <- 1.5 ! E(g)$label.color <- "black" ! E(g)$width[length(E(g))] <- 10 ! } ! } ! w <- warnings() ! if (length(w)>0) { ! show(w) } - i <- i + 1 - } ! # Flip so that the root is at the top ! coords <- g$layout(g) ! coords[,2] <- max(coords[,2])-coords[,2] ! ! fname <- sprintf("%s.eps", treefile) ! show(sprintf("Writing file to '%s'", fname)) ! postscript(file=sprintf("%s.eps", treefile), fonts=c("serif", "Palatino"), paper="special", width=70, height=40, horizontal=TRUE) ! plot(1, type="n", axes=FALSE, xlab=NA, ylab=NA, xlim=c(-1,1), ylim=c(-1,1)) ! sw <- strwidth(paste("", V(g)$label, "")) ! sh <- strheight(paste("", V(g)$label, "")) ! plot(g, add=TRUE, layout=coords, vertex.size=sw*110, vertex.size2=sh*100*3) ! dev.off() --- 36,190 ---- #29 [R.0:1.0:1] prediction = BinaryPrediction. p(1)= -0.19531250000000003 ! ! plot.atree <- function(treefile, specfile, flip.labels=F, depth=-1, width=-1, height=-1) { ! labels = parse.labels(specfile) ! num.labels = length(labels); ! if (num.labels < 2) { ! show("There don't seem to be enough labels in the spec file...") ! show("Labels found:") ! show(labels) ! return } + show(c("Labels are:", labels)) ! lines = readLines(treefile) ! ! g <- graph.empty() ! g$layout <- layout.reingold.tilford ! index.to.vertex <- list() ! i <- 1 ! while (i <= length(lines)) { ! index <- "" ! type <- "" ! label <- "" ! ! line = lines[i] ! #show(sprintf("Line is: %s", line)) ! x <- perl.re(line, "^([0-9]+)\\s*\\[(.*)\\].*Splitter\\s*=\\s*(.*)$") if (x[1]=="TRUE") { ! #show("Setting type to splitter") ! type = "splitter"; ! iteration = x[2]; ! index = x[3]; ! label = x[4]; ! #show(sprintf("Label of splitter is: %s", label)) ! } ! ! x <- perl.re(line, "^([0-9]+)\\s*\\[(.*)\\].*prediction\\s*=\\s*(.*)$") ! if (x[1]=="TRUE") { ! #show("Setting type to predictor") ! type = "predictor"; ! iteration = x[2] ! index = x[3] ! label = x[4] ! predType = x[4]; ! #show(c(type, iteration, index, label)) ! x <- perl.re(predType, ".*MultiPrediction.*") ! y <- perl.re(predType, ".*BinaryPrediction.*=(.*)") ! if (x[1]=="TRUE") { ! label = ""; ! for (j in 1:num.labels) { ! i <- i + 1 ! line = lines[i] ! #prediction 0: BinaryPrediction. p(1)= -0.6496414920651304 ! x <- perl.re(line,"^.*prediction.*=\\s*(.*)") ! #show(sprintf("Prediction Binary value is %0.4f", as.numeric(x[2]))) ! if (x[1]=="TRUE") { ! pred = x[2] ! } else { ! break ! } ! if (flip.labels) { ! pred = paste("-",pred,sep="") ! } ! label = paste(label, labels[j], ": ", pred, "\n", sep=""); } ! } else if (y[1]=="TRUE") { ! label = y[2] ! if(flip.labels) { ! label = as.numeric(label) ! label = - label ! label = as.character(label) } } } ! #show(sprintf("Type is: %s", type)) ! #show(sprintf("Index is: %s", index)) ! #show(sprintf("Label is: %s", label)) ! if (type=="splitter") { ! g <- add.vertices(g, 1) ! V(g)$frame.color[length(V(g))] <- "blue" ! V(g)$color[length(V(g))] <- "white" ! V(g)$label[length(V(g))] <- paste(iteration, label, sep=": ") ! V(g)$label.cex <- 1.4 ! V(g)$shape[length(V(g))] <- "rectangle" ! V(g)$label.color <- "black" ! this.vertex <- length(V(g))-1 ! index.to.vertex[[index]] = this.vertex parent.index <- substr(index,1,nchar(index)-2) parent.vertex <- index.to.vertex[[parent.index]] g <- add.edges(g, c(parent.vertex,this.vertex)) + E(g)$color[length(E(g))] <- "black" + E(g)$width[length(E(g))] <- 5 + } ! if (type=="predictor") { ! g <- add.vertices(g, 1) ! V(g)$frame.color[length(V(g))] <- "yellow" ! V(g)$color[length(V(g))] <- "white" ! V(g)$label[length(V(g))] <- label ! V(g)$shape[length(V(g))] <- "rectangle" ! V(g)$label.cex <- 1 ! V(g)$label.color <- "black" ! this.vertex <- length(V(g))-1 ! index.to.vertex[[index]] = this.vertex + if (index!="R") { + parent.index <- substr(index,1,nchar(index)-2) + parent.vertex <- index.to.vertex[[parent.index]] + g <- add.edges(g, c(parent.vertex,this.vertex)) + ans <- substr(index,nchar(index), nchar(index)) + if (ans=="0") { + E(g)$color[length(E(g))] <- "red" + E(g)$label[length(E(g))] <- "False" + } else if (ans=="1") { + E(g)$color[length(E(g))] <- "green" + E(g)$label[length(E(g))] <- "True" + } else { + error(paste("The splitter has an invalid return value!\n 1 or 0 expected, recieved", ans)) + } + E(g)$label.cex <- 1.5 + E(g)$label.color <- "black" + E(g)$width[length(E(g))] <- 10 + } + } ! w <- warnings() ! if (length(w)>0) { ! show(w) ! } ! i <- i + 1 } + # Flip so that the root is at the top + coords <- g$layout(g) + coords[,2] <- max(coords[,2])-coords[,2] ! fname <- sprintf("%s.eps", treefile) ! show(sprintf("Writing file to '%s'", fname)) ! postscript(file=sprintf("%s.eps", treefile), fonts=c("serif", "Palatino"), paper="special", width=70, height=40, horizontal=TRUE) ! plot(1, type="n", axes=FALSE, xlab=NA, ylab=NA, xlim=c(-1,1), ylim=c(-1,1)) ! sw <- strwidth(paste("", V(g)$label, "")) ! sh <- strheight(paste("", V(g)$label, "")) ! plot(g, add=TRUE, layout=coords, vertex.size=sw*110, vertex.size2=sh*100*3) ! dev.off() ! } --- NEW FILE: marginplot.R --- source("parse.R") get.pdfs <- function(x, show.scores, show.sep) { pdfs <- list() for (i in 1:length(x)) { pdfs[[i]] <- list() margins <- sapply(x[[i]]$data, function(d){return(d$margin)}) if (show.scores || show.sep) { scores <- sapply(x[[i]]$data, function(d){return(d$score)}) #scores * margins } else { pdfs[[i]]$x <- sort(margins) pdfs[[i]]$y <- (1:length(margins))/length(margins) } } return(pdfs) } plot.margin <- function(infofile, specfile, datafile="", iteration=-1, outname=paste(infofile,"margin","eps",sep="."), show.scores=F, show.sep=F, ...) { # Get the data and needed iterations x <- parse.info(infofile, specfile) x <- get.iters(x, iteration) # Get the cumulative pdfs for the score/margin pdfs <- get.pdfs(x,show.scores,show.sep) # Set up some plot parameters xlabel <- "Margins" # y \\Sigma_t \\alpha h_t(x)" if (show.scores) { xlabel <- "Scores" # \\Sigma_t \\alpha h_t(x)" } ylabel <- "Cumulative Distribution" title <- paste("Margins for", infofile) # Plot the margin pdfs postscript(file=outname) plot(0, 0, type="n", xlim=c(-1,1), ylim=c(0,1), main=title, xlab=xlabel, ylab=ylabel, ...) for (i in 1:length(pdfs)) { lines(pdfs[[i]]$x, pdfs[[1]]$y) } dev.off() } --- NEW FILE: perlre.R --- perl.re <- function(str, expr) { ret = c("FALSE") #show(c(str, expr)) is.present <- grep(expr, c(str,"as"), perl=TRUE) #show(is.present) if (length(is.present) > 0) { # get number of captures expr <- unlist(strsplit(expr, NULL)) #show(expr) out <- "TRUE" k <- 0 for (i in 1:length(expr)) { if (paste(expr[i-1],expr[i],sep="")=="\\(") { next; } if (expr[i]=="(") { k <- k + 1 out = paste(out, paste("\\",k,sep=""), sep="|") #show(out) } } expr <- paste(expr, collapse="") ret <- unlist(strsplit(sub(expr, out, str), "|")) #show(c(expr, out, str)) ret <- sub(expr, out, str) ret <- unlist(strsplit(ret, "\\|")) #show(ret) } return(ret) } --- NEW FILE: parse.R --- parse.spec <- function(specfile) { lines = readLines(specfile) labels = list() for (i in 1:length(lines)) { line = lines[i] line = sub("^.*labels\\s*\\((.*)\\).*$", "labels \\1", line, perl=TRUE) if (substr(line, 1, 6)=="labels") { labels = unlist(strsplit(substr(line,7,9999999),",")) } } return(list(labels=labels)) } parse.labels <- function(specfile) { lines = readLines(specfile) labels = list() for (i in 1:length(lines)) { line = lines[i] line = sub("^.*labels\\s*\\((.*)\\).*$", "labels \\1", line, perl=TRUE) if (substr(line, 1, 6)=="labels") { labels = unlist(strsplit(substr(line,7,9999999),",")) } } return(labels) } parse.infoline <- function(line, num.classes) { s <- unlist(strsplit(line, ":")) id <- as.numeric(s[1]) margin <- as.numeric(s[2]) if (num.classes <= 2) { score <- as.numeric(s[3]) label <- as.numeric(s[4]) } else { score <- as.numeric(unlist(strsplit(s[3],","))) label <- as.numeric(unlist(strsplit(s[4],","))) } return(list(id=id, margin=margin, score=score, label=label)) } parse.params <- function(param.line) { s <- unlist(strsplit(param.line, ":")) iterstr <- unlist(strsplit(s[1], "=")) iter <- as.numeric(iterstr[2]) elts <- unlist(strsplit(s[2], "=")) num.examples <- as.numeric(elts[2]) boost.params <- unlist(strsplit(s[3], "=")) boost.params <- c("booster", boost.params[2:length(boost.params)]) boost.params <- paste(boost.params, collapse="=") return( list(iter=iter, num.examples=num.examples, boost.params=boost.params) ) } parse.info <- function(fname,specname) { labels <- parse.labels(specname) num.classes <- length(labels) con <- file(fname, open="r") param.line <- scan(con, what="raw", nlines=1, sep="\n", quiet=T) params <- parse.params(param.line) num.examples <- params$num.examples example.lines <- scan(con, what="raw", nlines=num.examples, sep="\n", quiet=T) #show(example.lines) parsed.iters = list() i <- 0 while(TRUE) { i <- i + 1 parsed.iters[[i]] <- list() parsed.iters[[i]]$params <- parse.params(param.line) parsed.iters[[i]]$data <- lapply(example.lines, parse.infoline, num.classes=num.classes) param.line <- scan(con, what="raw", nlines=1, sep="\n", quiet=T) if (length(param.line)==0) { break; } example.lines <- scan(con, what="raw", nlines=num.examples, sep="\n", quiet=T) } close(con) return(parsed.iters) } get.iters <- function(x, iters) { ret <- list() if (length(iters)==1) { if (iters < 1) { ret[[1]] <- x[[length(x)]] } else { ret[[1]] <- x[[iters]] } } else { i <- 1 for (j in 1:length(x)) { if (j %in% iters) { ret[[i]] <- x[[j]] i <- i + 1 } } } return(ret) } print.usage <- function() { show("\t -i (--info) file.info \t File containing runtime information (required) \n") show("\t -s (--spec) file.spec \t Spec file (optional, will default to spec file in info file) \n") show("\t -t (--tree) file.tree \t File containing the ADTree in text format (required) \n") show("\t -d (--dir) directory \t Directory to use (optional, defaults to '.') \n") show("\t -l (--labels) \t Flip the labels (-1 becomes +1) (optional)\n") show("\t --truncate \t Truncate threshold values to increase readability\n") show("\t --threshold num \t A given depth to stop the tree from becoming too large. (optional) \n") show("\t -h (--help) \t Print this usage information \n") } parse.args <- function(argv) { tree = "" flip = FALSE depth = 0 spec = "" for (i in 1:length(argv)) { arg = argv[i] if (arg=="--tree") { tree = argv[i+1] } else if (arg=="--flip") { flip = TRUE } else if (arg=="--depth") { depth = as.numeric(argv[i+1]) } else if (arg=="--spec") { spec = argv[i+1] } } return(list(tree=tree, flip=flip, depth=depth, spec=spec)) } |