[R-gregmisc-users] SF.net SVN: r-gregmisc: [1271] trunk/gplots
Brought to you by:
warnes
From: <wa...@us...> - 2008-05-20 00:14:56
|
Revision: 1271 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1271&view=rev Author: warnes Date: 2008-05-19 17:14:51 -0700 (Mon, 19 May 2008) Log Message: ----------- Add Venn Diagram plot submitted by Steffen Moeller Modified Paths: -------------- trunk/gplots/DESCRIPTION trunk/gplots/NAMESPACE Added Paths: ----------- trunk/gplots/R/venn.R trunk/gplots/man/venn.Rd Modified: trunk/gplots/DESCRIPTION =================================================================== --- trunk/gplots/DESCRIPTION 2008-05-13 03:16:08 UTC (rev 1270) +++ trunk/gplots/DESCRIPTION 2008-05-20 00:14:51 UTC (rev 1271) @@ -2,10 +2,13 @@ Title: Various R programming tools for plotting data Description: Various R programming tools for plotting data Depends: R (>= 1.9.0), gtools, gdata, stats, caTools -Recommends: datasets -Suggests: gtools, gdata -Version: 2.6.0 +Recommends: datasets, grid +Suggests: gtools, grid +Version: 2.7.0 Author: Gregory R. Warnes. Includes R source code and/or documentation - contributed by Ben Bolker and Thomas Lumley + contributed by (in alphabetical order): + Ben Bolker, Lodewijk Bonebakker, Robert Gentleman, Wolfgang + Huber Andy Liaw, Thomas Lumley, Martin Maechler, Arni + Magnusson, Steffen Moeller, Marc Schwartz, Bill Venables Maintainer: Gregory R. Warnes <wa...@bs...> License: GPL-2 Modified: trunk/gplots/NAMESPACE =================================================================== --- trunk/gplots/NAMESPACE 2008-05-13 03:16:08 UTC (rev 1270) +++ trunk/gplots/NAMESPACE 2008-05-20 00:14:51 UTC (rev 1271) @@ -25,6 +25,7 @@ smartlegend, space, textplot, + venn, wapply ) Added: trunk/gplots/R/venn.R =================================================================== --- trunk/gplots/R/venn.R (rev 0) +++ trunk/gplots/R/venn.R 2008-05-20 00:14:51 UTC (rev 1271) @@ -0,0 +1,437 @@ +# This code plots Venn Diagrams for up to 5 sets. The +# function getVennCounts is passed a list of vectors. +# This is transformed into a table indicating the +# number of members for each intersection. This table +# is generated for any number of sets. + +# The function drawVennDiagram plots circles (up to three +# sets) or ellipses (4 and 5 sets) to depict the sets. +# The sum of values placed is the number of entries of +# each set. + +# transform base +# v = value of base 10 to be transformed +# b = new base +# l = minimal length of returned array (default is 1) +# return value: array of factors, highest exponent first +baseOf<-function(v,b,l=1) { + remainder<-v + i<-l + ret<-NULL + while(remainder>0 || i>0) { + #print(paste("i=",i," remainder=",remainder)) + m<-remainder%%b + if (is.null(ret)) { + ret<-m + } + else { + ret<-c(m,ret) + } + remainder <- remainder %/% b + i<-i-1 + } + return(ret) +} + +# Function to determine values of a venn diagram +# It works for an arbitrary large set of input sets. +# + +getVennCounts <- function(l, universe, ...) + UseMethod("getVennCounts") + +getVennCounts.data.frame <- function(l, universe=NA, ...) + { + if( !all(unique(unlist(l)) %in% c(0,1)) ) + stop("Only indicator columns permitted") + + l <- sapply( l, function(x) which(as.logical(x))) + getVennCounts.list(l) + } + +getVennCounts.list<-function(l,universe=NA) { + numSets<-length(l) + result.table<-NULL + result.table.names<-NULL + for (i in 0:(-1 + 2^numSets)) { + # i2 is a binary representation of that number + i2<-baseOf(i,2,numSets) + + # some debug output + #print(paste(i,":",paste(i2,collapse="",sep=""))) + + # p.pos determines the position in number + # which is also the set that is inspected + + sel<-universe + + # positive selection first + for (p.pos in which(1 == i2) ) { + current.set<-l[[p.pos]] + #print(paste("set ",p.pos,", val=1: ",paste(current.set,collapse=","))) + if (is.null(sel)) { + #print("Sel is null") + } else if (1 == length(sel) && is.na(sel)) { + sel<-current.set + } + else { + w<-which(sel %in% current.set) + if (length(w)>0) { + sel<-sel[w] + } + else { + sel<-NULL + } + } + } + + # something should be in sel now, otherwise + # the number will be 0 + + # negative selection + for (p.pos in which(0 == i2) ) { + if (is.null(sel) || ( 1 == length(sel) && is.na(sel))) { + # The complement is not known, hence no checks done + } + else { + current.set<-l[[p.pos]] + w<-which( ! sel %in% current.set) + #print(paste("set ",p.pos,", val=1: ",paste(current.set,collapse=","))) + if (length(w)>0) { + sel<-sel[w] + } + else { + sel<-NULL + } + } + } + #print(paste("sel:",paste(sel,collapse=","))) + + if(is.null(sel) || (1 == length(sel) && is.na(sel))) { + sel<-NULL + } + + r<-length(sel) + r.name<-paste(i2,collapse="") + result.row<-c(r,i2) + dim(result.row)<-c(1,length(result.row)) + rownames(result.row)<-c(r.name) + #print(paste("Adding ",r.name)) + if (is.null(result.table)) { + result.table<-result.row + } + else { + result.table<-rbind(result.table,result.row) + } + #if (is.null(result.table)) { + # result.table<-r + # result.table.names<-r.name + #} + #else { + # result.table<-c(result.table,r) + # result.table.names<-c(result.table.names,r.name) + #} + } + #names(result.table)<-result.table.names + if (is.null(names(l))) { + colnames(result.table)<-c("num",LETTERS[1:numSets]) + } + else{ + colnames(result.table)<-c("num",names(l)) + } + return(result.table) +} + +#print(getVennCounts(list(A,B,C,D))) +#print(getVennCounts(list(a=A,b=B,c=C,d=D))) + + +## data should be a matrix. +## - The first column of the matrix is the +## count of the number of objects with the specified pattern. +## - The second and subsequent columns contain 0-1 indicators +## giving the pattern of group membership + + +drawVennDiagram <-function(data,small=0.7, + showSetLogicLabel=FALSE,simplify=FALSE) { + numCircles<-NA + data.colnames<-NULL + data.rownames<-NULL + if(is.matrix(data)) { + numCircles<-ncol(data)-1 + data.colnames<-colnames(data)[2:(ncol(data))] + # Order is reverted since later indexing starts with + # the "lowest bit" and that is expected at the left + data.rownames<-rownames(data) + } + else { + cat("Testing only, presuming first argument to specify", + "the number of circles to draw.\n") + numCircles<-data + } + + m<-(0:(-1+2^numCircles)) + + if (! is.matrix(data)) { + ##cat("prepare randomised data\n") + data<-t(sapply(X=m,FUN=function(v){ + l<-baseOf(v,2,numCircles) + #print(l) + return(l) + })) + + #print(data) + + #data.names<-apply(data,1,function(X){ + # return(paste(X),collapse="") + #}) + for(i in m) { + n<-paste(data[i+1,],collapse="") + if (is.null(data.rownames)) { + data.rownames<-n + } + else { + data.rownames<-c(data.rownames,n) + } + } + #print(data.rownames) + data<-cbind(sample(1:100,size=2^numCircles,replace=TRUE),data) + #print(data) + rownames(data)<-data.rownames + data.colnames<-LETTERS[1:numCircles] + colnames(data)<-c("num",data.colnames) + } + + if ((2 <= numCircles && numCircles <= 3) || (4 == numCircles && simplify)) { + ##cat("drawing circles\n") + # draw circles with radius 1.7 equally distributed + # with centers on a circle of radius 1 + + degrees<-2*pi/numCircles*(1:numCircles) + + # scaling factor + s<-1/8 + + x<-sapply(degrees,FUN=sin)*s + 0.5 + y<-sapply(degrees,FUN=cos)*s + 0.5 + + + if(!require(grid)) { + stop("Need access to 'grid' library.") + } + grid.newpage() + grid.circle(x,y,3/12,name="some name") + + ##cat("filling data\n") + + distFromZero<-rep(NA,2^numCircles) + degrees<-rep(NA,2^numCircles) + + degrees[(2^numCircles)]<-0 + distFromZero[(2^numCircles)]<-0 + + for (i in 0:(numCircles-1)) { + distFromZero[2^i+1] <- 4/12 + degrees[2^i+1] <- 2*pi/numCircles*i + d<-degrees[2^i+1] + + #print(data.colnames) + + grid.text( + # starting from the lowest bit, hence reading + # lables from the right + label=data.colnames[numCircles - i], + x=sin(d)*5/12+0.5, + y=cos(d)*5/12+0.5, + rot=0 + ) + + } + + if (4==numCircles) { + for (i in 0:(numCircles-1)) { + # Current set bit plus the bit left of it and the bit right of it + distFromZero[2^i + +2^((i+numCircles-1)%%numCircles) + +2^((i+1)%%numCircles)+1] <- 2/12 + degrees[2^i + +2^((i+numCircles-1)%%numCircles) + +2^((i+1)%%numCircles)+1] <- degrees[2^i+1] + } + } + + #degrees[2^i+1] + degrees[2^((i+1)%%numCircles)+1])/2 + + if (3 <=numCircles) { + for (i in 0:(numCircles-1)) { + distFromZero[(2^i+2^((i+1)%%numCircles))+1]<- 3/12 + if (i == (numCircles-1)) { + degrees[(2^i+2^((i+1)%%numCircles))+1] <- ( + degrees[2^i+1] + 2*pi+ degrees[1+1])/2 + } + else { + degrees[(2^i+2^((i+1)%%numCircles))+1] <- ( + degrees[2^i+1] + degrees[2^((i+1)%%numCircles)+1])/2 + } + + } + } + + for(i in 1:2^numCircles) { + n<-paste(baseOf((i-1),2,numCircles),collapse="") + v<-data[n,1] + d<-degrees[i] + if (1 == length(d) && is.na(d)) { + if (v>0) warning("Not shown: ",n,"is",v,"\n") + } + else { + l<-distFromZero[i] + x<-sin(d)*l+0.5 + y<-cos(d)*l+0.5 + #cat("i=",i," x=",x," y=",y," label=",n,"\n") + l<-v + if (showSetLogicLabel) l<-paste(n,"\n",v,sep="") + grid.text(label=l,x=x,y=y,rot=0) + } + } + } + else if (4 <= numCircles && numCircles <= 5 && !simplify) { + + grid.newpage() + # Function to turn and move ellipses + relocate_elp <- function(e, alpha, x, y){ + phi=(alpha/180)*pi; + xr=e[,1]*cos(phi)+e[,2]*sin(phi) + yr=-e[,1]*sin(phi)+e[,2]*cos(phi) + xr=x+xr; + yr=y+yr; + return(cbind(xr, yr)) + } + + lab<-function (identifier, data, showLabel=showSetLogicLabel) { + r<-data[identifier,1] + if (showLabel) { + return(paste(identifier,r,sep="\n")) + } + else { + return(r) + } + } + + plot(c(0, 400), c(0, 400), type="n", axes=F, ylab="", xlab="") + if (4 == numCircles) { + elps=cbind(162*cos(seq(0,2*pi,len=1000)), 108*sin(seq(0,2*pi,len=1000))); + + plot(c(0, 400), c(0, 400), type="n", axes=F, ylab="", xlab=""); + polygon(relocate_elp(elps, 45,130, 170)); + polygon(relocate_elp(elps, 45,200, 200)); + polygon(relocate_elp(elps, 135,200, 200)); + polygon(relocate_elp(elps, 135,270, 170)); + + text( 35, 315, data.colnames[1],cex=1.5) + text(138, 347, data.colnames[2],cex=1.5) + text(262, 347, data.colnames[3],cex=1.5) + text(365, 315, data.colnames[4],cex=1.5) + + elps <- cbind(130*cos(seq(0,2*pi,len=1000)), + 80*sin(seq(0,2*pi,len=1000))) + + text( 35, 250, lab("1000",data)); + text(140, 315, lab("0100",data)); + text(260, 315, lab("0010",data)); + text(365, 250, lab("0001",data)); + + text( 90, 280, lab("1100",data), cex=small) + text( 95, 110,lab("1010",data) ) + text(200, 50, lab("1001",data), cex=small) + text(200, 290, lab("0110",data)) + text(300, 110, lab("0101",data)) + text(310, 280, lab("0011",data), cex=small) + + text(130, 230, lab("1110",data)) + text(245, 75, lab("1101",data),cex=small) + text(155, 75, lab("1011",data),cex=small) + text(270, 230, lab("0111",data)) + + text(200,150,lab("1111",data)) + } + else if (5 == numCircles) { + + elps <- cbind(150*cos(seq(0,2*pi,len=1000)), + 60*sin(seq(0,2*pi,len=1000))) + + polygon(relocate_elp(elps, 90,200, 250)) + polygon(relocate_elp(elps, 162,250, 220)) + polygon(relocate_elp(elps, 234,250, 150)) + polygon(relocate_elp(elps, 306,180, 125)) + polygon(relocate_elp(elps, 378,145, 200)) + + text( 50, 280, data.colnames[1],cex=1.5) + text(150, 400, data.colnames[2],cex=1.5) + text(350, 300, data.colnames[3],cex=1.5) + text(350, 20, data.colnames[4],cex=1.5) + text( 50, 10, data.colnames[5],cex=1.5) + + text( 61, 228, lab("10000",data)); + text(194, 329, lab("01000",data)); + text(321, 245, lab("00100",data)); + text(290, 81, lab("00010",data)); + text(132, 69, lab("00001",data)); + + text(146, 250, lab("11000",data), cex=small) + text(123, 188, lab("10100",data), cex=small) + text(275, 152, lab("10010",data), cex=small) + text(137, 146, lab("10001",data), cex=small) + text(243, 268, lab("01100",data), cex=small) + text(175, 267, lab("01010",data), cex=small) + text(187, 117, lab("01001",data), cex=small) + text(286, 188, lab("00110",data), cex=small) + text(267, 235, lab("00101",data), cex=small) + text(228, 105, lab("00011",data), cex=small) + + text(148, 210, lab("11100",data),cex=small) + text(159, 253, lab("11010",data),cex=small) + text(171, 141, lab("11001",data),cex=small) + text(281, 175, lab("10110",data),cex=small) + text(143, 163, lab("10101",data),cex=small) + text(252, 145, lab("10011",data),cex=small) + text(205, 255, lab("01110",data),cex=small) + text(254, 243, lab("01101",data),cex=small) + text(211, 118, lab("01011",data),cex=small) + text(267, 211, lab("00111",data),cex=small) + + text(170, 231,lab("11110",data),cex=small) + text(158, 169,lab("11101",data),cex=small) + text(212, 139,lab("11011",data),cex=small) + text(263, 180,lab("10111",data),cex=small) + text(239, 232,lab("01111",data),cex=small) + + text(204,190,lab("11111",data)) + } + } + else { + stop(paste("The printing of ",numCircles," circles is not yet supported.")) + } + +} + + +venn <- function(data, + universe=NA, + small=0.7, + showSetLogicLabel=FALSE, + simplify=FALSE, + show.plot=TRUE) +{ + counts <- getVennCounts(data) + + if(show.plot) + drawVennDiagram(data=counts, + small=small, + showSetLogicLabel=showSetLogicLabel, + simplify=simplify + ) + + invisible(counts) +} Added: trunk/gplots/man/venn.Rd =================================================================== --- trunk/gplots/man/venn.Rd (rev 0) +++ trunk/gplots/man/venn.Rd 2008-05-20 00:14:51 UTC (rev 1271) @@ -0,0 +1,78 @@ +\name{venn} +\alias{venn} +\title{Plot a Venn diagram} +\description{ +Plot Venn diagrams for up to 5 sets +} +\usage{ +venn(data, small = 0.7, showSetLogicLabel = FALSE, simplify = FALSE) +} +\arguments{ + \item{data}{data to be plotted (see below)} + \item{small}{Character size of group labels} + \item{showSetLogicLabel}{Logical flag indicating whether the internal + group label should be displayed} + \item{simplify}{Logical flag indicating whether unobserved group + should be omitted.} +} +\details{ + \code{data} should be either a named list of vectors containing + indexes of group members (1, 2, 3,..) , or a data frame containing indicator + variables (TRUE, FALSE, TRUE, ..) for group membership. Group names + will be taken from the component vector or column names. +} +\value{ + A matrix of all possible sets of groups, and the observed numer of + items beloinging to each set of groups is returned invisibly. + The fist column contains observed counts, subsequent columns contain + 0-1 indicators of group membership. +} +\author{Steffen Moeller \email{ste...@gm...}, + with cleanup and packaging by Gregory R. Warnes + \email{gr...@ra...}.} +\examples{ + +## +## Example using a list of item indexes belonging to the +## specified group. +## +A<- 1:20 +B<- 1:20 +C<- 2:20 +D<- 3:21 +input<-list(A,B,C,D) +input + +venn(input) + +## +## Example using a data frame of indicator columns +## +A<- as.logical(rbinom(100, 1, 0.2)) +B<- as.logical(rbinom(100, 1, 0.7)) +C<- as.logical(rbinom(100, 1, 0.2)) +D<- as.logical(rbinom(100, 1, 0.1)) +input<-data.frame(A,B,C,D) +venn(input) + + +## Omit un-observed groupings +tmp <- venn(input, simplify=T) + +## show details +tmp + +## Show internal binary group labels +venn(input, showSetLogicLabel=TRUE) + +## Omit un-observed groupings, and show internal binary group labels +venn(input, simplify=T, showSetLogicLabel=TRUE) + + + + +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |