[R-gregmisc-users] SF.net SVN: r-gregmisc:[1486] trunk/gplots
Brought to you by:
warnes
|
From: <wa...@us...> - 2011-09-01 19:37:45
|
Revision: 1486
http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1486&view=rev
Author: warnes
Date: 2011-09-01 19:37:38 +0000 (Thu, 01 Sep 2011)
Log Message:
-----------
Improvements to ci2d():
- Add option to utilize KernDensity::bkde2D to estimate the 2-d
density (now the default).
- Add option to display points for original data on generated plots
- Name elements of returned contour list according to the significance
level to make it easier to select desired contours.
- Improve handling of x and y labels
Modified Paths:
--------------
trunk/gplots/R/ci2d.R
trunk/gplots/man/ci2d.Rd
Modified: trunk/gplots/R/ci2d.R
===================================================================
--- trunk/gplots/R/ci2d.R 2011-09-01 19:26:23 UTC (rev 1485)
+++ trunk/gplots/R/ci2d.R 2011-09-01 19:37:38 UTC (rev 1486)
@@ -1,3 +1,5 @@
+## $Id$
+
## first(...) selects the first element of which(...)
first <- function(x,...)
{
@@ -19,24 +21,78 @@
}
## non-parametric 2 dimensional approximate confidence interval
-## based on 2 dimensional histogram
ci2d <- function(x,
y = NULL,
- nbins=25,
+ nbins=51,
+ method=c("bkde2D","hist2d"),
+ bandwidth,
+ factor=1.0,
+
ci.levels=c(0.50,0.75,0.90,0.95,0.975),
+
show=c("filled.contour","contour","image","none"),
- xlab=deparse(substitute(x)),
- ylab=deparse(substitute(y)),
col=topo.colors(length(breaks)-1),
+ show.points=FALSE,
+ pch=par("pch"),
+ points.col="red",
+ xlab, ylab,
...)
{
- show=match.arg(show)
+ show <- match.arg(show)
+ method <- match.arg(method)
breaks <- unique(c(0, ci.levels, 1.0))
-
- h2d <- hist2d(x,y, show=FALSE, nbins=nbins, ...)
- h2d$density <- h2d$counts / sum(h2d$counts, na.rm=TRUE)
+ # get labels for x and y
+ if (missing(xlab))
+ xlab <- if (missing(x))
+ ""
+ else deparse(substitute(x))
+ if (missing(ylab))
+ ylab <- if (missing(y))
+ ""
+ else deparse(substitute(y))
+
+ if(!is.null(y))
+ x <- cbind(x,y)
+
+ if(method=="hist2d")
+ {
+ h2d <- hist2d(x,
+ show=FALSE,
+ nbins=nbins,
+ ...)
+ ## normalize
+ h2d$density <- h2d$counts / sum(h2d$counts, na.rm=TRUE)
+ }
+ else if (method=="bkde2D")
+ {
+ if(length(nbins)==1)
+ nbins <- c(nbins, nbins)
+
+ if(missing(bandwidth))
+ {
+ h.x = dpik(x[,1])
+ h.y = dpik(x[,2])
+ bandwidth <- c(h.x, h.y)
+ }
+
+ est <- bkde2D(x,
+ bandwidth=bandwidth*factor,
+ gridsize=nbins,
+ ...
+ )
+
+ h2d <- list()
+ h2d$x <- est$x1
+ h2d$y <- est$x2
+ h2d$counts <- est$fhat
+ h2d$density <- est$fhat / sum(est$fhat) # normalize
+
+ }
+ else
+ stop("Unknown method: '", method, "'")
+
uniqueVals <- rev(unique(sort(h2d$density)))
cumProbs <- sapply(uniqueVals,
function(val) sum( h2d$density[h2d$density>=val] ) )
@@ -49,14 +105,26 @@
image(h2d$x, h2d$y, h2d$cumDensity,
xlab=xlab, ylab=ylab,
breaks=breaks, col=col)
+ if(show.points)
+ points(x[,1], x[,2], pch=pch, col=points.col);
}
else if(show=="filled.contour")
{
+ if(show.points)
+ plot.title <- function() {
+ points(x[,1], x[,2], pch=pch, col=points.col);
+ }
+ else
+ plot.title <- function() {}
+
+
filled.contour(h2d$x, h2d$y, h2d$cumDensity,
levels=breaks,
col=col,
- key.axes={ axis(4, at=breaks);
- title("\nCI Level") }
+ xlab=xlab,
+ ylab=ylab,
+ plot.title=plot.title(),
+ key.title=title("\nCI Level")
)
}
else if(show=="contour")
@@ -65,14 +133,26 @@
contour(h2d$x, h2d$y, h2d$cumDensity,
levels=tmpBreaks,
labels=tmpBreaks,
+ xlab=xlab,
+ ylab=ylab,
nlevels=length(tmpBreaks),
col=col
)
+ if(show.points)
+ points(x[,1], x[,2], pch=pch, col=points.col);
}
-
h2d$contours <- contourLines(h2d$x, h2d$y, h2d$cumDensity,
levels=breaks, nlevels=length(breaks))
- names
+
+ # use the confidence level value as the name in the contour list
+ names(h2d$contours) <- sapply(h2d$contours, function(x) x$level)
+
+ # convert each contour into a (x,y) dataframe
+ h2d$contours <- lapply( h2d$contours,
+ function(J) data.frame(x=J$x, y=J$y) )
+
+ h2d$call <- match.call()
+
invisible(h2d)
}
Modified: trunk/gplots/man/ci2d.Rd
===================================================================
--- trunk/gplots/man/ci2d.Rd 2011-09-01 19:26:23 UTC (rev 1485)
+++ trunk/gplots/man/ci2d.Rd 2011-09-01 19:37:38 UTC (rev 1486)
@@ -1,3 +1,4 @@
+% $Id$
\name{ci2d}
\alias{ci2d}
\title{
@@ -4,18 +5,23 @@
Create 2-dimensional empirical confidence regions
}
\description{
- Create 2-dimensional empirical confidence regions based on a
- 2-dimensoinal histogram.
+ Create 2-dimensional empirical confidence regions from provided data.
}
\usage{
-ci2d(x, y=NULL,
- nbins=25,
- ci.levels=c(0.5, 0.75, 0.9, 0.95, 0.975),
- show=c("filled.contour", "contour", "image", "none"),
- xlab=deparse(substitute(x)),
- ylab=deparse(substitute(y)),
- col=topo.colors(length(breaks) - 1),
- ...)
+ci2d <- function(x,
+ y = NULL,
+ nbins=51,
+ method=c("bkde2D","hist2d"),
+ bandwidth,
+ factor=1.0,
+ ci.levels=c(0.50,0.75,0.90,0.95,0.975),
+ show=c("filled.contour","contour","image","none"),
+ col=topo.colors(length(breaks)-1),
+ show.points=FALSE,
+ pch=par("pch"),
+ points.col="red",
+ xlab, ylab,
+ ...)
}
\arguments{
\item{x}{either a vector containing the x coordinates
@@ -23,51 +29,232 @@
\item{y}{a vector contianing the y coordinates, not required if `x'
is matrix}
\item{nbins}{number of bins in each dimension. May be a scalar or a
- 2 element vector. Defaults to 25.}
+ 2 element vector. Defaults to 51.}
+ \item{method}{One of "bkde2D" (for KernSmooth::bdke2d) or "hist2d"
+ (for gplots::hist2d) specifyting the name of the method to create
+ the 2-d density summarizing the data. Defaults to "bkde2D".}
+ \item{bandwidth}{Bandwidth to use for \code{KernSmooth::bkde2D}.
+ See below for default value. }
\item{ci.levels}{Confidence level(s) to use for plotting
data. Defaults to \code{c(0.5, 0.75, 0.9, 0.95, 0.975)} }
\item{show}{Plot type to be displaed. One of "filled.contour",
"contour", "image", or "none". Defaults to "filled.contour".}
+ \item{show.points}{Boolean indicating whether original data values
+ should be plotted. Defaults to \code{TRUE}.}
+ \item{pch}{Point type for plots. See \code{points} for details.}
+ \item{points.col}{Point color for plotting original data. Defaiults to
+ "red".}
+ \item{col}{Colors to use for plots.}
\item{xlab, ylab}{Axis labels}
- \item{col}{Colors to use for plots.}
- \item{\dots}{Additional arguments passed to \code{hist2d}. }
+ \item{\dots}{Additional arguments passed to \code{KernSmooth::bkde2D}
+ or \code{gplots::hist2d}. }
}
\details{
- This function utilizes \code{hist2d} to create a 2-dimensional
- histogram of the data passed as an argument. This data is then
- converted into densities and used to create and display confidence
- regions.
+ This function utilizes either \code{KernSmooth::bkde2D} or
+ \code{gplots::hist2d} to estmate a 2-dimensional density of the data
+ passed as an argument. This density is then used to create and
+ (optionally) display confidence regions.
+
+ When \code{bandwidth} is ommited and \code{method="bkde2d"},
+ \code{KernSmooth::dpik} is appled in x and y dimensions to select the
+ bandwidth.
+
}
+\note{
+ Confidence intervals generated by ci2d are \emph{approximate}, and
+ are subject to biases and/or artifacts induced by the binning or
+ kernel smoothing method, bin locations, bin sizes, and kernel bandwidth.
+ }
\value{
- A list containing 3 elements:
- \item{counts}{Matrix containing the number of points falling into each
- bin}
- \item{x}{lower x limit of each bin}
- \item{y}{lower y limit of each bin}
- \item{density}{Matrix containing the probability density of each bin (count in bin/total
- count)}
- \item{cumDensity}{Matrix where each element contains the cumulative probability density
- of all elements with the same density (used to create the confidence
- region plots) }
- \item{contours}{Contours of each confidence region}
+ A list containing (at least) the following elements:
+ \item{x}{x position of each density estimate bin}
+ \item{y}{y position of each density estimate bin}
+ \item{density}{Matrix containing the probability density of each bin
+ (count in bin/total count)}
+ \item{cumDensity}{Matrix where each element contains the cumulative
+ probability density of all elements with the same density (used to
+ create the confidence region plots) }
+ \item{contours}{Contours of each confidence region}.
+ \item{call}{Call used to create this object}
}
%\references{
%}
\author{ Gregory R. Warnes \email{gr...@wa...}}
\seealso{
+ \code{\link[KernSmooth]{bkde2D}}, \code{\link[KernSmooth]{dpik}},
\code{\link{hist2d}}
}
\examples{
- # example data, bivariate normal, no correlation
+ ####
+ ## Basic usage
+ ####
+ data(geyser, package="MASS")
+
+ x <- geyser$duration
+ y <- geyser$waiting
+
+ # 2-d confidence intervals based on binned kernel density estimate
+ ci2d(x,y) # filled contour plot
+ ci2d(x,y, show.points=TRUE) # show original data
+
+
+ # image plot
+ ci2d(x,y, show="image")
+ ci2d(x,y, show="image", show.points=TRUE)
+
+ # contour plot
+ ci2d(x,y, show="contour", col="black")
+ ci2d(x,y, show="contour", col="black", show.points=TRUE)
+
+ ####
+ ## Control Axis scales
+ ####
x <- rnorm(2000, sd=4)
y <- rnorm(2000, sd=1)
+ # 2-d confidence intervals based on binned kernel density estimate
+ ci2d(x,y)
+
# 2-d confidence intervals based on 2d histogram
+ ci2d(x,y, method="hist2d", nbins=25)
+
+ # Require same scale for each axis, this looks oval
+ ci2d(x,y, range.x=list(c(-20,20), c(-20,20)))
+ ci2d(x,y, method="hist2d", same.scale=TRUE, nbins=25) # hist2d
+
+ ####
+ ## Control smoothing and binning
+ ####
+ x <- rnorm(2000, sd=4)
+ y <- rnorm(2000, mean=x, sd=2)
+
+ # Default 2-d confidence intervals based on binned kernel density estimate
ci2d(x,y)
- # same scale for each axis, this looks oval
- ci2d(x,y, same.scale=TRUE)
+ # change the smoother bandwidth
+ ci2d(x,y,
+ bandwidth=c(sd(x)/8, sd(y)/8)
+ )
+ # change the smoother number of bins
+ ci2d(x,y, nbins=10)
+ ci2d(x,y)
+ ci2d(x,y, nbins=100)
+
+ # Default 2-d confidence intervals based on 2d histogram
+ ci2d(x,y, method="hist2d", show.points=TRUE)
+
+ # change the number of histogram bins
+ ci2d(x,y, nbin=10, method="hist2d", show.points=TRUE )
+ ci2d(x,y, nbin=25, method="hist2d", show.points=TRUE )
+
+ ####
+ ## Perform plotting manually
+ ####
+ data(geyser, package="MASS")
+
+ # let ci2d handle plotting contours...
+ ci2d(geyser$duration, geyser$waiting, show="contour", col="black")
+
+ # call contour() directly, show the 90 percent CI, and the mean point
+ est <- ci2d(geyser$duration, geyser$waiting, show="none")
+ contour(est$x, est$y, est$cumDensity,
+ xlab="duration", ylab="waiting",
+ levels=0.90, lwd=4, lty=2)
+ points(mean(geyser$duration), mean(geyser$waiting),
+ col="red", pch="X")
+
+
+ ####
+ ## Extract confidence region values
+ ###
+ data(geyser, package="MASS")
+
+ ## Empirical 90 percent confidence limits
+ quantile( geyser$duration, c(0.05, 0.95) )
+ quantile( geyser$waiting, c(0.05, 0.95) )
+
+ ## Bivariate 90 percent confidence region
+ est <- ci2d(geyser$duration, geyser$waiting, show="none")
+ names(est$contours) ## show available contours
+
+ ci.90 <- est$contours[names(est$contours)=="0.9"] # get region(s)
+ ci.90 <- rbind(ci.90[[1]],NA, ci.90[[2]], NA, ci.90[[3]]) # join them
+
+ print(ci.90) # show full contour
+ range(ci.90$x, na.rm=TRUE) # range for duration
+ range(ci.90$y, na.rm=TRUE) # range for waiting
+
+ ####
+ ## Visually compare confidence regions
+ ####
+ data(geyser, package="MASS")
+
+ ## Bivariate smoothed 90 percent confidence region
+ est <- ci2d(geyser$duration, geyser$waiting, show="none")
+ names(est$contours) ## show available contours
+
+ ci.90 <- est$contours[names(est$contours)=="0.9"] # get region(s)
+ ci.90 <- rbind(ci.90[[1]],NA, ci.90[[2]], NA, ci.90[[3]]) # join them
+
+ plot( waiting ~ duration, data=geyser,
+ main="Comparison of 90 percent confidence regions" )
+ polygon( ci.90, col="green", border="green", density=10)
+
+ ## Univariate Normal-Theory 90 percent confidence region
+ mean.x <- mean(geyser$duration)
+ mean.y <- mean(geyser$waiting)
+ sd.x <- sd(geyser$duration)
+ sd.y <- sd(geyser$waiting)
+
+ t.value <- qt(c(0.05,0.95), df=nobs(geyser$duration), lower=TRUE)
+ ci.x <- mean.x + t.value* sd.x
+ ci.y <- mean.y + t.value* sd.y
+
+ plotCI(mean.x, mean.y,
+ li=ci.x[1],
+ ui=ci.x[2],
+ barcol="blue", col="blue",
+ err="x",
+ pch="X",
+ add=TRUE )
+
+ plotCI(mean.x, mean.y,
+ li=ci.y[1],
+ ui=ci.y[2],
+ barcol="blue", col="blue",
+ err="y",
+ pch=NA,
+ add=TRUE )
+
+# rect(ci.x[1], ci.y[1], ci.x[2], ci.y[2], border="blue",
+# density=5,
+# angle=45,
+# col="blue" )
+
+
+ ## Empirical univariate 90 percent confidence region
+ box <- cbind( x=quantile( geyser$duration, c(0.05, 0.95 )),
+ y=quantile( geyser$waiting, c(0.05, 0.95 )) )
+
+ rect(box[1,1], box[1,2], box[2,1], box[2,2], border="red",
+ density=5,
+ angle=-45,
+ col="red" )
+
+ ## now a nice legend
+ legend( "topright", legend=c(" Region type",
+ "Univariate Normal Theory",
+ "Univarite Empirical",
+ "Smoothed Bivariate"),
+ lwd=c(NA,1,1,1),
+ col=c("black","blue","red","green"),
+ lty=c(NA,1,1,1)
+ )
+
+
+
+
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|