[R-gregmisc-users] SF.net SVN: r-gregmisc:[1486] trunk/gplots

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 1486
          http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1486&view=rev
Author:   warnes
Date:     2011-09-01 19:37:38 +0000 (Thu, 01 Sep 2011)
Log Message:
-----------
Improvements to ci2d():

- Add option to utilize KernDensity::bkde2D to estimate the 2-d
  density (now the default).

- Add option to display points for original data on generated plots

- Name elements of returned contour list according to the significance
  level to make it easier to select desired contours.

- Improve handling of x and y labels

Modified Paths:
--------------
    trunk/gplots/R/ci2d.R
    trunk/gplots/man/ci2d.Rd

Modified: trunk/gplots/R/ci2d.R
===================================================================

--- trunk/gplots/R/ci2d.R	2011-09-01 19:26:23 UTC (rev 1485)
+++ trunk/gplots/R/ci2d.R	2011-09-01 19:37:38 UTC (rev 1486)
@@ -1,3 +1,5 @@
+## $Id$
+
 ## first(...) selects the first element of which(...)
 first <- function(x,...)
   {
@@ -19,24 +21,78 @@
 }
 
 ## non-parametric 2 dimensional approximate confidence interval
-## based on 2 dimensional histogram
 ci2d <- function(x,
                  y = NULL,
-                 nbins=25,
+                 nbins=51,
+                 method=c("bkde2D","hist2d"),
+                 bandwidth,
+                 factor=1.0,
+                 
                  ci.levels=c(0.50,0.75,0.90,0.95,0.975),
+                 
                  show=c("filled.contour","contour","image","none"),
-                 xlab=deparse(substitute(x)),
-                 ylab=deparse(substitute(y)),
                  col=topo.colors(length(breaks)-1),
+                 show.points=FALSE,
+                 pch=par("pch"),
+                 points.col="red",
+                 xlab, ylab, 
                  ...)
   {
 
-    show=match.arg(show)
+    show <- match.arg(show)
+    method <- match.arg(method)
     breaks <- unique(c(0, ci.levels, 1.0))
-    
-    h2d <- hist2d(x,y, show=FALSE, nbins=nbins, ...)
-    h2d$density <- h2d$counts / sum(h2d$counts, na.rm=TRUE)
 
+    # get labels for x and y
+    if (missing(xlab)) 
+      xlab <- if (missing(x)) 
+        ""
+      else deparse(substitute(x))
+    if (missing(ylab)) 
+      ylab <- if (missing(y)) 
+        ""
+      else deparse(substitute(y))
+
+    if(!is.null(y))
+      x <- cbind(x,y)
+
+    if(method=="hist2d")
+      {
+        h2d <- hist2d(x,
+                      show=FALSE,
+                      nbins=nbins,
+                      ...)
+        ## normalize
+        h2d$density <- h2d$counts / sum(h2d$counts, na.rm=TRUE)
+      }
+    else if (method=="bkde2D")
+      {
+        if(length(nbins)==1)
+          nbins <- c(nbins, nbins)
+
+        if(missing(bandwidth))
+          {
+            h.x = dpik(x[,1])
+            h.y = dpik(x[,2])
+            bandwidth <-  c(h.x, h.y)
+          }
+
+        est <- bkde2D(x,
+                      bandwidth=bandwidth*factor,
+                      gridsize=nbins,
+                      ...
+                      )
+
+        h2d <- list()
+        h2d$x <- est$x1
+        h2d$y <- est$x2
+        h2d$counts <- est$fhat
+        h2d$density <- est$fhat / sum(est$fhat) # normalize
+
+      }
+    else
+      stop("Unknown method: '", method, "'")
+
     uniqueVals <- rev(unique(sort(h2d$density)))
     cumProbs <- sapply(uniqueVals,
                        function(val) sum( h2d$density[h2d$density>=val] ) )
@@ -49,14 +105,26 @@
         image(h2d$x, h2d$y, h2d$cumDensity,
               xlab=xlab, ylab=ylab,
               breaks=breaks, col=col)
+        if(show.points)
+            points(x[,1], x[,2], pch=pch, col=points.col);          
       }
     else if(show=="filled.contour")
       {
+        if(show.points)
+          plot.title <- function() {
+            points(x[,1], x[,2], pch=pch, col=points.col);
+          }
+        else
+          plot.title <- function() {}
+
+        
         filled.contour(h2d$x, h2d$y, h2d$cumDensity,
                        levels=breaks,
                        col=col,
-                       key.axes={ axis(4, at=breaks);
-                                  title("\nCI Level") }
+                       xlab=xlab,
+                       ylab=ylab,
+                       plot.title=plot.title(),
+                       key.title=title("\nCI Level")
                        )
        }
     else if(show=="contour")
@@ -65,14 +133,26 @@
         contour(h2d$x, h2d$y, h2d$cumDensity,
                 levels=tmpBreaks,
                 labels=tmpBreaks,
+                xlab=xlab,
+                ylab=ylab,
                 nlevels=length(tmpBreaks),
                 col=col
                 )
+        if(show.points)
+            points(x[,1], x[,2], pch=pch, col=points.col);          
       }
-    
 
     h2d$contours <- contourLines(h2d$x, h2d$y, h2d$cumDensity,
                                  levels=breaks, nlevels=length(breaks))
-    names
+
+    # use the confidence level value as the name in the contour list
+    names(h2d$contours) <- sapply(h2d$contours, function(x) x$level)
+
+    # convert each contour into a (x,y) dataframe 
+    h2d$contours <- lapply( h2d$contours,
+                            function(J) data.frame(x=J$x, y=J$y) )
+
+    h2d$call <- match.call()
+    
     invisible(h2d)
   }

Modified: trunk/gplots/man/ci2d.Rd
===================================================================
--- trunk/gplots/man/ci2d.Rd	2011-09-01 19:26:23 UTC (rev 1485)
+++ trunk/gplots/man/ci2d.Rd	2011-09-01 19:37:38 UTC (rev 1486)
@@ -1,3 +1,4 @@
+% $Id$
 \name{ci2d}
 \alias{ci2d}
 \title{
@@ -4,18 +5,23 @@
   Create 2-dimensional empirical confidence regions
 }
 \description{
-  Create 2-dimensional empirical confidence regions based on a
-  2-dimensoinal histogram.
+  Create 2-dimensional empirical confidence regions from provided data.
 }
 \usage{
-ci2d(x, y=NULL,
-     nbins=25,
-     ci.levels=c(0.5, 0.75, 0.9, 0.95, 0.975),
-     show=c("filled.contour", "contour", "image", "none"),
-     xlab=deparse(substitute(x)),
-     ylab=deparse(substitute(y)),
-     col=topo.colors(length(breaks) - 1),
-     ...)
+ci2d <- function(x,
+                 y = NULL,
+                 nbins=51,
+                 method=c("bkde2D","hist2d"),
+                 bandwidth,
+                 factor=1.0,
+                 ci.levels=c(0.50,0.75,0.90,0.95,0.975),
+                 show=c("filled.contour","contour","image","none"),
+                 col=topo.colors(length(breaks)-1),
+                 show.points=FALSE,
+                 pch=par("pch"),
+                 points.col="red",
+                 xlab, ylab, 
+                 ...)
 }
 \arguments{
   \item{x}{either a vector containing the x coordinates
@@ -23,51 +29,232 @@
   \item{y}{a vector contianing the y coordinates, not required if `x'
     is matrix}
   \item{nbins}{number of bins in each dimension. May be a scalar or a
-    2 element vector.  Defaults to 25.}
+    2 element vector.  Defaults to 51.}
+  \item{method}{One of "bkde2D" (for KernSmooth::bdke2d) or "hist2d"
+    (for gplots::hist2d) specifyting the name of the method to create
+    the 2-d density summarizing the data.  Defaults to "bkde2D".}
+  \item{bandwidth}{Bandwidth to use for \code{KernSmooth::bkde2D}.
+    See below for default value. }
   \item{ci.levels}{Confidence level(s) to use for plotting
     data. Defaults to \code{c(0.5, 0.75, 0.9, 0.95, 0.975)} }
   \item{show}{Plot type to be displaed.  One of "filled.contour",
     "contour", "image", or "none".  Defaults to "filled.contour".}
+  \item{show.points}{Boolean indicating whether original data values
+    should be plotted.  Defaults to \code{TRUE}.}
+  \item{pch}{Point type for plots.  See \code{points} for details.}
+  \item{points.col}{Point color for plotting original data. Defaiults to
+  "red".}
+  \item{col}{Colors to use for plots.}
   \item{xlab, ylab}{Axis labels}
-  \item{col}{Colors to use for plots.}
-  \item{\dots}{Additional arguments passed to \code{hist2d}. }
+  \item{\dots}{Additional arguments passed to \code{KernSmooth::bkde2D}
+    or \code{gplots::hist2d}. }
 }
 \details{
-  This function utilizes \code{hist2d} to create a 2-dimensional
-  histogram of the data passed as an argument.  This data is then
-  converted into densities and used to create and display confidence
-  regions.
+  This function utilizes either \code{KernSmooth::bkde2D} or
+  \code{gplots::hist2d} to estmate a 2-dimensional density of the data
+  passed as an argument.  This density is then used to create and
+  (optionally) display confidence regions.
+
+  When \code{bandwidth} is ommited and \code{method="bkde2d"},
+  \code{KernSmooth::dpik} is appled in x and y dimensions to select the
+  bandwidth.
+  
 }
+\note{
+  Confidence intervals generated by ci2d are \emph{approximate}, and
+  are subject to biases and/or artifacts induced by the binning or
+  kernel smoothing method, bin locations, bin sizes, and kernel bandwidth.
+  }
 \value{
-  A list containing 3 elements:
-  \item{counts}{Matrix containing the number of points falling into each
-    bin}
-  \item{x}{lower x limit of each bin}
-  \item{y}{lower y limit of each bin}
-  \item{density}{Matrix containing the probability density of each bin (count in bin/total
-    count)}
-  \item{cumDensity}{Matrix where each element contains the cumulative probability density
-    of all elements with the same density (used to create the confidence
-    region plots) }
-  \item{contours}{Contours of each confidence region}
+  A list containing (at least) the following elements:
+  \item{x}{x position of each density estimate bin}
+  \item{y}{y position of each density estimate bin}
+  \item{density}{Matrix containing the probability density of each bin
+    (count in bin/total count)}
+  \item{cumDensity}{Matrix where each element contains the cumulative
+    probability density of all elements with the same density (used to
+    create the confidence region plots) }
+  \item{contours}{Contours of each confidence region}.
+  \item{call}{Call used to create this object}
 }
 %\references{
 %}
 \author{ Gregory R. Warnes \email{gr...@wa...}}
 \seealso{
+  \code{\link[KernSmooth]{bkde2D}}, \code{\link[KernSmooth]{dpik}},
   \code{\link{hist2d}}
 }
 \examples{
-   # example data, bivariate normal, no correlation
+   ####
+   ## Basic usage 
+   ####
+   data(geyser, package="MASS")
+
+   x <- geyser$duration
+   y <- geyser$waiting
+
+   # 2-d confidence intervals based on binned kernel density estimate
+   ci2d(x,y)                   # filled contour plot
+   ci2d(x,y, show.points=TRUE) # show original data
+
+
+   # image plot
+   ci2d(x,y, show="image")
+   ci2d(x,y, show="image", show.points=TRUE)
+
+   # contour plot
+   ci2d(x,y, show="contour", col="black")
+   ci2d(x,y, show="contour", col="black", show.points=TRUE)
+
+   ####
+   ## Control Axis scales
+   ####
    x <- rnorm(2000, sd=4)
    y <- rnorm(2000, sd=1)
 
+   # 2-d confidence intervals based on binned kernel density estimate
+   ci2d(x,y)
+
    # 2-d confidence intervals based on 2d histogram
+   ci2d(x,y, method="hist2d", nbins=25)
+ 
+   # Require same scale for each axis, this looks oval
+   ci2d(x,y, range.x=list(c(-20,20), c(-20,20)))
+   ci2d(x,y, method="hist2d", same.scale=TRUE, nbins=25) # hist2d 
+
+   ####
+   ## Control smoothing and binning 
+   ####
+   x <- rnorm(2000, sd=4)
+   y <- rnorm(2000, mean=x, sd=2)
+
+   # Default 2-d confidence intervals based on binned kernel density estimate
    ci2d(x,y)
 
-   # same scale for each axis, this looks oval
-   ci2d(x,y, same.scale=TRUE)
+   # change the smoother bandwidth
+   ci2d(x,y,
+        bandwidth=c(sd(x)/8, sd(y)/8)
+       )
 
+   # change the smoother number of bins
+   ci2d(x,y, nbins=10)
+   ci2d(x,y)
+   ci2d(x,y, nbins=100)
+
+   # Default 2-d confidence intervals based on 2d histogram
+   ci2d(x,y, method="hist2d", show.points=TRUE)
+
+   # change the number of histogram bins
+   ci2d(x,y, nbin=10, method="hist2d", show.points=TRUE )
+   ci2d(x,y, nbin=25, method="hist2d", show.points=TRUE )
+
+   ####
+   ## Perform plotting manually
+   ####
+   data(geyser, package="MASS")
+
+   # let ci2d handle plotting contours...
+   ci2d(geyser$duration, geyser$waiting, show="contour", col="black")
+
+   # call contour() directly, show the 90 percent CI, and the mean point 
+   est <- ci2d(geyser$duration, geyser$waiting, show="none")
+   contour(est$x, est$y, est$cumDensity,
+           xlab="duration", ylab="waiting",
+           levels=0.90, lwd=4, lty=2)
+   points(mean(geyser$duration), mean(geyser$waiting),
+         col="red", pch="X")
+
+
+   ####
+   ## Extract confidence region values
+   ###
+   data(geyser, package="MASS")
+
+   ## Empirical 90 percent confidence limits
+   quantile( geyser$duration, c(0.05, 0.95) )
+   quantile( geyser$waiting, c(0.05, 0.95) )
+
+   ## Bivariate 90 percent confidence region
+   est <- ci2d(geyser$duration, geyser$waiting, show="none")
+   names(est$contours) ## show available contours
+
+   ci.90 <- est$contours[names(est$contours)=="0.9"]  # get region(s)
+   ci.90 <- rbind(ci.90[[1]],NA, ci.90[[2]], NA, ci.90[[3]]) # join them
+
+   print(ci.90)                  # show full contour
+   range(ci.90$x, na.rm=TRUE)    # range for duration
+   range(ci.90$y, na.rm=TRUE)    # range for waiting
+
+   ####
+   ## Visually compare confidence regions 
+   ####
+   data(geyser, package="MASS")
+
+   ## Bivariate smoothed 90 percent confidence region
+   est <- ci2d(geyser$duration, geyser$waiting, show="none")
+   names(est$contours) ## show available contours
+
+   ci.90 <- est$contours[names(est$contours)=="0.9"]  # get region(s)
+   ci.90 <- rbind(ci.90[[1]],NA, ci.90[[2]], NA, ci.90[[3]]) # join them
+
+   plot( waiting ~ duration, data=geyser,
+         main="Comparison of 90 percent confidence regions" )
+   polygon( ci.90, col="green", border="green", density=10)
+
+   ## Univariate Normal-Theory 90 percent confidence region
+   mean.x <- mean(geyser$duration)
+   mean.y <- mean(geyser$waiting)
+   sd.x <- sd(geyser$duration)
+   sd.y <- sd(geyser$waiting)
+
+   t.value <- qt(c(0.05,0.95), df=nobs(geyser$duration), lower=TRUE)
+   ci.x <- mean.x +  t.value* sd.x
+   ci.y <- mean.y +  t.value* sd.y
+
+   plotCI(mean.x, mean.y,
+          li=ci.x[1],
+          ui=ci.x[2],
+          barcol="blue", col="blue",
+          err="x",
+          pch="X",
+          add=TRUE )
+
+   plotCI(mean.x, mean.y,
+          li=ci.y[1],
+          ui=ci.y[2],
+          barcol="blue", col="blue",
+          err="y",
+          pch=NA,
+          add=TRUE )
+
+#   rect(ci.x[1], ci.y[1], ci.x[2], ci.y[2], border="blue",
+#        density=5,
+#        angle=45,
+#        col="blue" )
+
+
+   ## Empirical univariate 90 percent confidence region
+   box <- cbind( x=quantile( geyser$duration, c(0.05, 0.95 )), 
+                 y=quantile( geyser$waiting, c(0.05, 0.95 )) )
+
+   rect(box[1,1], box[1,2], box[2,1], box[2,2], border="red",
+        density=5,
+        angle=-45,
+        col="red" )
+
+   ## now a nice legend
+   legend( "topright", legend=c("       Region type",
+                                "Univariate Normal Theory",
+                                "Univarite Empirical",
+                                "Smoothed Bivariate"),
+           lwd=c(NA,1,1,1),
+           col=c("black","blue","red","green"),
+           lty=c(NA,1,1,1)
+         )
+
+   
+
+
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.