[R-gregmisc-users] SF.net SVN: r-gregmisc:[1307] trunk/gdata

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 1307
          http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1307&view=rev
Author:   ggorjan
Date:     2008-12-31 13:25:52 +0000 (Wed, 31 Dec 2008)

Log Message:
-----------
New function bindData that binds two data frames into a multivariate data frame in a different way than merge.

Added Paths:
-----------
    trunk/gdata/R/bindData.R
    trunk/gdata/inst/unitTests/runit.bindData.R
    trunk/gdata/man/bindData.Rd

Added: trunk/gdata/R/bindData.R
===================================================================

--- trunk/gdata/R/bindData.R	                        (rev 0)
+++ trunk/gdata/R/bindData.R	2008-12-31 13:25:52 UTC (rev 1307)
@@ -0,0 +1,38 @@
+### bindData.R
+###------------------------------------------------------------------------
+### What: Bind two data frames - code
+### $Id$
+### Time-stamp: <2008-12-30 22:01:00 ggorjan>
+###------------------------------------------------------------------------
+
+bindData <- function(x, y, common)
+{
+  ## --- Setup: both inputs must be data frames ('common' is not validated) ---
+  if(!is.data.frame(x)) stop("'x' must be a data frame")
+  if(!is.data.frame(y)) stop("'y' must be a data frame")
+
+  ## --- New data frame ---
+
+  ## Stack the common columns: rows of x on top, rows of y below
+  z <- rbind(x[common], y[common])
+
+  ## Other columns
+  ## - drop the columns already present in z from the names of x and y
+  namesz <- names(z)
+  otherx <- names(x)
+  otherx <- otherx[!(otherx %in% namesz)] # columns of x not yet in z
+  othery <- names(y)
+  othery <- othery[!(othery %in% namesz)] # columns of y not yet in z
+
+  ## - add all other columns as one set per input data frame, padded with NA
+  rx <- nrow(x); cx <- length(otherx) # rows of x; count of its other columns
+  ry <- nrow(y); cy <- length(othery) # rows of y; count of its other columns
+  
+  z <- cbind(z, rbind(x[otherx], matrix(rep(NA, times=(ry * cx)), nrow=ry, ncol=cx, dimnames=list(NULL, otherx)))) # x's columns; NA block for the rows of y
+  z <- cbind(z, rbind(matrix(rep(NA, times=(rx * cy)), nrow=rx, ncol=cy, dimnames=list(NULL, othery)), y[othery])) # NA block for the rows of x; y's columns
+
+  z # rows: x then y; columns: common, other x, other y
+}
+
+###------------------------------------------------------------------------
+### bindData.R ends here


Property changes on: trunk/gdata/R/bindData.R
___________________________________________________________________
Added: svn:keywords
   + 

Added: trunk/gdata/inst/unitTests/runit.bindData.R
===================================================================
--- trunk/gdata/inst/unitTests/runit.bindData.R	                        (rev 0)
+++ trunk/gdata/inst/unitTests/runit.bindData.R	2008-12-31 13:25:52 UTC (rev 1307)
@@ -0,0 +1,75 @@
+### runit.bindData.R
+###------------------------------------------------------------------------
+### What: Bind two data frames - unit tests
+### $Id$
+### Time-stamp: <2008-12-30 11:58:50 ggorjan>
+###------------------------------------------------------------------------
+
+### {{{ --- Test setup ---
+
+if(FALSE) {
+  library("RUnit")
+  library("gdata")
+}
+
+### }}}
+### {{{ --- bindData ---
+
+test.bindData <- function()
+{
+  ## Non-data-frame 'x'/'y' (vector, matrix) must raise an error
+  checkException(bindData(x=1:10, y=1:10))
+  checkException(bindData(x=matrix(1:10), y=matrix(1:10)))
+  
+  n1 <- 6; n2 <- 12; n3 <- 4 # number of rows in tmp1, tmp2 and tmp3
+  ## Single trait 1
+  num <- c(5:n1, 10:13)
+  tmp1 <- data.frame(y1=rnorm(n=n1),
+                     f1=factor(rep(c("A", "B"), n1/2)),
+                     ch=letters[num],
+                     fa=factor(letters[num]),
+                     nu=(num) + 0.5,
+                     id=factor(num), stringsAsFactors=FALSE)
+
+  ## Single trait 2 with repeated records, some subjects also in tmp1 
+  num <- 4:9
+  tmp2 <- data.frame(y2=rnorm(n=n2),
+                     f2=factor(rep(c("C", "D"), n2/2)),
+                     ch=letters[rep(num, times=2)],
+                     fa=factor(letters[rep(c(num), times=2)]),
+                     nu=c((num) + 0.5, (num) + 0.25),
+                     id=factor(rep(num, times=2)), stringsAsFactors=FALSE)
+
+  ## Single trait 3 with completely distinct set of subjects
+  num <- 1:4
+  tmp3 <- data.frame(y3=rnorm(n=n3),
+                     f3=factor(rep(c("E", "F"), n3/2)),
+                     ch=letters[num],
+                     fa=factor(letters[num]),
+                     nu=(num) + 0.5,
+                     id=factor(num), stringsAsFactors=FALSE)
+
+  ## Combine all datasets in two steps: tmp1 with tmp2, then the result with tmp3
+  tmp12 <- bindData(x=tmp1, y=tmp2, common=c("id", "nu", "ch", "fa"))
+  tmp123 <- bindData(x=tmp12, y=tmp3, common=c("id", "nu", "ch", "fa"))
+
+  checkEquals(names(tmp123), c("id", "nu", "ch", "fa", "y1", "f1", "y2", "f2", "y3", "f3")) # common columns first, then each dataset's own
+  checkEquals(rbind(tmp1["id"], tmp2["id"], tmp3["id"]), tmp123["id"]) # common columns are stacked row-wise
+  checkEquals(rbind(tmp1["fa"], tmp2["fa"], tmp3["fa"]), tmp123["fa"])
+  checkEquals(is.na(tmp123$y1), c(rep(FALSE, times=n1), rep(TRUE, times=n2+n3))) # tmp1 columns: values only in the first n1 rows
+  checkEquals(is.na(tmp123$f1), c(rep(FALSE, times=n1), rep(TRUE, times=n2+n3)))
+  checkEquals(is.na(tmp123$y2), c(rep(TRUE, times=n1), rep(FALSE, times=n2), rep(TRUE, times=n3))) # tmp2 columns: values only in the middle n2 rows
+  checkEquals(is.na(tmp123$f2), c(rep(TRUE, times=n1), rep(FALSE, times=n2), rep(TRUE, times=n3)))
+  checkEquals(is.na(tmp123$y3), c(rep(TRUE, times=n1+n2), rep(FALSE, times=n3))) # tmp3 columns: values only in the last n3 rows
+  checkEquals(is.na(tmp123$f3), c(rep(TRUE, times=n1+n2), rep(FALSE, times=n3)))
+}
+
+### }}}
+### {{{ Dear Emacs
+## Local variables:
+## folded-file: t
+## End:
+### }}}
+
+###------------------------------------------------------------------------
+### runit.bindData.R ends here


Property changes on: trunk/gdata/inst/unitTests/runit.bindData.R
___________________________________________________________________
Added: svn:keywords
   + 

Added: trunk/gdata/man/bindData.Rd
===================================================================
--- trunk/gdata/man/bindData.Rd	                        (rev 0)
+++ trunk/gdata/man/bindData.Rd	2008-12-31 13:25:52 UTC (rev 1307)
@@ -0,0 +1,93 @@
+% bindData.Rd
+%--------------------------------------------------------------------------
+% What: Bind two data frames - help
+% $Id$
+% Time-stamp: <2008-12-30 13:49:50 ggorjan>
+%--------------------------------------------------------------------------
+
+\name{bindData}
+\alias{bindData}
+
+\title{Bind two data frames into a multivariate data frame}
+
+\description{
+  Usually data frames represent one set of variables and one needs to 
+  bind/join them for multivariate analysis. When \code{\link{merge}} is not
+  the appropriate solution, \code{bindData} might perform an appropriate binding
+  for two data frames. This is especially useful when some variables are
+  measured once, while others are repeated.
+}
+
+\usage{
+  bindData(x, y, common)
+}
+
+\arguments{
+  \item{x}{data.frame}
+  \item{y}{data.frame}
+  \item{common}{character, list of column names that are common to both
+    input data frames}
+}
+
+\details{
+  Data frames are joined in such a way that the new data frame has
+  \eqn{c + (n_1 - c) + (n_2 - c)} columns, where \eqn{c} is the number of
+  common columns, and \eqn{n_1} and \eqn{n_2} are the number of columns 
+  in the first and in the second data frame, respectively.
+}
+
+\value{
+  A data frame.
+}
+
+\author{Gregor Gorjanc}
+
+\seealso{
+  \code{\link[base]{merge}},
+  \code{\link{wideByFactor}}
+}
+
+\examples{
+n1 <- 6
+n2 <- 12
+n3 <- 4
+## Single trait 1
+num <- c(5:n1, 10:13)
+(tmp1 <- data.frame(y1=rnorm(n=n1),
+                    f1=factor(rep(c("A", "B"), n1/2)),
+                    ch=letters[num],
+                    fa=factor(letters[num]),
+                    nu=(num) + 0.5,
+                    id=factor(num), stringsAsFactors=FALSE))
+
+## Single trait 2 with repeated records, some subjects also in tmp1 
+num <- 4:9
+(tmp2 <- data.frame(y2=rnorm(n=n2),
+                    f2=factor(rep(c("C", "D"), n2/2)),
+                    ch=letters[rep(num, times=2)],
+                    fa=factor(letters[rep(c(num), times=2)]),
+                    nu=c((num) + 0.5, (num) + 0.25),
+                    id=factor(rep(num, times=2)), stringsAsFactors=FALSE))
+
+## Single trait 3 with completely distinct set of subjects
+num <- 1:4
+(tmp3 <- data.frame(y3=rnorm(n=n3),
+                    f3=factor(rep(c("E", "F"), n3/2)),
+                    ch=letters[num],
+                    fa=factor(letters[num]),
+                    nu=(num) + 0.5,
+                    id=factor(num), stringsAsFactors=FALSE))
+
+## Combine all datasets
+(tmp12 <- bindData(x=tmp1, y=tmp2, common=c("id", "nu", "ch", "fa")))
+(tmp123 <- bindData(x=tmp12, y=tmp3, common=c("id", "nu", "ch", "fa")))
+
+## Sort by subject
+tmp123[order(tmp123$ch), ]
+}
+
+\keyword{manip}
+\keyword{misc}
+
+%--------------------------------------------------------------------------
+% bindData.Rd ends here
\ No newline at end of file


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.