[R-gregmisc-users] SF.net SVN: r-gregmisc:[1307] trunk/gdata
Brought to you by:
warnes
From: <gg...@us...> - 2008-12-31 13:26:03
|
Revision: 1307 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1307&view=rev Author: ggorjan Date: 2008-12-31 13:25:52 +0000 (Wed, 31 Dec 2008) Log Message: ----------- New function bindData that binds two data frames into a multivariate data frame in a different way than merge. Added Paths: ----------- trunk/gdata/R/bindData.R trunk/gdata/inst/unitTests/runit.bindData.R trunk/gdata/man/bindData.Rd Added: trunk/gdata/R/bindData.R =================================================================== --- trunk/gdata/R/bindData.R (rev 0) +++ trunk/gdata/R/bindData.R 2008-12-31 13:25:52 UTC (rev 1307) @@ -0,0 +1,38 @@ +### bindData.R +###------------------------------------------------------------------------ +### What: Bind two data frames - code +### $Id$ +### Time-stamp: <2008-12-30 22:01:00 ggorjan> +###------------------------------------------------------------------------ + +bindData <- function(x, y, common) +{ + ## --- Setup --- + if(!is.data.frame(x)) stop("'x' must be a data frame") + if(!is.data.frame(y)) stop("'y' must be a data frame") + + ## --- New data frame --- + + ## First add common column and a dataset indicator column + z <- rbind(x[common], y[common]) + + ## Other columns + ## - remove common columns in x and y + namesz <- names(z) + otherx <- names(x) + otherx <- otherx[!(otherx %in% namesz)] + othery <- names(y) + othery <- othery[!(othery %in% namesz)] + + ## - add all other columns but as a set for each input data frame + rx <- nrow(x); cx <- length(otherx) + ry <- nrow(y); cy <- length(othery) + + z <- cbind(z, rbind(x[otherx], matrix(rep(NA, times=(ry * cx)), nrow=ry, ncol=cx, dimnames=list(NULL, otherx)))) + z <- cbind(z, rbind(matrix(rep(NA, times=(rx * cy)), nrow=rx, ncol=cy, dimnames=list(NULL, othery)), y[othery])) + + z +} + +###------------------------------------------------------------------------ +### bindData.R ends here Property changes on: trunk/gdata/R/bindData.R 
___________________________________________________________________ Added: svn:keywords + Added: trunk/gdata/inst/unitTests/runit.bindData.R =================================================================== --- trunk/gdata/inst/unitTests/runit.bindData.R (rev 0) +++ trunk/gdata/inst/unitTests/runit.bindData.R 2008-12-31 13:25:52 UTC (rev 1307) @@ -0,0 +1,75 @@ +### runit.bindData.R +###------------------------------------------------------------------------ +### What: Bind two data frames - unit tests +### $Id$ +### Time-stamp: <2008-12-30 11:58:50 ggorjan> +###------------------------------------------------------------------------ + +### {{{ --- Test setup --- + +if(FALSE) { + library("RUnit") + library("gdata") +} + +### }}} +### {{{ --- bindData --- + +test.bindData <- function() +{ + ## 'x'/'y' must be a data.frame + checkException(bindData(x=1:10, y=1:10)) + checkException(bindData(x=matrix(1:10), y=matrix(1:10))) + + n1 <- 6; n2 <- 12; n3 <- 4 + ## Single trait 1 + num <- c(5:n1, 10:13) + tmp1 <- data.frame(y1=rnorm(n=n1), + f1=factor(rep(c("A", "B"), n1/2)), + ch=letters[num], + fa=factor(letters[num]), + nu=(num) + 0.5, + id=factor(num), stringsAsFactors=FALSE) + + ## Single trait 2 with repeated records, some subjects also in tmp1 + num <- 4:9 + tmp2 <- data.frame(y2=rnorm(n=n2), + f2=factor(rep(c("C", "D"), n2/2)), + ch=letters[rep(num, times=2)], + fa=factor(letters[rep(c(num), times=2)]), + nu=c((num) + 0.5, (num) + 0.25), + id=factor(rep(num, times=2)), stringsAsFactors=FALSE) + + ## Single trait 3 with completely distinct set of subjects + num <- 1:4 + tmp3 <- data.frame(y3=rnorm(n=n3), + f3=factor(rep(c("E", "F"), n3/2)), + ch=letters[num], + fa=factor(letters[num]), + nu=(num) + 0.5, + id=factor(num), stringsAsFactors=FALSE) + + ## Combine all datasets + tmp12 <- bindData(x=tmp1, y=tmp2, common=c("id", "nu", "ch", "fa")) + tmp123 <- bindData(x=tmp12, y=tmp3, common=c("id", "nu", "ch", "fa")) + + checkEquals(names(tmp123), c("id", "nu", "ch", 
"fa", "y1", "f1", "y2", "f2", "y3", "f3")) + checkEquals(rbind(tmp1["id"], tmp2["id"], tmp3["id"]), tmp123["id"]) + checkEquals(rbind(tmp1["fa"], tmp2["fa"], tmp3["fa"]), tmp123["fa"]) + checkEquals(is.na(tmp123$y1), c(rep(FALSE, times=n1), rep(TRUE, times=n2+n3))) + checkEquals(is.na(tmp123$f1), c(rep(FALSE, times=n1), rep(TRUE, times=n2+n3))) + checkEquals(is.na(tmp123$y2), c(rep(TRUE, times=n1), rep(FALSE, times=n2), rep(TRUE, times=n3))) + checkEquals(is.na(tmp123$f2), c(rep(TRUE, times=n1), rep(FALSE, times=n2), rep(TRUE, times=n3))) + checkEquals(is.na(tmp123$y3), c(rep(TRUE, times=n1+n2), rep(FALSE, times=n3))) + checkEquals(is.na(tmp123$f3), c(rep(TRUE, times=n1+n2), rep(FALSE, times=n3))) +} + +### }}} +### {{{ Dear Emacs +## Local variables: +## folded-file: t +## End: +### }}} + +###------------------------------------------------------------------------ +### runit.bindData.R ends here Property changes on: trunk/gdata/inst/unitTests/runit.bindData.R ___________________________________________________________________ Added: svn:keywords + Added: trunk/gdata/man/bindData.Rd =================================================================== --- trunk/gdata/man/bindData.Rd (rev 0) +++ trunk/gdata/man/bindData.Rd 2008-12-31 13:25:52 UTC (rev 1307) @@ -0,0 +1,93 @@ +% bindData.Rd +%-------------------------------------------------------------------------- +% What: Bind two data frames - help +% $Id$ +% Time-stamp: <2008-12-30 13:49:50 ggorjan> +%-------------------------------------------------------------------------- + +\name{bindData} +\alias{bindData} + +\title{Bind two data frames into a multivariate data frame} + +\description{ + Usually data frames represent one set of variables and one needs to + bind/join them for multivariate analysis. When \code{\link{merge}} is not + the approriate solution, \code{bindData} might perform an appropriate binding + for two data frames. 
This is especially useful when some variables are + measured once, while others are repeated. +} + +\usage{ + bindData(x, y, common) +} + +\arguments{ + \item{x}{data.frame} + \item{y}{data.frame} + \item{common}{character, list of column names that are common to both + input data frames} +} + +\details{ + Data frames are joined in such a way that the new data frame has + \eqn{c + (n_1 - c) + (n_2 - c)} columns, where \eqn{c} is the number of + common columns, and \eqn{n_1} and \eqn{n_2} are the number of columns + in the first and in the second data frame, respectively. +} + +\value{ + A data frame. +} + +\author{Gregor Gorjanc} + +\seealso{ + \code{\link[base]{merge}}, + \code{\link{wideByFactor}} +} + +\examples{ +n1 <- 6 +n2 <- 12 +n3 <- 4 +## Single trait 1 +num <- c(5:n1, 10:13) +(tmp1 <- data.frame(y1=rnorm(n=n1), + f1=factor(rep(c("A", "B"), n1/2)), + ch=letters[num], + fa=factor(letters[num]), + nu=(num) + 0.5, + id=factor(num), stringsAsFactors=FALSE)) + +## Single trait 2 with repeated records, some subjects also in tmp1 +num <- 4:9 +(tmp2 <- data.frame(y2=rnorm(n=n2), + f2=factor(rep(c("C", "D"), n2/2)), + ch=letters[rep(num, times=2)], + fa=factor(letters[rep(c(num), times=2)]), + nu=c((num) + 0.5, (num) + 0.25), + id=factor(rep(num, times=2)), stringsAsFactors=FALSE)) + +## Single trait 3 with completely distinct set of subjects +num <- 1:4 +(tmp3 <- data.frame(y3=rnorm(n=n3), + f3=factor(rep(c("E", "F"), n3/2)), + ch=letters[num], + fa=factor(letters[num]), + nu=(num) + 0.5, + id=factor(num), stringsAsFactors=FALSE)) + +## Combine all datasets +(tmp12 <- bindData(x=tmp1, y=tmp2, common=c("id", "nu", "ch", "fa"))) +(tmp123 <- bindData(x=tmp12, y=tmp3, common=c("id", "nu", "ch", "fa"))) + +## Sort by subject +tmp123[order(tmp123$ch), ] +} + +\keyword{manip} +\keyword{misc} + +%-------------------------------------------------------------------------- +% bindData.Rd ends here \ No newline at end of file This was sent by the SourceForge.net 
collaborative development platform, the world's largest Open Source development site. |