[R-gregmisc-users] SF.net SVN: r-gregmisc:[1310] trunk/gdata
Brought to you by:
warnes
From: <gg...@us...> - 2008-12-31 13:29:09
|
Revision: 1310 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1310&view=rev Author: ggorjan Date: 2008-12-31 13:29:03 +0000 (Wed, 31 Dec 2008) Log Message: ----------- New function wideByFactor that reshapes given dataset by a given factor - it creates a "multivariate" data.frame. Added Paths: ----------- trunk/gdata/R/wideByFactor.R trunk/gdata/inst/unitTests/runit.wideByFactor.R trunk/gdata/man/wideByFactor.Rd Added: trunk/gdata/R/wideByFactor.R =================================================================== --- trunk/gdata/R/wideByFactor.R (rev 0) +++ trunk/gdata/R/wideByFactor.R 2008-12-31 13:29:03 UTC (rev 1310) @@ -0,0 +1,40 @@ +### wideByFactor.R +###------------------------------------------------------------------------ +### What: Reshape by factor levels - code +### $Id$ +### Time-stamp: <2008-12-30 22:17:32 ggorjan> +###------------------------------------------------------------------------ + +wideByFactor <- function(x, factor, common, sort=TRUE, keepFactor=TRUE) +{ + ## --- Setup --- + if(!is.data.frame(x)) stop("'x' must be a data frame") + if(length(factor) != 1) stop("'factor' can be only of length one") + if(!is.factor(x[[factor]])) stop("column defined in 'factor' must be a factor") + if(sort) x <- x[order(x[[factor]]), ] + + ## --- Extend by factors levels --- + y <- x[common] + if(keepFactor) y[factor] <- x[factor] + levs <- levels(x[[factor]]) + + ## Remove common and factor from the list of column names + other <- names(x) + other <- other[!(other %in% common) & !(other %in% factor)] + + ## Add all other columns but as a set for each level of a factor + for(level in levs) { + for(col in other) { + ## add a column col + y[paste(col, level, sep=".")] <- x[col] + ## fill with NA for other levels than level + y[x[factor] != level, paste(col, level, sep=".")] <- NA + ## This filling might be inefficient if there is a large number + ## of levels, since there will be quite a lot of filling. 
+ } + } + y +} + +###------------------------------------------------------------------------ +### wideByFactor.R ends here \ No newline at end of file Property changes on: trunk/gdata/R/wideByFactor.R ___________________________________________________________________ Added: svn:keywords + Added: trunk/gdata/inst/unitTests/runit.wideByFactor.R =================================================================== --- trunk/gdata/inst/unitTests/runit.wideByFactor.R (rev 0) +++ trunk/gdata/inst/unitTests/runit.wideByFactor.R 2008-12-31 13:29:03 UTC (rev 1310) @@ -0,0 +1,55 @@ +### runit.wideByFactor.R +###------------------------------------------------------------------------ +### What: Reshape by factor levels - unit tests +### $Id$ +### Time-stamp: <2008-12-30 11:58:50 ggorjan> +###------------------------------------------------------------------------ + +### {{{ --- Test setup --- + +if(FALSE) { + library("RUnit") + library("gdata") +} + +### }}} +### {{{ --- wideByFactor --- + +test.wideByFactor <- function() +{ + n <- 10 + f <- 2 + tmp <- data.frame(y1=(1:n)/2, + y2=(n:1)*2, + f1=factor(rep(letters[1:f], n/2)), + f2=factor(c(rep(c("M"), n/2), rep(c("F"), n/2))), + c1=1:n, + c2=2*(1:n)) + + ## 'x' must be a data.frame + checkException(wideByFactor(x=1:10)) + checkException(wideByFactor(x=matrix(1:10))) + ## 'factor' can be only of length one + checkException(wideByFactor(x=tmp, factor=c("f1", "f2"))) + ## column defined in 'factor' must be a factor + checkException(wideByFactor(x=tmp, factor="c1")) + + tmp2 <- wideByFactor(x=tmp, factor="f1", common=c("c1", "c2"), sort=FALSE) + checkEquals(tmp2[c("c1", "c2")], tmp[c("c1", "c2")]) + checkEquals(names(tmp2), c("c1", "c2", "f1", "y1.a", "y2.a", "f2.a", "y1.b", "y2.b", "f2.b")) + checkEquals(tmp2$y1.a, c(0.5, NA, 1.5, NA, 2.5, NA, 3.5, NA, 4.5, NA)) + checkEquals(tmp2$f2.a, factor(c("M", NA, "M", NA, "M", NA, "F", NA, "F", NA))) + tmp2 <- wideByFactor(x=tmp, factor="f1", common=c("c1", "c2"), sort=TRUE, 
keepFactor=FALSE) + checkEquals(tmp2$f2.a, factor(c("M", "M", "M", "F", "F", NA, NA, NA, NA, NA))) + checkEquals(names(tmp2), c("c1", "c2", "y1.a", "y2.a", "f2.a", "y1.b", "y2.b", "f2.b")) +} + +### }}} +### {{{ Dear Emacs +## Local variables: +## folded-file: t +## End: +### }}} + +###------------------------------------------------------------------------ +### runit.wideByFactor.R ends here Property changes on: trunk/gdata/inst/unitTests/runit.wideByFactor.R ___________________________________________________________________ Added: svn:keywords + Added: trunk/gdata/man/wideByFactor.Rd =================================================================== --- trunk/gdata/man/wideByFactor.Rd (rev 0) +++ trunk/gdata/man/wideByFactor.Rd 2008-12-31 13:29:03 UTC (rev 1310) @@ -0,0 +1,73 @@ +% wideByFactor.Rd +%-------------------------------------------------------------------------- +% What: Reshape by factor levels - help +% $Id$ +% Time-stamp: <2008-12-30 13:49:50 ggorjan> +%-------------------------------------------------------------------------- + +\name{wideByFactor} +\alias{wideByFactor} + +\title{Create multivariate data by a given factor} + +\description{ + +\code{wideByFactor} modifies data.frame in such a way that variables are +\dQuote{separated} into several columns by factor levels. 
+ +} + +\usage{ + wideByFactor(x, factor, common, sort=TRUE, keepFactor=TRUE) +} + +\arguments{ + \item{x}{data frame} + \item{factor}{character, column name of a factor by which variables will + be divided} + \item{common}{character, column names of (common) columns that should not + be divided} + \item{sort}{logical, sort resulting data frame by factor levels} + \item{keepFactor}{logical, keep the \sQuote{factor} column} +} + +\details{ + +The given data frame is modified in such a way that the output is a data frame +with \eqn{c + f + n * v} columns, where \eqn{c} is the number of common columns +for all levels of a factor, \eqn{f} is a factor column, \eqn{n} is the number of +levels in factor \eqn{f} and \eqn{v} is the number of variables that should be +divided for each level of a factor. The number of rows stays the same! +} + +\value{ + A data frame where divided variables have a sort of \dQuote{diagonalized} structure +} + +\author{Gregor Gorjanc} + +\seealso{ + \code{\link[stats]{reshape}} in the \pkg{stats} package, + \code{\link[reshape]{melt}} and \code{\link[reshape]{cast}} in + the \pkg{reshape} package +} + +\examples{ +n <- 10 +f <- 2 +tmp <- data.frame(y1=rnorm(n=n), + y2=rnorm(n=n), + f1=factor(rep(letters[1:f], n/2)), + f2=factor(c(rep(c("M"), n/2), rep(c("F"), n/2))), + c1=1:n, + c2=2*(1:n)) + +wideByFactor(x=tmp, factor="f1", common=c("c1", "c2", "f2")) +wideByFactor(x=tmp, factor="f1", common=c("c1", "c2")) +} + +\keyword{manip} +\keyword{misc} + +%-------------------------------------------------------------------------- +% wideByFactor.Rd ends here \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |