[R-gregmisc-users] SF.net SVN: r-gregmisc:[1310] trunk/gdata

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 1310
          http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1310&view=rev
Author:   ggorjan
Date:     2008-12-31 13:29:03 +0000 (Wed, 31 Dec 2008)

Log Message:
-----------
New function wideByFactor that reshapes given dataset by a given factor - it creates a "multivariate" data.frame.

Added Paths:
-----------
    trunk/gdata/R/wideByFactor.R
    trunk/gdata/inst/unitTests/runit.wideByFactor.R
    trunk/gdata/man/wideByFactor.Rd

Added: trunk/gdata/R/wideByFactor.R
===================================================================

--- trunk/gdata/R/wideByFactor.R	                        (rev 0)
+++ trunk/gdata/R/wideByFactor.R	2008-12-31 13:29:03 UTC (rev 1310)
@@ -0,0 +1,40 @@
+### wideByFactor.R
+###------------------------------------------------------------------------
+### What: Reshape by factor levels - code
+### $Id$
+### Time-stamp: <2008-12-30 22:17:32 ggorjan>
+###------------------------------------------------------------------------
+
+wideByFactor <- function(x, factor, common, sort=TRUE, keepFactor=TRUE)
+{
+  ## --- Setup: validate arguments, optionally order rows by the factor ---
+  if(!is.data.frame(x)) stop("'x' must be a data frame")
+  if(length(factor) != 1) stop("'factor' can be only of length one") 
+  if(!is.factor(x[[factor]])) stop("column defined in 'factor' must be a factor")
+  if(sort) x <- x[order(x[[factor]]), ]
+
+  ## --- Extend by factor levels: the result starts from the common columns ---
+  y <- x[common]
+  if(keepFactor) y[factor] <- x[factor]
+  levs <- levels(x[[factor]])
+
+  ## Columns to split: all columns of x except the common ones and the factor
+  other <- names(x)
+  other <- other[!(other %in% common) & !(other %in% factor)]
+
+  ## Add all other columns, but as a separate set for each level of the factor
+  for(level in levs) {
+    for(col in other) {
+      ## add a full copy of column col under the name "<col>.<level>"
+      y[paste(col, level, sep=".")] <- x[col]
+      ## fill with NA for other levels than level; NOTE(review): an NA in the factor column gives an NA row index here - confirm this case is handled
+      y[x[factor] != level, paste(col, level, sep=".")] <- NA
+      ## This filling might be inefficient if there is a large number
+      ## of levels, since there will be quite a lot of filling.
+    }
+  }
+  y
+}
+
+###------------------------------------------------------------------------
+### wideByFactor.R ends here
\ No newline at end of file


Property changes on: trunk/gdata/R/wideByFactor.R
___________________________________________________________________
Added: svn:keywords
   + 

Added: trunk/gdata/inst/unitTests/runit.wideByFactor.R
===================================================================
--- trunk/gdata/inst/unitTests/runit.wideByFactor.R	                        (rev 0)
+++ trunk/gdata/inst/unitTests/runit.wideByFactor.R	2008-12-31 13:29:03 UTC (rev 1310)
@@ -0,0 +1,55 @@
+### runit.wideByFactor.R
+###------------------------------------------------------------------------
+### What: Reshape by factor levels - unit tests
+### $Id$
+### Time-stamp: <2008-12-30 11:58:50 ggorjan>
+###------------------------------------------------------------------------
+
+### {{{ --- Test setup ---
+
+if(FALSE) {
+  library("RUnit")
+  library("gdata")
+}
+
+### }}}
+### {{{ --- wideByFactor ---
+
+test.wideByFactor <- function()
+{
+  n <- 10
+  f <- 2
+  tmp <- data.frame(y1=(1:n)/2,
+                    y2=(n:1)*2,
+                    f1=factor(rep(letters[1:f], n/2)),
+                    f2=factor(c(rep(c("M"), n/2), rep(c("F"), n/2))),
+                    c1=1:n,
+                    c2=2*(1:n))
+  
+  ## error expected: 'x' must be a data.frame (a plain vector and a matrix are rejected)
+  checkException(wideByFactor(x=1:10))
+  checkException(wideByFactor(x=matrix(1:10)))
+  ## error expected: 'factor' can be only of length one (two column names are given)
+  checkException(wideByFactor(x=tmp, factor=c("f1", "f2")))
+  ## error expected: column defined in 'factor' must be a factor ("c1" is an integer column)
+  checkException(wideByFactor(x=tmp, factor="c1"))
+
+  tmp2 <- wideByFactor(x=tmp, factor="f1", common=c("c1", "c2"), sort=FALSE)
+  checkEquals(tmp2[c("c1", "c2")], tmp[c("c1", "c2")])
+  checkEquals(names(tmp2), c("c1", "c2", "f1", "y1.a", "y2.a", "f2.a", "y1.b", "y2.b", "f2.b"))
+  checkEquals(tmp2$y1.a, c(0.5, NA, 1.5, NA, 2.5, NA, 3.5, NA, 4.5, NA))
+  checkEquals(tmp2$f2.a, factor(c("M", NA, "M", NA, "M", NA, "F", NA, "F", NA)))
+  tmp2 <- wideByFactor(x=tmp, factor="f1", common=c("c1", "c2"), sort=TRUE, keepFactor=FALSE)
+  checkEquals(tmp2$f2.a, factor(c("M", "M", "M", "F", "F", NA, NA, NA, NA, NA)))
+  checkEquals(names(tmp2), c("c1", "c2", "y1.a", "y2.a", "f2.a", "y1.b", "y2.b", "f2.b"))
+}
+
+### }}}
+### {{{ Dear Emacs
+## Local variables:
+## folded-file: t
+## End:
+### }}}
+
+###------------------------------------------------------------------------
+### runit.wideByFactor.R ends here


Property changes on: trunk/gdata/inst/unitTests/runit.wideByFactor.R
___________________________________________________________________
Added: svn:keywords
   + 

Added: trunk/gdata/man/wideByFactor.Rd
===================================================================
--- trunk/gdata/man/wideByFactor.Rd	                        (rev 0)
+++ trunk/gdata/man/wideByFactor.Rd	2008-12-31 13:29:03 UTC (rev 1310)
@@ -0,0 +1,73 @@
+% wideByFactor.Rd
+%--------------------------------------------------------------------------
+% What: Reshape by factor levels - help
+% $Id$
+% Time-stamp: <2008-12-30 13:49:50 ggorjan>
+%--------------------------------------------------------------------------
+
+\name{wideByFactor}
+\alias{wideByFactor}
+
+\title{Create multivariate data by a given factor}
+
+\description{
+  
+\code{wideByFactor} modifies a data frame in such a way that variables are
+\dQuote{separated} into several columns by factor levels.
+
+}
+
+\usage{
+  wideByFactor(x, factor, common, sort=TRUE, keepFactor=TRUE)
+}
+
+\arguments{
+  \item{x}{data frame}
+  \item{factor}{character, column name of a factor by which variables will 
+    be divided}
+  \item{common}{character, column names of (common) columns that should not
+    be divided}
+  \item{sort}{logical, sort resulting data frame by factor levels}
+  \item{keepFactor}{logical, keep the \sQuote{factor} column}
+}
+
+\details{
+
+The given data frame is modified in such a way that the output is a data frame
+with \eqn{c + f + n * v} columns, where \eqn{c} is the number of common columns
+for all levels of the factor, \eqn{f} is the factor column (kept only if
+\code{keepFactor=TRUE}), \eqn{n} is the number of levels in factor \eqn{f}, and \eqn{v} is the number of
+variables that are divided for each level of the factor. The number of rows stays the same!
+}
+
+\value{
+  A data frame where the divided variables have a sort of \dQuote{diagonalized} structure
+}
+
+\author{Gregor Gorjanc}
+
+\seealso{ 
+  \code{\link[stats]{reshape}} in the \pkg{stats} package,
+  \code{\link[reshape]{melt}} and \code{\link[reshape]{cast}} in 
+  the \pkg{reshape} package
+}
+
+\examples{
+n <- 10
+f <- 2
+tmp <- data.frame(y1=rnorm(n=n),
+                  y2=rnorm(n=n),
+                  f1=factor(rep(letters[1:f], n/2)),
+                  f2=factor(c(rep(c("M"), n/2), rep(c("F"), n/2))),
+                  c1=1:n,
+                  c2=2*(1:n))
+
+wideByFactor(x=tmp, factor="f1", common=c("c1", "c2", "f2"))
+wideByFactor(x=tmp, factor="f1", common=c("c1", "c2"))
+}
+
+\keyword{manip}
+\keyword{misc}
+
+%--------------------------------------------------------------------------
+% wideByFactor.Rd ends here
\ No newline at end of file


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.