Thread: [R-gregmisc-users] SF.net SVN: r-gregmisc:[1513] trunk/gtools/R/smartbind.R
Brought to you by:
warnes
From: <wa...@us...> - 2011-09-28 22:56:48
|
Revision: 1513 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1513&view=rev Author: warnes Date: 2011-09-28 22:56:42 +0000 (Wed, 28 Sep 2011) Log Message: ----------- smartbind(): Prevent coersion to data frame from mangling column names. Modified Paths: -------------- trunk/gtools/R/smartbind.R Modified: trunk/gtools/R/smartbind.R =================================================================== --- trunk/gtools/R/smartbind.R 2011-09-28 22:53:47 UTC (rev 1512) +++ trunk/gtools/R/smartbind.R 2011-09-28 22:56:42 UTC (rev 1513) @@ -15,7 +15,7 @@ if(is.matrix(x) || is.data.frame(x)) x else - data.frame(as.list(x)) + data.frame(as.list(x), check.names=FALSE) ) #retval <- new.env() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <wa...@us...> - 2012-05-04 16:06:59
|
Revision: 1529 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1529&view=rev Author: warnes Date: 2012-05-04 16:06:49 +0000 (Fri, 04 May 2012) Log Message: ----------- smartbind(): Improve handling of factors and ordered factors. Modified Paths: -------------- trunk/gtools/R/smartbind.R Modified: trunk/gtools/R/smartbind.R =================================================================== --- trunk/gtools/R/smartbind.R 2012-04-19 22:09:02 UTC (rev 1528) +++ trunk/gtools/R/smartbind.R 2012-05-04 16:06:49 UTC (rev 1529) @@ -2,11 +2,8 @@ ## Function to do rbind of data frames quickly, even if the columns don't match ## -smartbind <- function(..., fill=NA, sep=':') +smartbind <- function(..., fill=NA, sep=':', verbose=FALSE) { - verbose <- FALSE - - data <- list(...) if(is.null(names(data))) names(data) <- as.character(1:length(data)) @@ -29,24 +26,50 @@ else paste(x, seq(1,rowLens[x]),sep=sep)) ) + colClassList <- vector(mode="list", length=length(data)) + factorColumnList <- vector(mode="list", length=length(data)) + factorLevelList <- vector(mode="list", length=length(data)) + - start <- 1 + start <- 1 + blockIndex <- 1 for(block in data) { - if(verbose) print(block) + colClassList [[blockIndex]] <- list() + factorColumnList[[blockIndex]] <- character(length=0) + factorLevelList [[blockIndex]] <- list() + + if(verbose) print(head(block)) end <- start+nrow(block)-1 for(col in colnames(block)) { + classVec <- class(block[,col]) + + ## store class and factor level information for later use + colClassList[[blockIndex]][[col]] <- classVec + if("factor" %in% classVec) + { + + factorColumnList[[blockIndex]] <- + c(factorColumnList[[blockIndex]], col) + + factorLevelList[[blockIndex]][[col]] <- + levels(block[,col]) + } + if( !(col %in% names(retval))) { if(verbose) cat("Start:", start, " End:", end, " Column:", col, "\n", sep="") - if(class(block[,col])=="factor") - newclass <- "character" + if ("factor" %in% classVec) + { + newclass <- "character" + } else - newclass <- class(block[,col]) + newclass <- classVec + retval[[col]] <- as.vector(rep(fill,nrows), mode=newclass) } @@ -54,9 +77,87 @@ mode=class(retval[[col]])) } start <- end+1 + blockIndex <- blockIndex+1 } - #retval <- as.list(retval) + all.equal.or.null <- function(x,y,...) + { + if(is.null(x) || is.null(y) ) + return(TRUE) + else + return(all.equal(x,y,...)) + } + + ## Handle factors, merging levels + for( col in unique(unlist(factorColumnList)) ) + { + ## Ensure column classes match across blocks + colClasses <- lapply(colClassList, function(x) x[[col]]) + allSame <- all(sapply(colClasses[-1], + function(x) isTRUE(all.equal.or.null(colClasses[[1]], x)) + ) + ) + + if(allSame) + colClass <- colClasses[[1]] + else + { + warning("Column class mismatch for '", col, "'. ", + "Converting column to class 'character'.") + next() + } + + + ## check if factor levels are all the same + colLevels <- lapply(factorLevelList, function(x) x[[col]]) + allSame <- all(sapply(colLevels[-1], + function(x) isTRUE(all.equal.or.null(colLevels[[1]], x)) + ) + ) + + + if(allSame) + { + if("ordered" %in% colClass) + retval[[col]] <- ordered(retval[[col]], levels=colLevels[[1]] ) + else + retval[[col]] <- factor(retval[[col]], levels=colLevels[[1]] ) + } + else + { + ## Check if longest set of levels is a superset of all others, + ## and use that one + longestIndex <- which.max( sapply(colLevels, length) ) + longestLevels <- colLevels[[longestIndex]] + allSubset <- sapply(colLevels[-longestIndex], + function(l) all(l %in% longestLevels) + ) + if(allSubset) + { + if("ordered" %in% colClass) + retval[[col]] <- ordered(retval[[col]], levels=longestLevels ) + else + retval[[col]] <- factor(retval[[col]], levels=longestLevels ) + } + else + { + # form superset by appending to longest level set + levelSuperSet <- unique(c(longestLevels, unlist(colLevels))) + retval[[col]] <- factor(retval[[col]], levels=levelSuperSet ) + + if(length(colClass)>1) # not just plain factor + { + browser() + warning( "column '", col, "' of class ", + paste("'", colClass, "'", collapse=":", + sep="'"), + " converted to class 'factor'. Check level ordering." ) + } + + } + } + } + attr(retval,"row.names") <- rowNameList class(retval) <- "data.frame" return(retval) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <wa...@us...> - 2013-03-17 02:21:46
|
Revision: 1640 http://sourceforge.net/p/r-gregmisc/code/1640 Author: warnes Date: 2013-03-17 02:21:43 +0000 (Sun, 17 Mar 2013) Log Message: ----------- Fix error in smartbind: factor levels were not being handled if the factor column was not present in the first data frame. Modified Paths: -------------- trunk/gtools/R/smartbind.R Modified: trunk/gtools/R/smartbind.R =================================================================== --- trunk/gtools/R/smartbind.R 2013-01-14 20:47:57 UTC (rev 1639) +++ trunk/gtools/R/smartbind.R 2013-03-17 02:21:43 UTC (rev 1640) @@ -19,9 +19,9 @@ retval <- list() rowLens <- unlist(lapply(data, nrow)) nrows <- sum(rowLens) - + rowNameList <- unlist(lapply( names(data), - function(x) + function(x) if(rowLens[x]<=1) x else paste(x, seq(1,rowLens[x]),sep=sep)) ) @@ -29,8 +29,8 @@ colClassList <- vector(mode="list", length=length(data)) factorColumnList <- vector(mode="list", length=length(data)) factorLevelList <- vector(mode="list", length=length(data)) - - + + start <- 1 blockIndex <- 1 for(block in data) @@ -38,7 +38,7 @@ colClassList [[blockIndex]] <- list() factorColumnList[[blockIndex]] <- character(length=0) factorLevelList [[blockIndex]] <- list() - + if(verbose) print(head(block)) end <- start+nrow(block)-1 for(col in colnames(block)) @@ -49,14 +49,14 @@ colClassList[[blockIndex]][[col]] <- classVec if("factor" %in% classVec) { - + factorColumnList[[blockIndex]] <- c(factorColumnList[[blockIndex]], col) - + factorLevelList[[blockIndex]][[col]] <- levels(block[,col]) } - + if( !(col %in% names(retval))) { if(verbose) cat("Start:", start, @@ -69,10 +69,10 @@ } else newclass <- classVec - + retval[[col]] <- as.vector(rep(fill,nrows), mode=newclass) } - + retval[[col]][start:end] <- as.vector(block[,col], mode=class(retval[[col]])) } @@ -87,48 +87,53 @@ else return(all.equal(x,y,...)) } - + ## Handle factors, merging levels for( col in unique(unlist(factorColumnList)) ) { ## Ensure column classes match across blocks colClasses <- lapply(colClassList, function(x) x[[col]]) - allSame <- all(sapply(colClasses[-1], - function(x) isTRUE(all.equal.or.null(colClasses[[1]], x)) + firstNotNull <- which(!sapply(colClasses, is.null))[1] + allSameOrNull <- all(sapply(colClasses[-firstNotNull], + function(x) isTRUE(all.equal.or.null(colClasses[[firstNotNull]], x)) ) ) - if(allSame) - colClass <- colClasses[[1]] + if(allSameOrNull) + { + # grab the first *non-NULL* class information + colClass <- colClasses[[firstNotNull]] + } else { warning("Column class mismatch for '", col, "'. ", "Converting column to class 'character'.") next() } - + ## check if factor levels are all the same colLevels <- lapply(factorLevelList, function(x) x[[col]]) - allSame <- all(sapply(colLevels[-1], - function(x) isTRUE(all.equal.or.null(colLevels[[1]], x)) - ) - ) + firstNotNull <- which(!sapply(colLevels, is.null))[1] + allSameOrNull <- all(sapply(colLevels[-firstNotNull], + function(x) isTRUE(all.equal.or.null(colLevels[[firstNotNull]], x)) + ) + ) - - if(allSame) + + if(allSameOrNull) { if("ordered" %in% colClass) - retval[[col]] <- ordered(retval[[col]], levels=colLevels[[1]] ) + retval[[col]] <- ordered(retval[[col]], levels=colLevels[[firstNotNull]] ) else - retval[[col]] <- factor(retval[[col]], levels=colLevels[[1]] ) + retval[[col]] <- factor(retval[[col]], levels=colLevels[[firstNotNull]] ) } else { ## Check if longest set of levels is a superset of all others, ## and use that one longestIndex <- which.max( sapply(colLevels, length) ) - longestLevels <- colLevels[[longestIndex]] + longestLevels <- colLevels[[longestIndex]] allSubset <- sapply(colLevels[-longestIndex], function(l) all(l %in% longestLevels) ) @@ -144,7 +149,7 @@ # form superset by appending to longest level set levelSuperSet <- unique(c(longestLevels, unlist(colLevels))) retval[[col]] <- factor(retval[[col]], levels=levelSuperSet ) - + if(length(colClass)>1) # not just plain factor { browser() @@ -157,7 +162,7 @@ } } } - + attr(retval,"row.names") <- rowNameList class(retval) <- "data.frame" return(retval) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <wa...@us...> - 2014-05-28 00:15:00
|
Revision: 1814 http://sourceforge.net/p/r-gregmisc/code/1814 Author: warnes Date: 2014-05-28 00:14:56 +0000 (Wed, 28 May 2014) Log Message: ----------- smartbind: Convert non-native type columns (except factor) to character. Modified Paths: -------------- trunk/gtools/R/smartbind.R Modified: trunk/gtools/R/smartbind.R =================================================================== --- trunk/gtools/R/smartbind.R 2014-04-18 18:11:20 UTC (rev 1813) +++ trunk/gtools/R/smartbind.R 2014-05-28 00:14:56 UTC (rev 1814) @@ -63,18 +63,33 @@ " End:", end, " Column:", col, "\n", sep="") + if ("factor" %in% classVec) { newclass <- "character" } else - newclass <- classVec + newclass <- classVec[1] + ## Coerce everything that isn't a native type to character + if(! (newclass %in% c("logical", "integer", "numeric", + "complex", "character", "raw") )) + { + newclass <- "character" + warning("Converting non-atomic type column '", col, + "' to type character.") + } + retval[[col]] <- as.vector(rep(fill,nrows), mode=newclass) } - retval[[col]][start:end] <- as.vector(block[,col], - mode=class(retval[[col]])) + mode <- class(retval[[col]]) + if(mode=="character") + vals <- as.character(block[,col]) + else + vals <- block[,col] + + retval[[col]][start:end] <- as.vector(vals, mode=mode) } start <- end+1 blockIndex <- blockIndex+1 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |