[R-gregmisc-users] SF.net SVN: r-gregmisc: [1123] trunk/SASxport/R
Brought to you by:
warnes
From: <wa...@us...> - 2007-08-08 18:53:47
|
Revision: 1123 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1123&view=rev Author: warnes Date: 2007-08-08 11:53:42 -0700 (Wed, 08 Aug 2007) Log Message: ----------- Updates Added Paths: ----------- trunk/SASxport/R/AFirst.lib.s trunk/SASxport/R/formats.R trunk/SASxport/R/iformat.R trunk/SASxport/R/importConvertDateTime.R trunk/SASxport/R/label.R trunk/SASxport/R/lookup.xport.R trunk/SASxport/R/makeNames.R trunk/SASxport/R/read.xport.R trunk/SASxport/R/testDateTime.R trunk/SASxport/R/units.R Removed Paths: ------------- trunk/SASxport/R/xport.R Added: trunk/SASxport/R/AFirst.lib.s =================================================================== --- trunk/SASxport/R/AFirst.lib.s (rev 0) +++ trunk/SASxport/R/AFirst.lib.s 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,19 @@ +## +## Code originally from Frank Harrell's 'Hmisc' library: +## http://biostat.mc.vanderbilt.edu/twiki/bin/view/Main/Hmisc +## Copied with permission on 2007-08-04 +## + +## $Id: AFirst.lib.s,v 1.6 2005/09/26 15:44:17 dupontct Exp $ +under.unix <- !(version$os=='Microsoft Windows' || + version$os=='Win32' || version$os=='mingw32') + +.R. <- TRUE +.SV4. <- FALSE + +.noGenenerics <- TRUE # faster loading as new methods not used + +if(!exists('existsFunction')) { + existsFunction <- function(...) exists(..., mode='function') +} + Added: trunk/SASxport/R/formats.R =================================================================== --- trunk/SASxport/R/formats.R (rev 0) +++ trunk/SASxport/R/formats.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,20 @@ +formats <- function(x, default) + UseMethod("formats") + +formats.default <- function(x, default=NULL) +{ + lab <- attr(x,"format") + if(is.null(lab)) + default + else + lab +} + +"formats<-" <- function(x, value) + UseMethod("formats<-") + +"formats<-.default" <- function(x, value) +{ + attr(x,'format') <- value + x +} Added: trunk/SASxport/R/iformat.R =================================================================== --- trunk/SASxport/R/iformat.R (rev 0) +++ trunk/SASxport/R/iformat.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,20 @@ +iformat <- function(x, default) + UseMethod("iformat") + +iformat.default <- function(x, default=NULL) +{ + lab <- attr(x,"iformat") + if(is.null(lab)) + default + else + lab +} + +"iformat<-" <- function(x, value) + UseMethod("iformat<-") + +"iformat<-.default" <- function(x, value) +{ + attr(x,'iformat') <- value + x +} Added: trunk/SASxport/R/importConvertDateTime.R =================================================================== --- trunk/SASxport/R/importConvertDateTime.R (rev 0) +++ trunk/SASxport/R/importConvertDateTime.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,40 @@ +importConvertDateTime <- + function(x, type=c('date','time','datetime'), + input=c('sas','spss','dataload'), form) +{ + type <- match.arg(type) + input <- match.arg(input) + + if(input != 'sas' && type != 'date') + stop('only date variables are support for spss, dataload') + + if(.R.) { + adjdays <- c(sas=3653, spss=141428, dataload=135080)[input] + ## 1970-1-1 minus 1960-1-1, 1582-10-14, or 1600-3-1 + if(input=='spss') x <- x/86400 + + switch(type, + date = structure(x - adjdays, class='Date'), + time = { + ## Don MacQueen 3Apr02 + z <- structure(x, class=c('POSIXt','POSIXct')) + f <- format(z, tz='GMT') + z <- as.POSIXct(format(z, tz='GMT'), tz='') + structure(z, class=c('timePOSIXt','POSIXt','POSIXct')) + }, + datetime = { + chron((x - adjdays*86400)/86400, + out.format=c(dates='day mon year', times='h:m:s')) + } + ) + } else if(.SV4.) + switch(type, + date = timeDate(julian=x, format=form), + time = timeDate(ms=x*1000, format=form), + datetime = timeDate(julian=x/86400, format=form)) + else + switch(type, + date = dates(x, out.format=form), + time = chron(x/86400, out.format=form), + datetime = chron(x/86400, out.format=form)) +} Added: trunk/SASxport/R/label.R =================================================================== --- trunk/SASxport/R/label.R (rev 0) +++ trunk/SASxport/R/label.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,20 @@ +label <- function(x, default) + UseMethod("label") + +label.default <- function(x, default=NULL) +{ + lab <- attr(x,"label") + if(is.null(lab)) + default + else + lab +} + +"label<-" <- function(x, value) + UseMethod("label<-") + +"label<-.default" <- function(x, value) +{ + attr(x,'label') <- value + x +} Added: trunk/SASxport/R/lookup.xport.R =================================================================== --- trunk/SASxport/R/lookup.xport.R (rev 0) +++ trunk/SASxport/R/lookup.xport.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,2 @@ +## Simply make this accessible here as a convenience to the user +lookup.xport <- foreign:::lookup.xport Added: trunk/SASxport/R/makeNames.R =================================================================== --- trunk/SASxport/R/makeNames.R (rev 0) +++ trunk/SASxport/R/makeNames.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,12 @@ +makeNames <- function(names, unique=FALSE, allow=NULL) +{ + ## Runs make.names with exceptions in vector allow + ## By default, R 1.9 make.names is overridden to convert _ to . as + ## with S-Plus and previous versions of R. Specify allow='_' otherwise. + if(!.R. & length(allow)) + stop('does not apply for S-Plus') + n <- make.names(names, unique) + if(!length(allow)) + n <- gsub('_', '.', n) + n +} Added: trunk/SASxport/R/read.xport.R =================================================================== --- trunk/SASxport/R/read.xport.R (rev 0) +++ trunk/SASxport/R/read.xport.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,214 @@ +read.xport <- function(file, + force.integer=TRUE, + formats=NULL, + name.chars=NULL, + names.tolower=TRUE, + keep=NULL, + drop=NULL, + as.is=0.95, # Prevent factor conversion if 95% or more unique + verbose=FALSE + ) + { + sasdateform <- + toupper(c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy", + "julian","qtr","weekdate","weekdatx","weekday","month")) + sastimeform <- toupper(c("hhmm","hour","mmss","time")) + sasdatetimeform <- toupper(c("datetime","tod")) + + if(length(grep('http://', file))) { + tf <- tempfile() + download.file(file, tf, mode='wb', quiet=TRUE) + file <- tf + } + + if(verbose) + { + oldOptionsDebug <- options("DEBUG") + options(DEBUG=TRUE) + on.exit(options(DEBUG=oldOptionsDebug)) + } + + scat("Extracting data file information...") + dsinfo <- foreign:::lookup.xport(file) + + if(length(keep)) + whichds <- toupper(keep) + else + + whichds <- setdiff(names(dsinfo), c(toupper(drop),'_CONTENTS_','_contents_')) + + scat("Reading the data file...") + ds <- foreign:::read.xport(file) + + if( (length(keep)>0 || length(drop)>0) ) + ds <- ds[whichds] + + scat("Processing contents...") + ## PROC FORMAT CNTLOUT= dataset present? + fds <- NULL + if(!length(formats)) { + fds <- sapply(dsinfo, function(x) + all(c('FMTNAME','START','END','MIN','MAX','FUZZ') + %in% x$name)) + fds <- names(fds)[fds] + if(length(fds) > 1) { + warning('transport file contains more than one PROC FORMAT CNTLOUT= dataset; using only the first') + fds <- fds[1] + } + } + + finfo <- NULL + if(length(formats) || length(fds)) { + if(length(formats)) + finfo <- formats + else + finfo <- ds[[fds]] + + ## Remove leading $ from char format names + ## fmtname <- sub('^\\$','',as.character(finfo$FMTNAME)) + fmtname <- as.character(finfo$FMTNAME) + finfo <- split(finfo[c('START','END','LABEL')], fmtname) + finfo <- lapply(finfo, + function(f) + { + rb <- function(a) + { # remove leading + trailing blanks + a <- sub('[[:space:]]+$', '', as.character(a)) + sub('^[[:space:]]+', '', a) + } + + st <- rb(f$START) + en <- rb(f$END) + lab <- rb(f$LABEL) + ##j <- is.na(st) | is.na(en) + ## st %in% c('','.','NA') | en %in% c('','.','NA') + j <- is.na(st) | is.na(en) | st == '' | en == '' + if(any(j)) { + warning('NA in code in FORMAT definition; removed') + st <- st[!j]; en <- en[!j]; lab <- lab[!j] + } + + if(!all(st==en)) + return(NULL) + + list(value = all.is.numeric(st, 'vector'), + label = lab) + }) + } + + ## Number of non-format datasets + nods <- length(whichds) + nds <- nods - (length(formats) == 0 && length(finfo) > 0) + which.regular <- setdiff(whichds, fds) + dsn <- tolower(which.regular) + + + ## Handle lowercase name conversions + if(names.tolower) + names.tolower <- tolower + else + names.tolower <- function(x) x + + if(nds > 1) + { + res <- vector('list', nds) + names(res) <- gsub('_','.',dsn) + } + + possiblyConvertChar <- (is.logical(as.is) && !as.is) || + (is.numeric(as.is) && as.is > 0) + j <- 0 + for(k in which.regular) { + j <- j + 1 + scat('Processing SAS dataset', k) + w <- + if(nods==1) + ds + else ds[[k]] + + scat('.') + + if(!length(w)) { + scat('Empty dataset', k, 'ignored\n') + next + } + + nam <- names.tolower(makeNames(names(w), allow=name.chars)) + names(w) <- nam + dinfo <- dsinfo[[k]] + fmt <- sub('^\\$','',dinfo$format) + lab <- dinfo$label + ndinfo <- names.tolower(makeNames(dinfo$name, allow=name.chars)) + names(lab) <- names(fmt) <- ndinfo + for(i in 1:length(w)) { + changed <- FALSE + x <- w[[i]] + fi <- fmt[nam[i]]; names(fi) <- NULL + if(fi != '' && length(finfo) && (fi %in% names(finfo))) { + f <- finfo[[fi]] + if(length(f)) { ## may be NULL because had a range in format + x <- factor(x, f$value, f$label) + attr(x, 'format') <- fi + changed <- TRUE + } + } + + if(is.numeric(x)) { + if(fi %in% sasdateform) { + x <- importConvertDateTime(x, 'date', 'sas') + changed <- TRUE + } else if(fi %in% sastimeform) { + x <- importConvertDateTime(x, 'time', 'sas') + changed <- TRUE + } else if(fi %in% sasdatetimeform) { + x <- importConvertDateTime(x, 'datetime', 'sas') + changed <- TRUE + } else if(force.integer) { + if(all(is.na(x))) { + storage.mode(x) <- 'integer' + changed <- TRUE + } else if(max(abs(x),na.rm=TRUE) <= (2^31-1) && + all(floor(x) == x, na.rm=TRUE)) { + storage.mode(x) <- 'integer' + changed <- TRUE + } + } + } else if(possiblyConvertChar && is.character(x)) { + if((is.logical(as.is) && !as.is) || + (is.numeric(as.is) && length(unique(x)) < as.is*length(x))) { + x <- factor(x, exclude='') + changed <- TRUE + } + } + + lz <- lab[nam[i]] + if(lz != '') { + names(lz) <- NULL + label(x) <- lz + changed <- TRUE + } + + fmt <- fmt[nam[i]]; + if( !is.null(fmt) && !is.na(fmt) && fmt > '') { + names(fmt) <- NULL + formats(x) <- fmt + changed <- TRUE + } + + if(changed) + w[[i]] <- x + } + + scat('.') + + if(nds>1) + res[[j]] <- w + } + + scat("Done") + + + if(nds > 1) + res + else w + } Added: trunk/SASxport/R/testDateTime.R =================================================================== --- trunk/SASxport/R/testDateTime.R (rev 0) +++ trunk/SASxport/R/testDateTime.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,36 @@ + +## Determine if variable is a date, time, or date/time variable in R +## or S-Plus. The following 2 functions are used by describe.vector +## timeUsed assumes is date/time combination variable and has no NAs +testDateTime <- function(x, what=c('either','both','timeVaries')) +{ + what <- match.arg(what) + cl <- class(x) # was oldClass 22jun03 + if(!length(cl)) + return(FALSE) + + dc <- if(.R.) + c('Date', 'POSIXt','POSIXct','dates','times','chron') + else + c('timeDate','date','dates','times','chron') + + dtc <- if(.R.) + c('POSIXt','POSIXct','chron') + else + c('timeDate','chron') + + switch(what, + either = any(cl %in% dc), + both = any(cl %in% dtc), + timeVaries = { + if('chron' %in% cl || 'Date' %in% cl || !.R.) { + ## chron or S+ timeDate + y <- as.numeric(x) + length(unique(round(y - floor(y),13))) > 1 + } + else if(.R.) + length(unique(format(x,'%H%M%S'))) > 1 + else + FALSE + }) +} Added: trunk/SASxport/R/units.R =================================================================== --- trunk/SASxport/R/units.R (rev 0) +++ trunk/SASxport/R/units.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -0,0 +1,20 @@ +units <- function(x, default) + UseMethod("units") + +units.default <- function(x, default=NULL) +{ + lab <- attr(x,"units") + if(is.null(lab)) + default + else + lab +} + +"units<-" <- function(x, value) + UseMethod("units<-") + +"units<-.default" <- function(x, value) +{ + attr(x,'units') <- value + x +} Deleted: trunk/SASxport/R/xport.R =================================================================== --- trunk/SASxport/R/xport.R 2007-08-08 18:53:04 UTC (rev 1122) +++ trunk/SASxport/R/xport.R 2007-08-08 18:53:42 UTC (rev 1123) @@ -1,31 +0,0 @@ -### -### Read SAS xport format libraries -### -### Copyright 1999-1999 Douglas M. Bates <bates$stat.wisc.edu>, -### Saikat DebRoy <saikat$stat.wisc.edu> -### -### This file is part of the `foreign' library for R and related languages. -### It is made available under the terms of the GNU General Public -### License, version 2, or at your option, any later version, -### incorporated herein by reference. -### -### This program is distributed in the hope that it will be -### useful, but WITHOUT ANY WARRANTY; without even the implied -### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -### PURPOSE. See the GNU General Public License for more -### details. -### -### You should have received a copy of the GNU General Public -### License along with this program; if not, write to the Free -### Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -### MA 02110-1301, USA - -lookup.xport <- function(file) .Call(xport_info, file) - - -read.xport <- function(file) { - data.info <- lookup.xport(file) - ans <- .Call(xport_read, file, data.info) - if (length(ans) == 1) as.data.frame(ans[[1]]) - else lapply(ans, as.data.frame) -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |