[R-gregmisc-users] SF.net SVN: r-gregmisc: [1183] trunk/SASxport
Brought to you by:
warnes
From: <wa...@us...> - 2007-09-14 15:35:45
|
Revision: 1183 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1183&view=rev Author: warnes Date: 2007-09-14 08:35:40 -0700 (Fri, 14 Sep 2007) Log Message: ----------- Add option to read.xport() that permits inclusion of PROC CONTENTS format information in the returned list Modified Paths: -------------- trunk/SASxport/R/read.xport.R trunk/SASxport/inst/ChangeLog trunk/SASxport/man/lookup.xport.Rd trunk/SASxport/man/read.xport.Rd trunk/SASxport/tests/TestUnnamedComponents.Rout.save Added Paths: ----------- trunk/SASxport/R/process.formats.R trunk/SASxport/data/ trunk/SASxport/data/Alfalfa.R trunk/SASxport/data/Alfalfa.xpt trunk/SASxport/man/Alfalfa.Rd Added: trunk/SASxport/R/process.formats.R =================================================================== --- trunk/SASxport/R/process.formats.R (rev 0) +++ trunk/SASxport/R/process.formats.R 2007-09-14 15:35:40 UTC (rev 1183) @@ -0,0 +1,38 @@ +## Transform SAS 'PROC CONTENTS' dataset into a form useful for +## converting raw SAS objects to/from the appropriate R objects. + +process.formats <- function(finfo) + { + if(is.null(finfo)) return( list() ) + ## Remove leading $ from char format names + ## fmtname <- sub('^\\$','',as.character(finfo$FMTNAME)) + fmtname <- as.character(finfo$FMTNAME) + finfo <- split(finfo[c('START','END','LABEL')], fmtname) + finfo <- lapply(finfo, + function(f) + { + rb <- function(a) + { # remove leading + trailing blanks + a <- sub('[[:space:]]+$', '', as.character(a)) + sub('^[[:space:]]+', '', a) + } + + st <- rb(f$START) + en <- rb(f$END) + lab <- rb(f$LABEL) + ##j <- is.na(st) | is.na(en) + ## st %in% c('','.','NA') | en %in% c('','.','NA') + j <- is.na(st) | is.na(en) | st == '' | en == '' + if(any(j)) { + warning('NA in code in FORMAT definition; removed') + st <- st[!j]; en <- en[!j]; lab <- lab[!j] + } + + if(!all(st==en)) + stop("Format ranges are not handled.") + + list(value = all.is.numeric(st, 'vector'), + label = lab) + }) + finfo + } Modified: trunk/SASxport/R/read.xport.R =================================================================== --- trunk/SASxport/R/read.xport.R 2007-09-14 14:37:48 UTC (rev 1182) +++ trunk/SASxport/R/read.xport.R 2007-09-14 15:35:40 UTC (rev 1183) @@ -13,7 +13,8 @@ drop=NULL, as.is=0.95, # Prevent factor conversion if 95% or more unique verbose=FALSE, - as.list=FALSE + as.list=FALSE, + include.formats=FALSE ) { sasdateform <- @@ -84,40 +85,9 @@ finfo <- NULL if(length(formats) || length(fds)) { if(length(formats)) - finfo <- formats + finfo <- process.formats(formats) else - finfo <- ds[[fds]] - - ## Remove leading $ from char format names - ## fmtname <- sub('^\\$','',as.character(finfo$FMTNAME)) - fmtname <- as.character(finfo$FMTNAME) - finfo <- split(finfo[c('START','END','LABEL')], fmtname) - finfo <- lapply(finfo, - function(f) - { - rb <- function(a) - { # remove leading + trailing blanks - a <- sub('[[:space:]]+$', '', as.character(a)) - sub('^[[:space:]]+', '', a) - } - - st <- rb(f$START) - en <- rb(f$END) - lab <- rb(f$LABEL) - ##j <- is.na(st) | is.na(en) - ## st %in% c('','.','NA') | en %in% c('','.','NA') - j <- is.na(st) | is.na(en) | st == '' | en == '' - if(any(j)) { - warning('NA in code in FORMAT definition; removed') - st <- st[!j]; en <- en[!j]; lab <- lab[!j] - } - - if(!all(st==en)) - return(NULL) - - list(value = all.is.numeric(st, 'vector'), - label = lab) - }) + finfo <- process.formats(ds[[fds]]) } ## Number of non-format datasets @@ -165,7 +135,8 @@ for(i in 1:length(w)) { changed <- FALSE x <- w[[i]] - fi <- fmt[nam[i]]; names(fi) <- NULL + fi <- fmt[nam[i]]; + names(fi) <- NULL if(fi != '' && length(finfo) && (fi %in% names(finfo))) { f <- finfo[[fi]] if(length(f)) { ## may be NULL because had a range in format @@ -227,6 +198,13 @@ } scat("Done") + + if(include.formats) + { + nds <- nds+1 + res$"FORMATS" <- ds[[fds]] + } + if(nds > 1 || as.list) res Added: trunk/SASxport/data/Alfalfa.R =================================================================== --- trunk/SASxport/data/Alfalfa.R (rev 0) +++ trunk/SASxport/data/Alfalfa.R 2007-09-14 15:35:40 UTC (rev 1183) @@ -0,0 +1,3 @@ +library(SASxport) + +Alfalfa <- read.xport("Alfalfa.xpt") Added: trunk/SASxport/data/Alfalfa.xpt =================================================================== (Binary files differ) Property changes on: trunk/SASxport/data/Alfalfa.xpt ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Modified: trunk/SASxport/inst/ChangeLog =================================================================== --- trunk/SASxport/inst/ChangeLog 2007-09-14 14:37:48 UTC (rev 1182) +++ trunk/SASxport/inst/ChangeLog 2007-09-14 15:35:40 UTC (rev 1183) @@ -1,3 +1,94 @@ +2007-09-13 01:55 warnes + + * tests/TestUnnamedComponents.Rout.save: Update test output to + match recent changes. + +2007-09-13 01:19 warnes + + * R/write.xport.R: Add checking and handling for unnamed data + frames or variables + +2007-09-13 01:14 warnes + + * tests/Alfalfa_Test.Rout.save, tests/TestUnnamedComponents.R, + tests/TestUnnamedComponents.Rout.save, tests/Theoph.Rout.save, + tests/cars.Rout.save, tests/testDates.Rout.save, + tests/test_fields.Rout.save, tests/xport.Rout.save, + tests/xxx.Rout.save: Update tests now that 'units' and 'units<-' + functions no longer are included + +2007-09-12 22:27 warnes + + * NAMESPACE: Remove units() and units<-() functions since they + arene't ever used. + +2007-09-12 22:25 warnes + + * R/units.R, man/label.Rd, man/units.Rd: Remove units() and + units<-() functions since they arene't ever used. + +2007-09-12 22:24 warnes + + * man/units.Rd: Remove units from manual page + +2007-09-11 23:08 warnes + + * man/read.xport.Rd: Add assertion to test that read.xport(.., + as.list=TRUE) works properly + +2007-09-11 23:05 warnes + + * R/write.xport.R: Forgot to save buffer before svn commit. + +2007-09-11 21:22 warnes + + * R/read.xport.R, man/read.xport.Rd: Correct error in handling + 'verbose' argument, error when more than one dataset has the same + name, and add 'as.list' argument to ensure return value is a + list, even if there is only one dataset in the file + +2007-09-11 21:21 warnes + + * R/write.xport.R, man/write.xport.Rd: Improve handling of list + argument. Also check that names are proper and unique. + +2007-09-07 16:47 warnes + + * tests/Theoph.R, tests/Theoph.Rout.save: Add round-trip test for + Theoph data set + +2007-09-07 16:32 warnes + + * tests/Alfalfa_Test.Rout.save, tests/cars.Rout.save, + tests/testDates.Rout.save, tests/xxx.Rout.save: Change argument + name in write.xport from 'filename' to 'file' to match read.xport + +2007-09-07 16:25 warnes + + * man/write.xport.Rd: Change argument name in write.xport from + 'filename' to 'file' to match read.xport + +2007-09-07 16:23 warnes + + * R/write.xport.R, man/read.xport.Rd, man/write.xport.Rd, + tests/Alfalfa_Test.R, tests/cars.R, tests/testDates.R, + tests/xxx.R: Change argument name in write.xport from 'filename' + to 'file' to match read.xport + +2007-09-07 16:21 warnes + + * DESCRIPTION: Fix typo + +2007-08-29 02:24 warnes + + * DESCRIPTION: Update Version to 1.0, depend on current version of + foreign + +2007-08-22 19:21 warnes + + * ChangeLog, NEWS, inst/ChangeLog, inst/NEWS: Update ChangeLog and + NEWS files + 2007-08-22 18:11 warnes * R/read.xport.R, man/read.xport.Rd, tests/xport.Rout.save: Modify Added: trunk/SASxport/man/Alfalfa.Rd =================================================================== --- trunk/SASxport/man/Alfalfa.Rd (rev 0) +++ trunk/SASxport/man/Alfalfa.Rd 2007-09-14 15:35:40 UTC (rev 1183) @@ -0,0 +1,54 @@ +\name{Alfalfa} +\alias{Alfalfa} +\docType{data} +\title{ Example SAS data set } +\description{ + This data set exists to provide an example file for lookup.xport() and + read.xport() +} +\usage{data(Alfalfa)} +\format{ + A data frame with 40 observations on the following 6 variables. + \describe{ + \item{\code{POP}}{Population, a factor with levels \code{MAX} amd \code{min}} + \item{\code{SAMPLE}}{Sample ID (0:5)} + \item{\code{REP}}{Replicate (always 1)} + \item{\code{SEEDWT}}{Sed weight} + \item{\code{HARV1}}{Harvest 1 volume} + \item{\code{HARV2}}{Harvest 2 volume} + } +} +\details{ + Population "MAX" has slightly higher harvest volumes (\code{HARV1} and + \code{HARV2}) than population "min". (Surprise! Shock! Awe!) + } +\source{ + The 'Alfalfa.xpt' file was obtained from the R 'foreign' package. +} +\examples{ +data(Alfalfa) + +# go were the data is... +here <- getwd() +setwd(file.path(.path.package("SASxport"),"data")) + +# Description of the file contents +lookup.xport("Alfalfa.xpt") + +# Load the file contents +Alfalfa <- read.xport("Alfalfa.xpt") +head(Alfalfa) + +# return home +setwd(here) + +# Just for fun, plot the data +par(mfrow=c(1,2)) +plot( HARV1 ~ POP, data=Alfalfa) +plot( HARV2 ~ POP, data=Alfalfa) + + + + +} +\keyword{datasets} Modified: trunk/SASxport/man/lookup.xport.Rd =================================================================== --- trunk/SASxport/man/lookup.xport.Rd 2007-09-14 14:37:48 UTC (rev 1182) +++ trunk/SASxport/man/lookup.xport.Rd 2007-09-14 15:35:40 UTC (rev 1183) @@ -44,11 +44,13 @@ for \code{\link[foreign]{lookup.xport}}. } \examples{ -\dontrun{ +\dontshow{ +setwd(file.path(.path.package("SASxport"),"data")) +} ## Get information on a local file -lookup.xport("xxx.xpt") -} +lookup.xport("Alfalfa.xpt") + ## Or read a copy of test2.xpt available on the web: \dontrun{ url <- 'http://biostat.mc.vanderbilt.edu/cgi-bin/viewvc.cgi/*checkout*/Hmisc/trunk/tests/test2.xpt' Modified: trunk/SASxport/man/read.xport.Rd =================================================================== --- trunk/SASxport/man/read.xport.Rd 2007-09-14 14:37:48 UTC (rev 1182) +++ trunk/SASxport/man/read.xport.Rd 2007-09-14 15:35:40 UTC (rev 1183) @@ -14,7 +14,8 @@ drop=NULL, as.is=0.95, verbose=FALSE, - as.list=FALSE + as.list=FALSE, + include.formats=FALSE ) } \arguments{ @@ -63,14 +64,18 @@ during the data loading and conversion process.} \item{as.list}{Logical indicating whether to return a list even if the SAS xport file contains only only one dataset.} + \item{include.formats}{Logical indicating whether to include SAS + format information (if present) in the returned list} } \value{ - If there the trasport file only contains one dataset (not counting any - \code{PROC FORMAT} datasets) and \code{as.list=FALSE}, the result is a - single data set. Otherwise the result is a list - containing all the non-\code{PROC FORMAT} datasets. + If only a single dataset is present (after removing \code{PROC FORMAT} + data when \code{include.formats=FALSE}), the return value is a single + dataframe object. Otherwise the return is a list of dataframe objects. + + Note that if \code{include.formats=TRUE}, the returned list will + contain a dataframe named "FORMATS" containing any available 'PROC FORMAT' + information. } - \details{ \itemize{ @@ -163,6 +168,9 @@ # We can also get the dataset wrapped in a list w <- read.xport(url, as.list=TRUE) +# And we can ask for the format information to be included as well. +w <- read.xport(url, as.list=TRUE, include.formats=TRUE) + \dontshow{ SASxport:::assert( is.data.frame(w)==FALSE && is.list(w)==TRUE ) } Modified: trunk/SASxport/tests/TestUnnamedComponents.Rout.save =================================================================== --- trunk/SASxport/tests/TestUnnamedComponents.Rout.save 2007-09-14 14:37:48 UTC (rev 1182) +++ trunk/SASxport/tests/TestUnnamedComponents.Rout.save 2007-09-14 15:35:40 UTC (rev 1183) @@ -74,6 +74,9 @@ rd.xpr> # We can also get the dataset wrapped in a list rd.xpr> w <- read.xport(url, as.list=TRUE) +rd.xpr> # And we can ask for the format information to be included as well. +rd.xpr> w <- read.xport(url, as.list=TRUE, include.formats=TRUE) + rd.xpr> ## Don't show: rd.xpr> SASxport:::assert( is.data.frame(w)==FALSE && is.list(w)==TRUE ) > write.xport(w$test,file="a.xpt") @@ -228,6 +231,30 @@ Z.X7 Z X7 numeric 8 100 Z.X8 Z X8 numeric 8 100 +Variables in data set `FORMATS': + dataset name type format width label nobs +FORMATS.FMTNAME FORMATS FMTNAME character 8 3 +FORMATS.START FORMATS START character 16 3 +FORMATS.END FORMATS END character 16 3 +FORMATS.LABEL FORMATS LABEL character 8 3 +FORMATS.MIN FORMATS MIN numeric 8 3 +FORMATS.MAX FORMATS MAX numeric 8 3 +FORMATS.DEFAULT FORMATS DEFAULT numeric 8 3 +FORMATS.LENGTH FORMATS LENGTH numeric 8 3 +FORMATS.FUZZ FORMATS FUZZ numeric 8 3 +FORMATS.PREFIX FORMATS PREFIX character 8 3 +FORMATS.MULT FORMATS MULT numeric 8 3 +FORMATS.FILL FORMATS FILL character 8 3 +FORMATS.NOEDIT FORMATS NOEDIT numeric 8 3 +FORMATS.TYPE FORMATS TYPE character 8 3 +FORMATS.SEXCL FORMATS SEXCL character 8 3 +FORMATS.EEXCL FORMATS EEXCL character 8 3 +FORMATS.HLO FORMATS HLO character 8 3 +FORMATS.DECSEP FORMATS DECSEP character 8 3 +FORMATS.DIG3SEP FORMATS DIG3SEP character 8 3 +FORMATS.DATATYPE FORMATS DATATYPE character 8 3 +FORMATS.LANGUAGE FORMATS LANGUAGE character 8 3 + > > names(w) <- NULL > write.xport(w[[1]],w[[2]],file="a.xpt") @@ -284,6 +311,30 @@ NONAME.1.X7 NONAME.1 X7 numeric 8 100 NONAME.1.X8 NONAME.1 X8 numeric 8 100 +Variables in data set `NONAME.2': + dataset name type format width label nobs +NONAME.2.FMTNAME NONAME.2 FMTNAME character 8 3 +NONAME.2.START NONAME.2 START character 16 3 +NONAME.2.END NONAME.2 END character 16 3 +NONAME.2.LABEL NONAME.2 LABEL character 8 3 +NONAME.2.MIN NONAME.2 MIN numeric 8 3 +NONAME.2.MAX NONAME.2 MAX numeric 8 3 +NONAME.2.DEFAULT NONAME.2 DEFAULT numeric 8 3 +NONAME.2.LENGTH NONAME.2 LENGTH numeric 8 3 +NONAME.2.FUZZ NONAME.2 FUZZ numeric 8 3 +NONAME.2.PREFIX NONAME.2 PREFIX character 8 3 +NONAME.2.MULT NONAME.2 MULT numeric 8 3 +NONAME.2.FILL NONAME.2 FILL character 8 3 +NONAME.2.NOEDIT NONAME.2 NOEDIT numeric 8 3 +NONAME.2.TYPE NONAME.2 TYPE character 8 3 +NONAME.2.SEXCL NONAME.2 SEXCL character 8 3 +NONAME.2.EEXCL NONAME.2 EEXCL character 8 3 +NONAME.2.HLO NONAME.2 HLO character 8 3 +NONAME.2.DECSEP NONAME.2 DECSEP character 8 3 +NONAME.2.DIG3SEP NONAME.2 DIG3SEP character 8 3 +NONAME.2.DATATYPE NONAME.2 DATATYPE character 8 3 +NONAME.2.LANGUAGE NONAME.2 LANGUAGE character 8 3 + > > ### Check that we catch invalid parameters > failure <- try( write.xport(5,"a.xpt") ) @@ -352,6 +403,9 @@ rd.xpr> # We can also get the dataset wrapped in a list rd.xpr> w <- read.xport(url, as.list=TRUE) +rd.xpr> # And we can ask for the format information to be included as well. +rd.xpr> w <- read.xport(url, as.list=TRUE, include.formats=TRUE) + rd.xpr> ## Don't show: rd.xpr> SASxport:::assert( is.data.frame(w)==FALSE && is.list(w)==TRUE ) > write.xport(list=base::list(w$test,w$z),file="a.xpt") @@ -437,6 +491,9 @@ rd.xpr> # We can also get the dataset wrapped in a list rd.xpr> w <- read.xport(url, as.list=TRUE) +rd.xpr> # And we can ask for the format information to be included as well. +rd.xpr> w <- read.xport(url, as.list=TRUE, include.formats=TRUE) + rd.xpr> ## Don't show: rd.xpr> SASxport:::assert( is.data.frame(w)==FALSE && is.list(w)==TRUE ) > names(w) <- NULL @@ -466,6 +523,30 @@ NONAME.1.X7 NONAME.1 X7 numeric 8 100 NONAME.1.X8 NONAME.1 X8 numeric 8 100 +Variables in data set `NONAME.2': + dataset name type format width label nobs +NONAME.2.FMTNAME NONAME.2 FMTNAME character 8 3 +NONAME.2.START NONAME.2 START character 16 3 +NONAME.2.END NONAME.2 END character 16 3 +NONAME.2.LABEL NONAME.2 LABEL character 8 3 +NONAME.2.MIN NONAME.2 MIN numeric 8 3 +NONAME.2.MAX NONAME.2 MAX numeric 8 3 +NONAME.2.DEFAULT NONAME.2 DEFAULT numeric 8 3 +NONAME.2.LENGTH NONAME.2 LENGTH numeric 8 3 +NONAME.2.FUZZ NONAME.2 FUZZ numeric 8 3 +NONAME.2.PREFIX NONAME.2 PREFIX character 8 3 +NONAME.2.MULT NONAME.2 MULT numeric 8 3 +NONAME.2.FILL NONAME.2 FILL character 8 3 +NONAME.2.NOEDIT NONAME.2 NOEDIT numeric 8 3 +NONAME.2.TYPE NONAME.2 TYPE character 8 3 +NONAME.2.SEXCL NONAME.2 SEXCL character 8 3 +NONAME.2.EEXCL NONAME.2 EEXCL character 8 3 +NONAME.2.HLO NONAME.2 HLO character 8 3 +NONAME.2.DECSEP NONAME.2 DECSEP character 8 3 +NONAME.2.DIG3SEP NONAME.2 DIG3SEP character 8 3 +NONAME.2.DATATYPE NONAME.2 DATATYPE character 8 3 +NONAME.2.LANGUAGE NONAME.2 LANGUAGE character 8 3 + > > # remove variable names > example(read.xport) @@ -523,6 +604,9 @@ rd.xpr> # We can also get the dataset wrapped in a list rd.xpr> w <- read.xport(url, as.list=TRUE) +rd.xpr> # And we can ask for the format information to be included as well. +rd.xpr> w <- read.xport(url, as.list=TRUE, include.formats=TRUE) + rd.xpr> ## Don't show: rd.xpr> SASxport:::assert( is.data.frame(w)==FALSE && is.list(w)==TRUE ) > colnames(w[[2]]) <- c() @@ -553,4 +637,28 @@ Z.NONAME_3 Z NONAME_3 numeric 8 100 Z.NONAME_4 Z NONAME_4 numeric 8 100 +Variables in data set `FORMATS': + dataset name type format width label nobs +FORMATS.FMTNAME FORMATS FMTNAME character 8 3 +FORMATS.START FORMATS START character 16 3 +FORMATS.END FORMATS END character 16 3 +FORMATS.LABEL FORMATS LABEL character 8 3 +FORMATS.MIN FORMATS MIN numeric 8 3 +FORMATS.MAX FORMATS MAX numeric 8 3 +FORMATS.DEFAULT FORMATS DEFAULT numeric 8 3 +FORMATS.LENGTH FORMATS LENGTH numeric 8 3 +FORMATS.FUZZ FORMATS FUZZ numeric 8 3 +FORMATS.PREFIX FORMATS PREFIX character 8 3 +FORMATS.MULT FORMATS MULT numeric 8 3 +FORMATS.FILL FORMATS FILL character 8 3 +FORMATS.NOEDIT FORMATS NOEDIT numeric 8 3 +FORMATS.TYPE FORMATS TYPE character 8 3 +FORMATS.SEXCL FORMATS SEXCL character 8 3 +FORMATS.EEXCL FORMATS EEXCL character 8 3 +FORMATS.HLO FORMATS HLO character 8 3 +FORMATS.DECSEP FORMATS DECSEP character 8 3 +FORMATS.DIG3SEP FORMATS DIG3SEP character 8 3 +FORMATS.DATATYPE FORMATS DATATYPE character 8 3 +FORMATS.LANGUAGE FORMATS LANGUAGE character 8 3 + > This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |