[R-gregmisc-users] SF.net SVN: r-gregmisc:[1565] trunk/gdata
Brought to you by:
warnes
From: <wa...@us...> - 2012-06-18 20:26:38
|
Revision: 1565 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1565&view=rev Author: warnes Date: 2012-06-18 20:26:32 +0000 (Mon, 18 Jun 2012) Log Message: ----------- read.xls() and supporting functions now allow blank lines to be preserved, rather than skipped, by supplying the argument "blank.lines.skip=FALSE". The underlying perl function has been extended to support this via an optional "-s" argument which, when present, *preserves* blank lines during the conversion. Modified Paths: -------------- trunk/gdata/R/xls2sep.R trunk/gdata/inst/perl/xls2csv.pl trunk/gdata/man/read.xls.Rd trunk/gdata/tests/test.read.xls.R trunk/gdata/tests/test.read.xls.Rout.save Modified: trunk/gdata/R/xls2sep.R =================================================================== --- trunk/gdata/R/xls2sep.R 2012-06-13 01:10:28 UTC (rev 1564) +++ trunk/gdata/R/xls2sep.R 2012-06-18 20:26:32 UTC (rev 1565) @@ -1,19 +1,30 @@ ## s$Id$ -xls2csv <- function(xls, sheet=1, verbose=FALSE, ..., perl="perl") - xls2sep(xls=xls, sheet=sheet, verbose=verbose, ..., method="csv", +xls2csv <- function(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, + ..., perl="perl") + xls2sep(xls=xls, sheet=sheet, verbose=verbose, + blank.lines.skip=blank.lines.skip, ..., method="csv", perl=perl) -xls2tab <- function(xls, sheet=1, verbose=FALSE, ..., perl="perl") - xls2sep(xls=xls, sheet=sheet, verbose=verbose, ..., method="tab", +xls2tab <- function(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, + ..., perl="perl") + xls2sep(xls=xls, sheet=sheet, verbose=verbose, + blank.lines.skip=blank.lines.skip, ..., method="tab", perl=perl) -xls2tsv <- function(xls, sheet=1, verbose=FALSE, ..., perl="perl") - xls2sep(xls=xls, sheet=sheet, verbose=verbose, ..., method="tsv", +xls2tsv <- function(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, + ..., perl="perl") + xls2sep(xls=xls, sheet=sheet, verbose=verbose, + blank.lines.skip=blank.lines.skip, ..., method="tsv", perl=perl) -xls2sep <- function(xls, sheet=1, 
verbose=FALSE, ..., - method=c("csv","tsv","tab"), perl = perl) +xls2sep <- function(xls, + sheet=1, + verbose=FALSE, + blank.lines.skip=TRUE, + ..., + method=c("csv","tsv","tab"), + perl = perl) { method <- match.arg(method) @@ -69,9 +80,18 @@ ## ## + ## blank.lines.skip + ## + if (blank.lines.skip) + skipBlank="" + else + skipBlank="-s" + + ## ## execution command cmd <- paste(shQuote(perl), shQuote(script), + skipBlank, # flag is not quoted shQuote(xls), shQuote(targetFile), shQuote(sheet), Modified: trunk/gdata/inst/perl/xls2csv.pl =================================================================== --- trunk/gdata/inst/perl/xls2csv.pl 2012-06-13 01:10:28 UTC (rev 1564) +++ trunk/gdata/inst/perl/xls2csv.pl 2012-06-18 20:26:32 UTC (rev 1565) @@ -11,6 +11,7 @@ #use Spreadsheet::XLSX; use POSIX; use File::Spec::Functions; +use Getopt::Std; ## # Try to load the modules we need @@ -26,7 +27,8 @@ my($row, $col, $sheet, $cell, $usage, $targetfile,$basename, $sheetnumber, $filename, $volume, $directories, $whoami, - $sep, $sepName, $sepLabel, $sepExt); + $sep, $sepName, $sepLabel, $sepExt, + $skipBlankLines, %switches); ## ## Figure out whether I'm called as xls2csv.pl or xls2tab.pl @@ -66,11 +68,11 @@ ## $usage = <<EOF; -$whoami <excel file> [<output file>] [<worksheet number>] +$whoami [-s] <excel file> [<output file>] [<worksheet number>] -Translate the Microsoft Excel spreadsheet file contained in -<excel file> into $sepName separated value format ($sepLabel) and -store in <output file>. +Translate the Microsoft Excel spreadsheet file contained in <excel +file> into $sepName separated value format ($sepLabel) and store in +<output file>, skipping blank lines unless "-s" is present. 
If <output file> is not specified, the output file will have the same name as the input file with '.xls', or 'xlsx' removed and '.$sepExt' @@ -85,6 +87,12 @@ ## parse arguments ## +# Handle switches (currently, just -s) +getopts('s', \%switches); +$skipBlankLines=!$switches{s}; + +# Now the rest of the arguments + if( !defined($ARGV[0]) ) { print $usage; @@ -253,12 +261,12 @@ } # skip blank/empty lines - if( $outputLine =~ /^[$sep ]*$/ ) - { - $cumulativeBlankLines++ - } + if( $skipBlankLines && ($outputLine =~ /^[$sep ]*$/) ) + { + $cumulativeBlankLines++ + } else - { + { print OutFile "$outputLine \n" } } @@ -266,7 +274,7 @@ close OutFile; print " (Ignored $cumulativeBlankLines blank lines.)\n" - if ($cumulativeBlankLines); + if $skipBlankLines; print "\n"; } Modified: trunk/gdata/man/read.xls.Rd =================================================================== --- trunk/gdata/man/read.xls.Rd 2012-06-13 01:10:28 UTC (rev 1564) +++ trunk/gdata/man/read.xls.Rd 2012-06-18 20:26:32 UTC (rev 1565) @@ -9,11 +9,11 @@ \usage{ read.xls(xls, sheet=1, verbose=FALSE, pattern, na.strings=c("NA","#DIV/0!"), ..., method=c("csv","tsv","tab"), perl="perl") -xls2csv(xls, sheet=1, verbose=FALSE, ..., perl="perl") -xls2tab(xls, sheet=1, verbose=FALSE, ..., perl="perl") -xls2tsv(xls, sheet=1, verbose=FALSE, ..., perl="perl") -xls2sep(xls, sheet=1, verbose=FALSE, ..., method=c("csv","tsv","tab"), - perl="perl") +xls2csv(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, ..., perl="perl") +xls2tab(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, ..., perl="perl") +xls2tsv(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, ..., perl="perl") +xls2sep(xls, sheet=1, verbose=FALSE, blank.lines.skip=TRUE, ..., + method=c("csv","tsv","tab"), perl="perl") } \arguments{ \item{xls}{path to the Microsoft Excel file. 
Supports "http://", @@ -27,7 +27,10 @@ \item{method}{intermediate file format, "csv" for comma-separated and "tab" for tab-separated} \item{na.strings}{a character vector of strings which are to be interpreted - as 'NA' values. See \code{\link[utils]{read.table}} for details.} + as 'NA' values. See \code{\link[utils]{read.table}} for + details.} + \item{blank.lines.skip}{logical flag indicating whether blank lines in + the original file should be ignored.} \item{...}{additional arguments to read.table. The defaults for read.csv() are used.} } Modified: trunk/gdata/tests/test.read.xls.R =================================================================== --- trunk/gdata/tests/test.read.xls.R 2012-06-13 01:10:28 UTC (rev 1564) +++ trunk/gdata/tests/test.read.xls.R 2012-06-18 20:26:32 UTC (rev 1565) @@ -70,3 +70,21 @@ data <- read.xls(exampleFile2007, sheet="Sheet with initial text", skip=2) print(data) } + + +## Check handling of skip.blank.lines=FALSE + +example.skip <- read.xls(exampleFile, sheet=2, blank.lines.skip=FALSE) +example.skip + +if( 'XLSX' %in% xlsFormats() ) + { + example.x.skip <- read.xls(exampleFile2007, sheet=2, blank.lines.skip=FALSE) + example.x.skip + } + + + + + + Modified: trunk/gdata/tests/test.read.xls.Rout.save =================================================================== --- trunk/gdata/tests/test.read.xls.Rout.save 2012-06-13 01:10:28 UTC (rev 1564) +++ trunk/gdata/tests/test.read.xls.Rout.save 2012-06-18 20:26:32 UTC (rev 1565) @@ -642,6 +642,36 @@ 3 NA ThirdRow 3 2 1 NA Red 4 NA FourthRow 4 3 2 1 Black > +> +> ## Check handling of skip.blank.lines=FALSE +> +> example.skip <- read.xls(exampleFile, sheet=2, blank.lines.skip=FALSE) +> example.skip + X D E. 
F G Factor +1 FirstRow 1 NA NA NA Red +2 SecondRow 2 1 NA NA Green +3 NA NA NA NA +4 ThirdRow 3 2 1 NA Red +5 FourthRow 4 3 2 1 Black +> +> if( 'XLSX' %in% xlsFormats() ) ++ { ++ example.x.skip <- read.xls(exampleFile2007, sheet=2, blank.lines.skip=FALSE) ++ example.x.skip ++ } + X D E. F G Factor +1 FirstRow 1 NA NA NA Red +2 SecondRow 2 1 NA NA Green +3 NA NA NA NA +4 ThirdRow 3 2 1 NA Red +5 FourthRow 4 3 2 1 Black +> +> +> +> +> +> +> > proc.time() user system elapsed - 2.916 0.357 3.366 + 3.259 0.383 3.748 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |