[R-gregmisc-users] SF.net SVN: r-gregmisc:[1808] trunk/gtools
Brought to you by:
warnes
From: <wa...@us...> - 2014-04-17 16:56:37
|
Revision: 1808 http://sourceforge.net/p/r-gregmisc/code/1808 Author: warnes Date: 2014-04-17 16:56:34 +0000 (Thu, 17 Apr 2014) Log Message: ----------- Add ASCIIfy function posted to RDevel by Arni Magnusson Modified Paths: -------------- trunk/gtools/DESCRIPTION trunk/gtools/NAMESPACE Added Paths: ----------- trunk/gtools/R/ASCIIfy.R trunk/gtools/man/ASCIIfy.Rd Modified: trunk/gtools/DESCRIPTION =================================================================== --- trunk/gtools/DESCRIPTION 2014-04-10 02:32:21 UTC (rev 1807) +++ trunk/gtools/DESCRIPTION 2014-04-17 16:56:34 UTC (rev 1808) @@ -1,8 +1,8 @@ Package: gtools Title: Various R programming tools Description: Various R programming tools -Version: 3.3.1 -Date: 2014-03-01 +Version: 3.4.0 +Date: 2014-04-15 Author: Gregory R. Warnes, Ben Bolker, and Thomas Lumley Maintainer: Gregory R. Warnes <gr...@wa...> License: LGPL-2.1 Modified: trunk/gtools/NAMESPACE =================================================================== --- trunk/gtools/NAMESPACE 2014-04-10 02:32:21 UTC (rev 1807) +++ trunk/gtools/NAMESPACE 2014-04-17 16:56:34 UTC (rev 1808) @@ -4,6 +4,7 @@ addLast, ask, assert, + ASCIIfy, binsearch, capture, checkRVersion, Added: trunk/gtools/R/ASCIIfy.R =================================================================== --- trunk/gtools/R/ASCIIfy.R (rev 0) +++ trunk/gtools/R/ASCIIfy.R 2014-04-17 16:56:34 UTC (rev 1808) @@ -0,0 +1,39 @@ +ASCIIfy <- function(string, bytes=2, fallback="?") +{ + bytes <- match.arg(as.character(bytes), 1:2) + convert <- function(char) # convert to ASCII, e.g. 
"z", "\xfe", or "\u00fe" + { + raw <- charToRaw(char) + if(length(raw)==1 && raw<=127) # 7-bit + ascii <- char + else if(length(raw)==1 && bytes==1) # 8-bit to \x00 + ascii <- paste0("\\x", raw) + else if(length(raw)==1 && bytes==2) # 8-bit to \u0000 + ascii <- paste0("\\u", chartr(" ","0",formatC(as.character(raw),width=4))) + else if(length(raw)==2 && bytes==1) # 16-bit to \x00, if possible + if(utf8ToInt(char) <= 255) + ascii <- paste0("\\x", format.hexmode(utf8ToInt(char))) + else { + ascii <- fallback; warning(char, " could not be converted to 1 byte")} + else if(length(raw)==2 && bytes==2) # UTF-8 to \u0000 + ascii <- paste0("\\u", format.hexmode(utf8ToInt(char),width=4)) + else { + ascii <- fallback + warning(char, " could not be converted to ", bytes, " byte")} + return(ascii) + } + + if(length(string) > 1) + { + sapply(string, ASCIIfy, bytes=bytes, fallback=fallback, USE.NAMES=FALSE) + } + else + { + input <- unlist(strsplit(string,"")) # "c" "a" "f" "<\'e>" + output <- character(length(input)) # "" "" "" "" + for(i in seq_along(input)) + output[i] <- convert(input[i]) # "c" "a" "f" "\\u00e9" + output <- paste(output, collapse="") # "caf\\u00e9" + return(output) + } +} Added: trunk/gtools/man/ASCIIfy.Rd =================================================================== --- trunk/gtools/man/ASCIIfy.Rd (rev 0) +++ trunk/gtools/man/ASCIIfy.Rd 2014-04-17 16:56:34 UTC (rev 1808) @@ -0,0 +1,47 @@ +\name{ASCIIfy} +\alias{ASCIIfy} +\title{Convert Characters to ASCII} +\description{ + Convert character vector to ASCII, replacing non-ASCII characters with + single-byte (\samp{\x00}) or two-byte (\samp{\u0000}) codes. 
+} +\usage{ +ASCIIfy(string, bytes = 2, fallback = "?") +} +\arguments{ + \item{string}{a character vector, possibly containing non-ASCII + characters.} + \item{bytes}{either \code{1} or \code{2}, for single-byte + (\samp{\x00}) or two-byte (\samp{\u0000}) codes.} + \item{fallback}{an output character to use, when input characters + cannot be converted.} +} +\value{ + A character vector like \code{string}, except non-ASCII characters have + been replaced with \samp{\x00} or \samp{\u0000} codes. +} +\author{Arni Magnusson \email{ar...@ha...}} +\note{ + To render single backslashes, use these or similar techniques: + \verb{ + write(ASCIIfy(string), "file.txt") + cat(paste(ASCIIfy(string), collapse="\n"), "\n", sep="")} + + The resulting strings are plain ASCII and can be used in R functions + and datasets to improve package portability. +} +\seealso{ + \code{\link[tools]{showNonASCII}} identifies non-ASCII characters in + a character vector. +} +\examples{ +cities <- c("S\u00e3o Paulo", "Reykjav\u00edk") +print(cities) +ASCIIfy(cities, 1) +ASCIIfy(cities, 2) + +athens <- "\u0391\u03b8\u03ae\u03bd\u03b1" +print(athens) +ASCIIfy(athens) +} +\keyword{} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |