[R-gregmisc-users] SF.net SVN: r-gregmisc: [1202] trunk/SASxport
Brought to you by:
warnes
From: <wa...@us...> - 2007-11-01 06:01:53
|
Revision: 1202 http://r-gregmisc.svn.sourceforge.net/r-gregmisc/?rev=1202&view=rev Author: warnes Date: 2007-10-31 23:01:50 -0700 (Wed, 31 Oct 2007) Log Message: ----------- Copy code from foreign for lookup.xport() and read.xport(), extend lookup.xport() to show information about SAS format and iformat Added Paths: ----------- trunk/SASxport/R/xport.R trunk/SASxport/src/SASxport.c trunk/SASxport/src/SASxport.h Added: trunk/SASxport/R/xport.R =================================================================== --- trunk/SASxport/R/xport.R (rev 0) +++ trunk/SASxport/R/xport.R 2007-11-01 06:01:50 UTC (rev 1202) @@ -0,0 +1,32 @@ +### This file is part of the 'foreign' package for R. + +### +### Read SAS xport format libraries +### +### Copyright 1999-1999 Douglas M. Bates <bates$stat.wisc.edu>, +### Saikat DebRoy <saikat$stat.wisc.edu> +### +### This file is part of the `foreign' library for R and related languages. +### It is made available under the terms of the GNU General Public +### License, version 2, or at your option, any later version, +### incorporated herein by reference. +### +### This program is distributed in the hope that it will be +### useful, but WITHOUT ANY WARRANTY; without even the implied +### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +### PURPOSE. See the GNU General Public License for more +### details. +### +### You should have received a copy of the GNU General Public +### License along with this program; if not, a copy is available at +### http://www.r-project.org/Licenses/ + +lookup.xport.inner <- function(file) .Call(xport_info, file) + + +read.xport.inner <- function(file) { + data.info <- lookup.xport.inner(file) + ans <- .Call(xport_read, file, data.info) + if (length(ans) == 1) as.data.frame(ans[[1]]) + else lapply(ans, as.data.frame) +} Added: trunk/SASxport/src/SASxport.c =================================================================== --- trunk/SASxport/src/SASxport.c (rev 0) +++ trunk/SASxport/src/SASxport.c 2007-11-01 06:01:50 UTC (rev 1202) @@ -0,0 +1,722 @@ +/* + * + * Read SAS transport data set format + * + * Copyright 1999-1999 Douglas M. Bates <ba...@st...>, + * Saikat DebRoy <sa...@st...> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * http://www.r-project.org/Licenses/ + */ + +#include <stdio.h> +#include <string.h> +#include <R.h> +#include <Rinternals.h> +//#include "foreign.h" +#include "SASxport.h" + + +#ifdef ENABLE_NLS +#include <libintl.h> +#define _(String) dgettext ("foreign", String) +#define gettext_noop(String) (String) +#else +#define _(String) (String) +#define gettext_noop(String) (String) +#endif + + +#define HEADER_BEG "HEADER RECORD*******" +#define HEADER_TYPE_LIBRARY "LIBRARY " +#define HEADER_TYPE_MEMBER "MEMBER " +#define HEADER_TYPE_DSCRPTR "DSCRPTR " +#define HEADER_TYPE_NAMESTR "NAMESTR " +#define HEADER_TYPE_OBS "OBS " +#define HEADER_END "HEADER RECORD!!!!!!!000000000000000000000000000000 " + +#define LIB_HEADER HEADER_BEG HEADER_TYPE_LIBRARY HEADER_END +#define MEM_HEADER HEADER_BEG HEADER_TYPE_MEMBER \ + "HEADER RECORD!!!!!!!000000000000000001600000000" +#define DSC_HEADER HEADER_BEG HEADER_TYPE_DSCRPTR HEADER_END +#define NAM_HEADER HEADER_BEG HEADER_TYPE_NAMESTR \ + "HEADER RECORD!!!!!!!000000" +#define OBS_HEADER HEADER_BEG HEADER_TYPE_OBS HEADER_END +#define BLANK24 " " + +#define GET_RECORD(rec, fp, len) \ + fread((rec), sizeof(char), (size_t) (len), (fp)) + +#define IS_SASNA_CHAR(c) ((c) == 0x5f || (c) == 0x2e || \ + (0x41 <= (c) && (c) <= 0x5a)) + +#ifndef NULL +#define NULL ((void *) 0) +#endif + + +#define Two32 4294967296.0 + +static double get_IBM_double(char* c, size_t len) +{ + /* Conversion from IBM 360 format to double */ +/* + * IBM format: + * 6 5 0 + * 3 1 0 + * + * SEEEEEEEMMMM ......... MMMM + * + * Sign bit, 7 bit exponent, 56 bit fraction. Exponent is + * excess 64. The fraction is multiplied by a power of 16 of + * the actual exponent. Normalized floating point numbers are + * represented with the radix point immediately to the left of + * the high order hex fraction digit. + */ + unsigned int i, upper, lower; + /* exponent is expressed here as + excess 70 (=64+6) to accomodate + integer conversion of c[1] to c[4] */ + char negative = c[0] & 0x80, exponent = (c[0] & 0x7f) - 70, buf[4]; + double value; + char ibuf[8]; + + if (len < 2 || len > 8) + error(_("invalid field length in numeric variable")); + + /* this effectively zero-pads c: */ + memset(ibuf, 0, (size_t) 8); + memcpy(ibuf, c, len); + c = ibuf; + /* check for missing value */ + /* This isn't really right: NAs are ' ', '.', A-Z plus zero fill */ + if (c[1] == '\0' && c[0] != '\0') return R_NaReal; + /* convert c[1] to c[3] to an int */ + buf[0] = '\0'; + for (i = 1; i < 4; i++) buf[i] = c[i]; + char_to_uint(buf, upper); + /* convert c[4] to c[7] to an int */ + for (i = 0; i < 4; i++) buf[i] = c[i + 4]; + char_to_uint(buf, lower); + /* initialize the constant if needed */ + value = ((double) upper + ((double) lower)/Two32) * + pow(16., (double) exponent); + if (negative) value = -value; + return value; +} + +static int +get_nam_header(FILE *fp, struct SAS_XPORT_namestr *namestr, int length) +{ + char record[141]; + int n; + + record[length] = '\0'; + n = GET_RECORD(record, fp, length); + if(n != length) + return 0; + + char_to_short(record, namestr->ntype); + char_to_short(record+2, namestr->nhfun); + char_to_short(record+4, namestr->nlng); + char_to_short(record+6, namestr->nvar0); + memcpy(namestr->nname, record + 8, 8); + memcpy(namestr->nlabel, record + 16, 40); + memcpy(namestr->nform, record + 56, 8); + char_to_short(record+64, namestr->nfl); + char_to_short(record+66, namestr->nfd); + char_to_short(record+68, namestr->nfj); + memcpy(namestr->nfill, record + 70, 2); + memcpy(namestr->niform, record + 72, 8); + char_to_short(record+80, namestr->nifl); + char_to_short(record+82, namestr->nifd); + char_to_int(record+84, namestr->npos); + return 1; +} + +static int +get_lib_header(FILE *fp, struct SAS_XPORT_header *head) +{ + char record[81]; + int n; + + n = GET_RECORD(record, fp, 80); + if(n == 80 && strncmp(LIB_HEADER, record, 80) != 0) + error(_("file not in SAS transfer format")); + + n = GET_RECORD(record, fp, 80); + if(n != 80) + return 0; + record[80] = '\0'; + memcpy(head->sas_symbol[0], record, 8); + memcpy(head->sas_symbol[1], record+8, 8); + memcpy(head->saslib, record+16, 8); + memcpy(head->sasver, record+24, 8); + memcpy(head->sas_os, record+32, 8); + if((strrchr(record+40, ' ') - record) != 63) + return 0; + memcpy(head->sas_create, record+64, 16); + + n = GET_RECORD(record, fp, 80); + if(n != 80) + return 0; + record[80] = '\0'; + memcpy(head->sas_mod, record, 16); + if((strrchr(record+16, ' ') - record) != 79) + return 0; + return 1; +} + +static int +get_mem_header(FILE *fp, struct SAS_XPORT_member *member) +{ + char record[81]; + int n; + + n = GET_RECORD(record, fp, 80); + if(n != 80 || strncmp(DSC_HEADER, record, 80) != 0) + error(_("file not in SAS transfer format")); + + n = GET_RECORD(record, fp, 80); + if(n != 80) + return 0; + record[80] = '\0'; + memcpy(member->sas_symbol, record, 8); + memcpy(member->sas_dsname, record+8, 8); + memcpy(member->sasdata, record+16, 8); + memcpy(member->sasver, record+24, 8); + memcpy(member->sas_osname, record+32, 8); + if((strrchr(record+40, ' ') - record) != 63) + return 0; + memcpy(member->sas_create, record+64, 16); + + n = GET_RECORD(record, fp, 80); + if(n != 80) + return 0; + memcpy(member->sas_mod, record, 16); + if((strrchr(record+16, ' ') - record) != 79) + return 0; + return 1; +} + +static int +init_xport_info(FILE *fp) +{ + char record[81]; + int n; + int namestr_length; + + struct SAS_XPORT_header *lib_head; + + lib_head = Calloc(1, struct SAS_XPORT_header); + + if(!get_lib_header(fp, lib_head)) { + Free(lib_head); + error(_("SAS transfer file has incorrect library header")); + } + + Free(lib_head); + + n = GET_RECORD(record, fp, 80); + if(n != 80 || strncmp(MEM_HEADER, record, 75) != 0 || + strncmp(" ", record+78, 2) != 0) + error(_("file not in SAS transfer format")); + record[78] = '\0'; + sscanf(record+75, "%d", &namestr_length); + + return namestr_length; +} + +static int +init_mem_info(FILE *fp, char *name) +{ + int length, n; + char record[81]; + char *tmp; + struct SAS_XPORT_member *mem_head; + + mem_head = Calloc(1, struct SAS_XPORT_member); + if(!get_mem_header(fp, mem_head)) { + Free(mem_head); + error(_("SAS transfer file has incorrect member header")); + } + + n = GET_RECORD(record, fp, 80); + record[80] = '\0'; + if(n != 80 || strncmp(NAM_HEADER, record, 54) != 0 || + (strrchr(record+58, ' ') - record) != 79) { + Free(mem_head); + error(_("file not in SAS transfer format")); + } + record[58] = '\0'; + sscanf(record+54, "%d", &length); + + tmp = strchr(mem_head->sas_dsname, ' '); + n = tmp - mem_head->sas_dsname; + if(n > 0) { + if (n > 8) + n = 8; + strncpy(name, mem_head->sas_dsname, n); + name[n] = '\0'; + } else name[0] = '\0'; + + Free(mem_head); + + return length; +} + +static int +next_xport_info(FILE *fp, int namestr_length, int nvars, + int *headpad, + int *tailpad, + int *length, + int *ntype, + int *nlng, + int *nvar0, + SEXP nname, + SEXP nlabel, + SEXP nform, + int *nfl, + int *nfd, + SEXP niform, + int *nifl, + int *nifd, + int *npos) +{ + char *tmp; + char record[81]; + int i, n, nbytes, totwidth, nlength, restOfCard; + struct SAS_XPORT_namestr *nam_head; + + nam_head = Calloc(nvars, struct SAS_XPORT_namestr); + + for(i = 0; i < nvars; i++) { + if(!get_nam_header(fp, nam_head+i, namestr_length)) { + Free(nam_head); + error(_("SAS transfer file has incorrect library header")); + } + } + + *headpad = 480 + nvars * namestr_length; + i = *headpad % 80; + if(i > 0) { + i = 80 - i; + if (fseek(fp, i, SEEK_CUR) != 0) { + Free(nam_head); + error(_("file not in SAS transfer format")); + } + (*headpad) += i; + } + + n = GET_RECORD(record, fp, 80); + if(n != 80 || strncmp(OBS_HEADER, record, 80) != 0) { + Free(nam_head); + error(_("file not in SAS transfer format")); + } + + for(i = 0; i < nvars; i++) { + int nname_len = 0, nlabel_len = 0, nform_len = 0, niform_len=0; + char tmpname[41]; + + /* Variable storage type */ + ntype[i] = (int) ((nam_head[i].ntype == 1) ? REALSXP : STRSXP); + + /* Storage length */ + nlng[i] = nam_head[i].nlng; + + /* Variable number */ + nvar0[i] = nam_head[i].nvar0; + + /* Position of var in observation */ + npos[i] = nam_head[i].npos; + + /* Variable name */ + nname_len = 8; + while (nname_len && nam_head[i].nname[nname_len-1] == ' ') + nname_len--; + strncpy(tmpname, nam_head[i].nname, nname_len); + tmpname[nname_len] = '\0'; + SET_STRING_ELT(nname, i, mkChar(tmpname)); + + /* Variable label */ + nlabel_len = 40; + while (nlabel_len && nam_head[i].nlabel[nlabel_len-1] == ' ') + nlabel_len--; + strncpy(tmpname, nam_head[i].nlabel, nlabel_len); + tmpname[nlabel_len] = '\0'; + SET_STRING_ELT(nlabel, i, mkChar(tmpname)); + + /* Variable format name */ + nform_len = 8; + while (nform_len && nam_head[i].nform[nform_len-1] == ' ') + nform_len--; + strncpy(tmpname, nam_head[i].nform, nform_len); + tmpname[nform_len] = '\0'; + SET_STRING_ELT(nform, i, mkChar(tmpname)); + + /* Format length */ + nfl[i] = nam_head[i].nfl;; + + /* Format digits */ + nfd[i] = nam_head[i].nfd;; + + /* Variable iformat name */ + niform_len = 8; + while (niform_len && nam_head[i].niform[niform_len-1] == ' ') + niform_len--; + strncpy(tmpname, nam_head[i].niform, niform_len); + tmpname[niform_len] = '\0'; + SET_STRING_ELT(niform, i, mkChar(tmpname)); + + /* Format length */ + nifl[i] = nam_head[i].nifl;; + + /* Format digits */ + nifd[i] = nam_head[i].nifd;; + + } + + Free(nam_head); + + totwidth = 0; + for(i = 0; i < nvars; i++) + totwidth += nlng[i]; + + nbytes = 0; + nlength = 0; + tmp = Calloc(totwidth <= 80 ? 81 : (totwidth+1), char); + restOfCard = 0; + *tailpad = 0; + while(!feof(fp)) { + int allSpace = 1; + fpos_t currentPos; + +/* restOfCard = 80 - (ftell(fp) % 80); */ + if (fgetpos(fp, ¤tPos)) { + error(_("problem accessing SAS XPORT file")); + } + + n = GET_RECORD(tmp, fp, restOfCard); + if (n != restOfCard) { + allSpace = 0; + } else { + for (i = 0; i < restOfCard; i++) { + if (tmp[i] != ' ') { + allSpace = 0; + break; + } + } + } + if (allSpace) { + n = GET_RECORD(record, fp, 80); + if (n < 1) { + *tailpad = restOfCard; + break; + } + if(n == 80 && strncmp(MEM_HEADER, record, 75) == 0 && + strncmp(" ", record+78, 2) == 0) { + *tailpad = restOfCard; + record[78] = '\0'; + sscanf(record+75, "%d", &namestr_length); + break; + } + } + if (fsetpos(fp, ¤tPos)) { + error(_("problem accessing SAS XPORT file")); + } + + n = GET_RECORD(tmp, fp, totwidth); + if (n != totwidth) { + if (!feof(fp)) { + error(_("problem accessing SAS XPORT file")); + } + *tailpad = n; + break; + } + restOfCard = (restOfCard >= totwidth)? + (restOfCard - totwidth): + (80 - (totwidth - restOfCard)%80); + nlength++; + } + *length = nlength; + Free(tmp); + + return (feof(fp)?-1:namestr_length); +} + +/* + * get the list element named str. + */ + +static SEXP +getListElement(SEXP list, char *str) { + SEXP names; + SEXP elmt = (SEXP) NULL; + const char *tempChar; + int i; + + names = getAttrib(list, R_NamesSymbol); + + for (i = 0; i < LENGTH(list); i++) { + tempChar = CHAR(STRING_ELT(names, i)); + if( strcmp(tempChar,str) == 0) { + elmt = VECTOR_ELT(list, i); + break; + } + } + return elmt; +} + +#define VAR_INFO_LENGTH 16 + +const char *cVarInfoNames[VAR_INFO_LENGTH] = { + "headpad", + "type", + "width", + "index", + "position", + "name", + "label", + "format", + "flength", + "fdigits", + "iformat", + "iflength", + "ifdigits", + "sexptype", + "tailpad", + "length" +}; + +#define XPORT_VAR_HEADPAD(varinfo) VECTOR_ELT(varinfo, 0) +#define XPORT_VAR_TYPE(varinfo) VECTOR_ELT(varinfo, 1) +#define XPORT_VAR_WIDTH(varinfo) VECTOR_ELT(varinfo, 2) +#define XPORT_VAR_INDEX(varinfo) VECTOR_ELT(varinfo, 3) +#define XPORT_VAR_POSITION(varinfo) VECTOR_ELT(varinfo, 4) +#define XPORT_VAR_NAME(varinfo) VECTOR_ELT(varinfo, 5) +#define XPORT_VAR_LABEL(varinfo) VECTOR_ELT(varinfo, 6) +#define XPORT_VAR_FORM(varinfo) VECTOR_ELT(varinfo, 7) +#define XPORT_VAR_FLENGTH(varinfo) VECTOR_ELT(varinfo, 8) +#define XPORT_VAR_FDIGITS(varinfo) VECTOR_ELT(varinfo, 9) +#define XPORT_VAR_IFORM(varinfo) VECTOR_ELT(varinfo, 10) +#define XPORT_VAR_IFLENGTH(varinfo) VECTOR_ELT(varinfo, 11) +#define XPORT_VAR_IFDIGITS(varinfo) VECTOR_ELT(varinfo, 12) +#define XPORT_VAR_SEXPTYPE(varinfo) VECTOR_ELT(varinfo, 13) +#define XPORT_VAR_TAILPAD(varinfo) VECTOR_ELT(varinfo, 14) +#define XPORT_VAR_LENGTH(varinfo) VECTOR_ELT(varinfo, 15) + +#define SET_XPORT_VAR_HEADPAD(varinfo, val) SET_VECTOR_ELT(varinfo, 0, val) +#define SET_XPORT_VAR_TYPE(varinfo, val) SET_VECTOR_ELT(varinfo, 1, val) +#define SET_XPORT_VAR_WIDTH(varinfo, val) SET_VECTOR_ELT(varinfo, 2, val) +#define SET_XPORT_VAR_INDEX(varinfo, val) SET_VECTOR_ELT(varinfo, 3, val) +#define SET_XPORT_VAR_POSITION(varinfo, val) SET_VECTOR_ELT(varinfo, 4, val) +#define SET_XPORT_VAR_NAME(varinfo, val) SET_VECTOR_ELT(varinfo, 5, val) +#define SET_XPORT_VAR_LABEL(varinfo, val) SET_VECTOR_ELT(varinfo, 6, val) +#define SET_XPORT_VAR_FORM(varinfo, val) SET_VECTOR_ELT(varinfo, 7, val) +#define SET_XPORT_VAR_FLENGTH(varinfo, val) SET_VECTOR_ELT(varinfo, 8, val) +#define SET_XPORT_VAR_FDIGITS(varinfo, val) SET_VECTOR_ELT(varinfo, 9, val) +#define SET_XPORT_VAR_IFORM(varinfo, val) SET_VECTOR_ELT(varinfo, 10, val) +#define SET_XPORT_VAR_IFLENGTH(varinfo, val) SET_VECTOR_ELT(varinfo, 11, val) +#define SET_XPORT_VAR_IFDIGITS(varinfo, val) SET_VECTOR_ELT(varinfo, 12, val) +#define SET_XPORT_VAR_SEXPTYPE(varinfo, val) SET_VECTOR_ELT(varinfo, 13, val) +#define SET_XPORT_VAR_TAILPAD(varinfo, val) SET_VECTOR_ELT(varinfo, 14, val) +#define SET_XPORT_VAR_LENGTH(varinfo, val) SET_VECTOR_ELT(varinfo, 15, val) + + +SEXP +xport_info(SEXP xportFile) +{ + FILE *fp; + int i, namestrLength, memLength, ansLength; + char dsname[9]; + SEXP ans, ansNames, varInfoNames, varInfo; + SEXP char_numeric, char_character; + + PROTECT(varInfoNames = allocVector(STRSXP, VAR_INFO_LENGTH)); + for(i = 0; i < VAR_INFO_LENGTH; i++) + SET_STRING_ELT(varInfoNames, i, mkChar(cVarInfoNames[i])); + + PROTECT(char_numeric = mkChar("numeric")); + PROTECT(char_character = mkChar("character")); + + if(!isValidString(xportFile)) + error(_("first argument must be a file name")); + fp = fopen(R_ExpandFileName(CHAR(STRING_ELT(xportFile, 0))), "rb"); + if (!fp) + error(_("unable to open file")); + namestrLength = init_xport_info(fp); + + ansLength = 0; + PROTECT(ans = allocVector(VECSXP, 0)); + PROTECT(ansNames = allocVector(STRSXP, 0)); + + while(namestrLength > 0 && (memLength = init_mem_info(fp, dsname)) > 0) { + + PROTECT(varInfo = allocVector(VECSXP, VAR_INFO_LENGTH)); + setAttrib(varInfo, R_NamesSymbol, varInfoNames); + + SET_XPORT_VAR_TYPE(varInfo, allocVector(STRSXP, memLength)); + SET_XPORT_VAR_WIDTH(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_INDEX(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_POSITION(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_NAME(varInfo, allocVector(STRSXP, memLength)); + SET_XPORT_VAR_LABEL(varInfo, allocVector(STRSXP, memLength)); + SET_XPORT_VAR_FORM(varInfo, allocVector(STRSXP, memLength)); + SET_XPORT_VAR_FLENGTH(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_FDIGITS(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_IFORM(varInfo, allocVector(STRSXP, memLength)); + SET_XPORT_VAR_IFLENGTH(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_IFDIGITS(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_SEXPTYPE(varInfo, allocVector(INTSXP, memLength)); + SET_XPORT_VAR_HEADPAD(varInfo, allocVector(INTSXP, 1)); + SET_XPORT_VAR_TAILPAD(varInfo, allocVector(INTSXP, 1)); + SET_XPORT_VAR_LENGTH(varInfo, allocVector(INTSXP, 1)); + + namestrLength = + next_xport_info(fp, namestrLength, memLength, + INTEGER(XPORT_VAR_HEADPAD(varInfo)), + INTEGER(XPORT_VAR_TAILPAD(varInfo)), + INTEGER(XPORT_VAR_LENGTH(varInfo)), + INTEGER(XPORT_VAR_SEXPTYPE(varInfo)), + INTEGER(XPORT_VAR_WIDTH(varInfo)), + INTEGER(XPORT_VAR_INDEX(varInfo)), + XPORT_VAR_NAME(varInfo), + XPORT_VAR_LABEL(varInfo), + XPORT_VAR_FORM(varInfo), + INTEGER(XPORT_VAR_FLENGTH(varInfo)), + INTEGER(XPORT_VAR_FDIGITS(varInfo)), + XPORT_VAR_IFORM(varInfo), + INTEGER(XPORT_VAR_IFLENGTH(varInfo)), + INTEGER(XPORT_VAR_IFDIGITS(varInfo)), + INTEGER(XPORT_VAR_POSITION(varInfo)) + ); + + for(i = 0; i < memLength; i++) { + int *ntype = INTEGER(XPORT_VAR_SEXPTYPE(varInfo)); + SET_STRING_ELT(XPORT_VAR_TYPE(varInfo), i, + (ntype[i] == REALSXP) ? char_numeric : + char_character); + } + PROTECT(ans = lengthgets(ans, ansLength+1)); + PROTECT(ansNames = lengthgets(ansNames, ansLength+1)); +/* PROTECT(newAns = allocVector(VECSXP, ansLength+1)); */ +/* PROTECT(newAnsNames = allocVector(STRSXP, ansLength+1)); */ + +/* for(i = 0; i < ansLength; i++) { */ +/* SET_VECTOR_ELT(newAns, i, VECTOR_ELT(ans, i)); */ +/* SET_STRING_ELT(newAnsNames, i, STRING_ELT(ansNames, i)); */ +/* } */ +/* ans = newAns; */ +/* ansNames = newAnsNames; */ + + SET_STRING_ELT(ansNames, ansLength, mkChar(dsname)); + SET_VECTOR_ELT(ans, ansLength, varInfo); + ansLength++; + + UNPROTECT(5); + PROTECT(ans); + PROTECT(ansNames); + } + + setAttrib(ans, R_NamesSymbol, ansNames); + UNPROTECT(5); + fclose(fp); + return ans; +} + +SEXP +xport_read(SEXP xportFile, SEXP xportInfo) +{ + int i, j, k, n; + int nvar; + int ansLength, dataLength, totalWidth; + int dataHeadPad, dataTailPad; + int *dataWidth; + int *dataPosition; + SEXPTYPE *dataType; + char *record, *tmpchar, *c; + FILE *fp; + SEXP ans, names, data, dataInfo, dataName; + + ansLength = LENGTH(xportInfo); + PROTECT(ans = allocVector(VECSXP, ansLength)); + names = getAttrib(xportInfo, R_NamesSymbol); + setAttrib(ans, R_NamesSymbol, names); + + if(!isValidString(xportFile)) + error(_("first argument must be a file name")); + fp = fopen(R_ExpandFileName(CHAR(STRING_ELT(xportFile, 0))), "rb"); + if (!fp) + error(_("unable to open file")); + if (fseek(fp, 240, SEEK_SET) != 0) + error(_("problem reading SAS XPORT file '%s'"), + CHAR(STRING_ELT(xportFile, 0))); + + for(i = 0; i < ansLength; i++) { + dataInfo = VECTOR_ELT(xportInfo, i); + dataName = getListElement(dataInfo, "name"); + nvar = LENGTH(dataName); + dataLength = asInteger(getListElement(dataInfo, "length")); + SET_VECTOR_ELT(ans, i, data = allocVector(VECSXP, nvar)); + setAttrib(data, R_NamesSymbol, dataName); + dataType = (SEXPTYPE *) INTEGER(getListElement(dataInfo, "sexptype")); + for(j = 0; j < nvar; j++) + SET_VECTOR_ELT(data, j, allocVector(dataType[j], dataLength)); + + dataWidth = INTEGER(getListElement(dataInfo, "width")); + dataPosition = INTEGER(getListElement(dataInfo, "position")); + + totalWidth = 0; + for(j = 0; j < nvar; j++) + totalWidth += dataWidth[j]; + record = Calloc(totalWidth + 1, char); + + dataHeadPad = asInteger(getListElement(dataInfo, "headpad")); + dataTailPad = asInteger(getListElement(dataInfo, "tailpad")); + fseek(fp, dataHeadPad, SEEK_CUR); + + for(j = 0; j < dataLength; j++) { + n = GET_RECORD(record, fp, totalWidth); + if(n != totalWidth) { + error(_("problem reading SAS transport file")); + } + + for(k = nvar-1; k >= 0; k--) { + tmpchar = record + dataPosition[k]; + if(dataType[k] == REALSXP) { + REAL(VECTOR_ELT(data, k))[j] = + get_IBM_double(tmpchar, dataWidth[k]); + } else { + tmpchar[dataWidth[k]] = '\0'; + /* strip trailing blanks */ + c = tmpchar + dataWidth[k]; + while (c-- > tmpchar && *c == ' ') + *c ='\0'; + + SET_STRING_ELT(VECTOR_ELT(data, k), j, + (c < tmpchar) ? R_BlankString : + mkChar(tmpchar)); + } + } + } + + fseek(fp, dataTailPad, SEEK_CUR); + + Free(record); + } + UNPROTECT(1); + fclose(fp); + return ans; +} Added: trunk/SASxport/src/SASxport.h =================================================================== --- trunk/SASxport/src/SASxport.h (rev 0) +++ trunk/SASxport/src/SASxport.h 2007-11-01 06:01:50 UTC (rev 1202) @@ -0,0 +1,86 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * http://www.r-project.org/Licenses/ + */ + +/* + * This file is derived from code in the SAS Technical Support + * document TS-140 "The Record Layout of a Data Set in SAS Transport + * (XPORT) Format" available as + * http://ftp.sas.com/techsup/download/technote/ts140.html + */ + +#ifndef SASEXPORT_H +#define SASEXPORT_H + +#include <string.h> /* for memcpy and memset */ +#include "swap_bytes.h" + +/* double cnxptiee(double from, int fromtype, int totype); */ + + + +struct SAS_XPORT_header { + char sas_symbol[2][8]; /* should be "SAS " */ + char saslib[8]; /* should be "SASLIB " */ + char sasver[8]; + char sas_os[8]; + char sas_create[16]; + char sas_mod[16]; +}; + +struct SAS_XPORT_member { + char sas_symbol[8]; + char sas_dsname[8]; + char sasdata[8]; + char sasver[8]; + char sas_osname[8]; + char sas_create[16]; + char sas_mod[16]; +}; + +struct SAS_XPORT_namestr { + short ntype; /* VARIABLE TYPE: 1=NUMERIC, 2=CHAR */ + short nhfun; /* HASH OF NNAME (always 0) */ + short nlng; /* LENGTH OF VARIABLE IN OBSERVATION */ + short nvar0; /* VARNUM */ + char nname[8]; /* NAME OF VARIABLE */ + char nlabel[40]; /* LABEL OF VARIABLE */ + char nform[8]; /* NAME OF FORMAT */ + short nfl; /* FORMAT FIELD LENGTH OR 0 */ + short nfd; /* FORMAT NUMBER OF DECIMALS */ + short nfj; /* 0=LEFT JUSTIFICATION, 1=RIGHT JUST */ + char nfill[2]; /* (UNUSED, FOR ALIGNMENT AND FUTURE) */ + char niform[8]; /* NAME OF INPUT FORMAT */ + short nifl; /* INFORMAT LENGTH ATTRIBUTE */ + short nifd; /* INFORMAT NUMBER OF DECIMALS */ + int npos; /* POSITION OF VALUE IN OBSERVATION */ + char rest[52]; /* remaining fields are irrelevant */ +}; + +#ifdef WORDS_BIGENDIAN + +#define char_to_short(from, to) memcpy(&to, from, 2) +#define char_to_int(from, to) memcpy(&to, from, 4) +#define char_to_uint(from, to) memcpy(&to, from, 4) + +#else + +#define char_to_short(from, to) memcpy(&to, from, 2); reverse_short(to); +#define char_to_int(from, to) memcpy(&to, from, 4); reverse_int(to); +#define char_to_uint(from, to) memcpy(&to, from, 4); reverse_uint(to); + +#endif /* WORDS_BIGENDIAN */ + +#endif /* SASEXPORT_H */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |