I've attached data.restore4.txt, containing the function
data.restore4(), which has the same argument list as
foreign::data.restore() and is mean to be called by the latter if the
first line of the file is "## Dump S Version 4 Dump". It can read
version 4 of the 'S data dump' format, which for which S+ uses the
file extension ".sdd". It stores the objects it reads in the
environment specified by the 'env' argument/.
I think it works pretty well; please report any issues to me.
Bill Dunlap
TIBCO Software
wdunlap tibco.com
On Mon, Apr 17, 2017 at 3:20 PM, Daniel Molinari <d.a.molinari at
gmail.com> wrote:> Hi all,
>
> I have several data files provided in mtw format (Minitab) and sdd format
> (S-Plus) and I need to read them in R.
>
> I do not have access either to Minitab or to S-Plus.
>
> How can I accomplish this task ?
>
> Thank you,
> Daniel
>
> [[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
-------------- next part --------------
data.restore4 <- function(file, print = FALSE, verbose = FALSE, env =
.GlobalEnv)
{
# Like foreign::data.restore, but for S Version 4 data.dump format
# TODO: when creating functions within functions or expressions, make the
inner
# ones calls to function(), not already-created functions. Splus does
# not have lexical scoping so this should not affect behavior, but makes
# the new function more R-like.
# Dumping the function to a file and sourcing it back in would have the
same
# effect.
# TODO: deal with stored Splus objects that have an implicit class
# but no "class" attribute. Except for "matrix" and
"array" I don't
# think Splus creates such objects, but they exist in the 'data'
package
# and they depend on getOldClass() to map the class field in the
data.dump
# to a class vector in the object. E.g., get("wafer",
where="data") has
# class 'design' which should become attribute
class=c("design", "data.frame").
# Some 'ordered' objects are analogous - no class attribute and
class field is 'ordered' so you
# have to know that 'ordered' means
class=c("ordered","factor").
# "factor" and "ordered" may also be stored without a
named ".Label" attribute.
# (I have dealt with the factor the matrix/array objects are commonly
stored without
# named attributes - I assume that the structure has length 3 and the
attributes
# are ".Dims" and ".Dimnames".)
origFile <- file
if (!inherits(file, "connection")) {
file <- file(file, "r")
on.exit(close(file))
}
lineNo <- 0
nextLine <- function(n = 1) {
lineNo <<- lineNo + n
readLines(file, n = n)
}
Verbosely <- function(...) {
if (verbose) {
message(simpleMessage(paste("(object ", objName, ",
line ", lineNo, ") ", paste(..., collapse = " ", sep =
""), sep = ""), sys.call(-1)))
}
}
Stop <- function(...) {
stop(simpleError(paste(paste(..., collapse = " ", sep =
""), sep = "",
" (object ", objName, ", file ",
deparse(summary(file)$description), ", line ", lineNo, ")"),
sys.call(-1)))
}
Recurse <- function(length) {
# Never call 'blah <- .data.restore4()' directly as it may
return a missing
# argument object, which will break '<-' but not lapply.
lapply(seq_len(length), function(i) { .data.restore4() })
}
constructMissingArgument <- function() formals(function(x)NULL)$x
txt <- nextLine()
objName <- "<none yet>"
if (length(txt) != 1) {
Stop("File is empty")
}
if (txt != "## Dump S Version 4 Dump ##") {
Stop("File does not start with '## Dump S Version 4 Dump',
so this is not a SV4 data.dump file")
}
.data.restore4 <- function()
{
class <- nextLine()
mode <- nextLine()
length <- as.numeric(tmp <- nextLine())
if (is.na(length) || length%%1 != 0 || length < 0) {
Stop("Expected nonnegative integer 'length' at line
", lineNo, " but got ", deparse(tmp))
}
if (mode == "character") {
ret <- nextLine(length)
# convert \\n to newline, \\t to tab, etc. by using parse()
vapply(ret, function(string)parse(text=paste0("\"",
string, "\""))[[1]], FUN.VALUE="", USE.NAMES=FALSE)
} else if (mode == "logical") {
txt <- nextLine(length)
lglVector <- rep(NA, length)
lglVector[txt != "N"] <- as.logical(as.integer(txt[txt
!= "N"]))
lglVector
} else if (mode %in% c("integer", "single",
"numeric")) {
txt <- nextLine(length)
txt[txt == "M"] <- "NaN"
txt[txt == "I"] <- "Inf"
txt[txt == "J"] <- "-Inf"
if (mode == "single") {
mode <- "numeric"
}
atomicVector <- rep(as(NA, mode), length)
atomicVector[txt != "N"] <- as(txt[txt !=
"N"], mode)
atomicVector
} else if (mode == "complex") {
txt <- nextLine(length)
txt <- gsub("M", "NaN", txt)
txt <- gsub("\\<I\\>", "Inf", txt)
txt <- gsub("\\<J\\>", "-Inf", txt)
atomicVector <- rep(as(NA, mode), length)
atomicVector[txt != "N"] <- as(txt[txt !=
"N"], mode)
atomicVector
} else if (mode == "list") {
vectors <- Recurse(length)
vectors
} else if (mode == "NULL") {
NULL
} else if (mode == "structure") {
vectors <- Recurse(length)
if (class == ".named_I" || class == "named") {
if (length != 2) {
Stop("expected length of '.named_I' component
is 2, but got ", length)
} else if (!is.character(vectors[[2]])) {
Stop("expected second component of '.named_I'
to be character, but got ", deparse(mode(vectors[[2]])))
}
vector <- vectors[[1]]
names <- vectors[[2]]
if (is.call(vector) && identical(vector[[1]],
as.name("for"))) {
if (length(names) != 3 || !all(names[2:3] == ""))
{
Stop("expected only first entry of 'names'
for 'for' to be non-blank, but got ", deparse(names))
}
vector[[2]] <- as.name(names[1])
vector
} else if (is.call(vector) && identical(vector[[1]],
as.name(".Call"))) {
if (length(vector) - 1 != length(names)) {
Stop("expected lengths of names and .Call to be the
same, but got ", length(vector) - 1, " and ", length(names))
}
vector[[2]] <- names[1]
names[1] <- ""
if (any(names != "")) {
names(vector) <- c("", names)
}
vector
} else if (is.call(vector) && identical(vector[[1]],
as.name(".Internal"))) {
if (length(vector) - 1 != length(names)) {
Stop("expected lengths of names and .Internal to be
the same, but got ", length(vector) - 1, " and ", length(names))
}
Verbosely("Splus call to '.Internal' will not
work in R (or TERR)\n")
vector[[3]] <- names[2]
vector
} else if (is.call(vector) && identical(vector[[1]],
as.name("function"))) {
if (length(vector) - 1 != length(names)) {
Stop("expected lengths of argument names and
function to be the same, but got ", length(vector) - 1, " and ",
length(names))
}
func <- function()NULL
formals(func) <- as.pairlist(
structure(as.list(vector)[-c(1,length(vector))], names=names[-length(names)]) )
body(func) <- vector[[length(vector)]]
environment(func) <- env
func
} else if (is.call(vector) && identical(vector[[1]],
as.name("return"))) {
# In Splus, names are added to return(x,y) when return has
more than one argument
Verbosely("Multi-argument returns will fail in R (or
TERR): changing return(...) to return(list(...))\n")
if (length(vector)-1 != length(names)) {
Stop("expected number of returned items length of
their name to be the same, but got ", length(vector)-1, " and ",
length(names))
}
if (any(names != "")) {
names(vector) <- c("", names)
}
vector[[1]] <- as.name("list")
call("return", vector)
} else {
# finally, attributes
if (length(vector) != length(names)) {
Stop("expected lengths of '.named_I'
components to be the same, but got ", length(vector), " and ",
length(names))
}
names(vector) <- names
if (identical(names[1], ".Data")) { # a hack -
really want to know if vector had mode "structure" or not
if (".Tsp" %in% names) {
# ancient Splus objects have dates in .Tsp rounded
to 6 significant digits
i <- which(".Tsp" == names)
if (length(i) != 1) {
Stop("Multiple '.Tsp' attributes on
object")
}
tsp <- vector[[i]]
if (length(tsp) != 3 || !is.numeric(tsp)) {
Stop("'.Tsp' attribute should
contain 3 numbers, but got ", deparse(tsp))
}
n <- round( (tsp[2] - tsp[1]) * tsp[3] + 1)
vector[[i]] <- c(tsp[1], tsp[1] + (n-1) / tsp[3],
tsp[3])
if ( abs(tsp[2] - vector[[i]][2])/abs(tsp[2]) >
1e-8 ) {
Verbosely("Fixed up rounded '.Tsp'
from ", deparse(tsp), " to ", deparse(vector[[i]]))
}
}
do.call(structure, vector, quote = TRUE)
} else {
vector
}
}
} else if (class %in% c("matrix", "array")) {
if (length != 3) {
Stop("Expected 'matrix' or 'array'
structures to have length 3, but got ", length)
}
array(vectors[[1]], dim=vectors[[2]], dimnames=vectors[[3]])
} else {
vectors # TODO: this is ok within a .Named_I/structure object,
but otherwise means we omitted a known class (like 'factor' or
'ordered')
}
} else if (mode == "name") {
if (length != 1) {
Stop("expected length of 'name' objects is 1, but
got", length)
}
name <- as.name(nextLine())
# NULL is the NULL object itself in R, but a name bound to it in
Splus
if (identical(name, as.name("NULL"))) {
NULL
} else {
name
}
} else if (mode == "call") {
callList <- Recurse(length)
as.call(callList)
} else if (mode == "expression") {
exprList <- Recurse(length)
as.expression(exprList)
} else if (mode %in% c("<-", "=",
"<<-", "if", "{", "while",
"repeat", "break", "next", "return")) {
if (mode == "<<-") {
Verbosely("The '<<-' operator acts
differently in R (or TERR) and Splus")
}
as.call(c(list(as.name(mode)), Recurse(length)))
} else if (mode == "for") {
# Splus: list(loopVar = NULL, quote(sequenceCall), quote(bodyCall))
# R: list(as.name("for"), as.name("loopVar"),
quote(sequenceCall), quote(bodyCall))
# In Splus, the loopVar is a name for the list, which gets added
later by .named_I
as.call(c(list(as.name(mode)), Recurse(length)))
} else if (mode == "function") {
# As with "for", this will be further processed by
.named_I (if it has any arguments)
if (length > 1) {
as.call(c(list(as.name(mode)), Recurse(length)))
} else {
func <- function()NULL
# body(func) <- .data.restore4()
body(func) <- Recurse(length)[[1]]
environment(func) <- env
func
}
} else if (mode == ".Call") {
# again, must finish processing via .named_I (the C function name
will be in names(call))
as.call(c(list(as.name(mode)), Recurse(length)))
} else if (mode == "internal") {
# again, must finish processing via .named_I (the C function name
will be in names(call))
as.call(c(list(as.name(".Internal")), Recurse(length)))
} else if (mode == "missing") {
constructMissingArgument()
} else if (mode == "call with ...") {
if (length != 1) {
Stop("Expected length of 'call with ...' item to be
1, but it was ", length)
}
# call <- .data.restore4()
call <- Recurse(length)[[1]]
if (!is.call(call)) {
Stop("Expected child to 'call with ...' to be a
call, but it is a ", mode(call), "\n")
}
call
} else if (mode == "comment expression") {
if (length != 2) {
Stop("Expected length of 'comment expression' is 2,
but it was ", length)
}
commExprList <- Recurse(length)
if (!is.character(commExprList[[1]])) {
Stop("Expected first component of 'comment
expression' to be character, but it was ", mode(commExprList[[1]]))
}
commExprList[[2]]
} else if (mode == "(") {
callExpr <- Recurse(length)
as.call(callExpr)
} else {
# What else did I miss?
Stop("Unimplemented mode: ", deparse(mode))
}
}
while (length(objName <- nextLine()) == 1) {
if (print) {
cat(deparse(objName), ":\n", sep="")
}
Verbosely("Starting to read\n")
obj <- .data.restore4()
Verbosely(" class=", deparse(class(obj)), ",
size=", object.size(obj), "\n")
assign(objName, obj, envir=env)
if (print) {
cat(" ", class(obj), "\n", sep="")
}
}
origFile
}