Salutations:
I have been trying to translate a S-PLUS/ArcInfo (GIS software) application
that I wrote on a SGI (IRIX) platform to public domain R and GrassGIS on
a Linux platform. I am almost on the verge of abandoning it as I find R to
be
rather unstable, slow and frustrating.
I enclose a section of my code for R experts to examine
hoping that they'll point out that all the above three are due to my
ignorance rather than any deficiencies in R.
Thanks much to Prof. Ripley and Dr. Peter Wolf for earlier comments.
Platform:
Linux (Red Hat 6.2), 400 MHz Pentium II with 192 MB of RAM.
R version: 1.1.1 (August 15, 2000)
The foll. function (abridged from the original) is passed from a tcl/tk
applicaiton,
thru perl....
I get the foll. error (I have indicated in the comments on code where
things go wrong)....
The datafile fia802.dat has 2121 rows and 30 variables...
> R --vsize 50M --nsize 6M --no-restore
R> X11()
R>library(tcltk)
R>source("eda_lmtst.rf")
R>f.eda.lm(
dataset="fia802.dat",
pickall="ALFISOL + AVGT + BD + CEC + CLAY +
ERODFAC +INCEPTSL + JANT + JARPPET + JULT + MAXELV + MAYSEPT + MINELV +
MOLLISOL + OM + PERM + PET + PH + PPT + PROD + ROCKDEP + ROCKFRAG + SLOPE +
SPODOSOL + TAWC + TEXCOARS + TEXFINE + ULTISOL",
pickedlist="BD + CEC + CLAY + ERODFAC + INCEPTSL",
resp="iv802",
idvar="FIPS")
Reading in data
DONE Reading in data
Lm Model
Done building model
Plotting the model output
DONE-Plotting the model output
Before tcltk
Segmentation fault (core dumped)
NOTE: Sometimes the program runs (very slowly) without segmentation
fault...
File -> eda_lmtst.rf
NOTE: pickall -> is all the predictor variables in file
pickedlist -> is the subset of pickall that is used by the model
resp -> is the response variable
idvar -> is the Identifying variable
"f.eda.lm"<-
function(dataset="", pickall="", pickedlist="",
resp="",idvar="")
{
options(width=200, length=10000)
# Create the main data object
cat("Reading in data", "\n")
xd <- read.table(dataset, header=T,as.is=T)
xd <- read.table(dataset,header=T, as.is=T,
row.names=as.character(1:nrow(xd)))
cat("DONE Reading in data", "\n")
attach(xd)
xd <- xd[, -c(charmatch(paste(resp), names(xd)))]
assign(resp, get(resp))
ndf <- as.data.frame(get(resp))
names(ndf) <- "addcolx"
xd <- data.frame(xd, ndf)
names(xd)[charmatch("addcolx", names(xd))] <- resp
cat("Lm Model", "\n")
lmexp <- formula(paste(resp, " ~ ", pickedlist,sep=""))
lmexpall <- formula(paste(resp, " ~ .", sep=""))
assign("xd", xd, env=.GlobalEnv)
tmpzx <- lm(lmexp, data=xd)
# NOTE: THE STEP() TAKES FOREVER IN R
# tmpzx2 <-lm(lmexpall,data=xd)
# tmpzxall <- step(tmpzx2, trace=0)
cat ("Done building model", "\n")
# Get predictions
xp <- predict(tmpzx,se.fit=T,interval=c("confidence"))
xpnd <- xp
xpndfit.df <- as.data.frame(xpnd$fit)
xpnd.o <- data.frame(act=get(resp),fit.act=xp$fit)
mapres.opact <- data.frame(act=get(resp))
mapres.opprd <- data.frame(fit.act=xp$fit)
write.table(xpnd.o,"lmtst.dat",quote=F,row.names=F)
mapres.op <- data.frame(idvar=get(idvar), act=mapres.opact,
prd=mapres.opprd)
# Output for mapping response variable
write.table(mapres.op,"lm_mapres.txt",quote=F,row.names=F)
# Output summary statistics
xs <- summary(tmpzx)
# xsall <- summary(tmpzxall)
sink("lmsumm.txt")
cat("\n")
print(xs)
cat("\n")
print("******** STEPWISE REGRESSION *********")
print ("Not done")
sink()
# Plot model output
par(mfrow=c(2,2))
cat("Plotting the model output","\n")
# NOTE: THE LAST PLOT TAKES FOREVER
plot(tmpzx,which=1:4)
dev.print(png, "lmplot.png", width=640,height=640)
par(mfrow=c(1,1))
cat("DONE-Plotting the model output","\n")
# TCLTK INTERFACE TO CHOOSE TYPE OF PLOT AND IDENTIFY OUTLIERS
# NOTE THAT I WANT THE MAIN DIALOG (ACTUALVSPREDICTED AND RESIDUALPLOT)
# TO PERSIST BETWEEN OUTLIER IDENTIFICATION...SO THAT THE USER CAN
# KEEP PLOTTING AND IDENTIFYING TILL HE HITS THE EXIT BUTTON.
xlabel <- paste(resp, "(Actual)",sep="")
ylabel <- paste(resp, "(Predicted)", sep="")
xdnc.act <- get(resp)
xdnc.max <- max(xdnc.act)
xpnd.max <- max(xpndfit.df$fit)
xdnc.min <- min(xdnc.act)
xpnd.min <- min(xpndfit.df$fit)
if(xpnd.max > xdnc.max) max.lim <- xpnd.max
else max.lim <- xdnc.max
if(xpnd.min < xdnc.min) min.lim <- xpnd.min
else min.lim <- xdnc.min
outidap <- as.null()
outidrs <- as.null()
cat("Before tcltk","\n")
tt <- tktoplevel()
tktitle(tt) <- "Diagnostics"
label.widget <- tklabel(tt, text="Choose type of plot!")
idnfyplot <- function() {
outi <- identify(idf.x, idf.y, label=get(idvar))
if(flag == 1) {
outidap <- outi
assign("outidap", outidap, env=.GlobalEnv)
}
if(flag == 2) {
outidrs <- outi
assign("outidrs", outidrs, env=.GlobalEnv)
}
dev.print(png, paste(opfr,".png",sep=""),
width=640,height=640)
}
actvsprd <- function() {
plot(xdnc.act, xpndfit.df$fit, xlim=c(min.lim, max.lim),
ylim=c(min.lim,max.lim),xlab=xlabel, ylab=ylabel)
abline(0,1)
abline(lsfit(xdnc.act, xpndfit.df$fit), lty=2)
opfr <- opf2
idf.x <- xdnc.act
idf.y <- xpndfit.df$fit
flag <- 1
assign("flag", flag, env=.GlobalEnv)
assign("idf.x", idf.x, env=.GlobalEnv)
assign("idf.y",idf.y,env=.GlobalEnv)
assign("opfr",opfr,env=.GlobalEnv)
# define second toplevel widget
tt2a <- tktoplevel()
tktitle(tt2a) <- "IdentifyOutliers"
but.lab2 <- tklabel(tt2a, text="Left button\n to identify,\n
middle
to quit",foreground="maroon")
but.wid22 <- tkbutton(tt2a, text="Identify Outliers",
command=idnfyplot)
but.wid23 <- tkbutton(tt2a, text="DONE", command=function(){
tclvar$done2<-"T"
tkdestroy(tt2a)
})
tkpack(but.lab2, but.wid22, but.wid23)
# wait until DONE is pressed
tclvar$done2 <- "F"
tkwait.variable("done2")
}
residplot <- function() {
plot(tmpzx$fit, tmpzx$residuals)
abline(h=0)
opfr <- opf2b
idf.x <- tmpzx$fit
idf.y <- tmpzx$residuals
flag <- 2
assign("flag", flag, env=.GlobalEnv)
assign("idf.x", idf.x, env=.GlobalEnv)
assign("idf.y",idf.y,env=.GlobalEnv)
assign("opfr",opfr,env=.GlobalEnv)
# define second toplevel widget
tt2b <- tktoplevel()
tktitle(tt2b) <- "IdentifyOutliers"
but.lab2 <- tklabel(tt2b, text="Left button\n to identify,\n
middle
to quit",foreground="maroon")
but.wid22 <- tkbutton(tt2b, text="Identify Outliers",
command=idnfyplot)
but.wid23 <- tkbutton(tt2b, text="DONE", command=function(){
tclvar$done3<-"T"
tkdestroy(tt2b)
})
tkpack(but.lab2, but.wid22, but.wid23)
# wait until DONE is pressed
tclvar$done3 <- "F"
tkwait.variable("done3")
}
tclvar$choice <- 99
# THE PROGRAM STARTS DISK SWAPPING HERE...AND FINALLY DOES A
# SEGMENTATION FAULT...SOMETIMES....
rbut.wid1 <- tkradiobutton(tt, text="ActualVsPredicted",
value=0,
variable="choice", command=actvsprd)
rbut.wid2 <- tkradiobutton(tt, text="ResidualPlot", value=1,
variable="choice", command=residplot)
but.plot <- tkbutton(tt, text="Exit", command=function(){
tclvar$done <-
"T"
tkdestroy(tt)
})
tkpack(label.widget, rbut.wid1, rbut.wid2, but.plot)
# wait until Exit is pressed
tclvar$choice <- "99"
tkwait.variable("done")
# End Tcl/tk section
# Plot more diagnostics..
par(mfrow=c(1,2))
par(pty="s")
hist(tmpzx$residuals)
qqnorm(tmpzx$residuals)
qqline(tmpzx$residuals)
par(mfrow=c(1,1))
dev.print(png, "lmdiag.png", width=640,height=640)
# I HAVE A PROBLEM HERE...OUTIDAP AND OUTIDRS ARE GENERATED BY IDENTIFY()
# EVEN THOUGH I HAVE ASSIGNED IT TO .GLOBALENV, IT REPORTS NULL HERE..
# WHY??
cat(outidap, "\n")
cat(outidrs, "\n")
outid <- append(outidap, outidrs)
cat(outid, "\n")
if(idvar != "NULL") {
idvardf <- data.frame(get(idvar))
names(idvardf) <- idvar
outidf <- as.data.frame(idvardf[outid, paste(idvar)])
cat(dim(outidf), "\n")
write.table(outidf, "lmoutliers.txt", quote=F,row.names=F)
}
detach("xd")
rm(xd)
rm(xdn)
rm(xdnc)
}
***********************************************************
Mr. Anantha Prasad, Ecologist/GIS Specialist
USDA Forest Service, 359 Main Rd.
Delaware OHIO 43015 USA
Ph: 740-368-0103 Email: aprasad at fs.fed.us
Web: http://www.fs.fed.us/ne/delaware/index.html
Don't Miss Climate Change Tree Atlas at:
http://www.fs.fed.us/ne/delaware/atlas/index.html
******************************************************************
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at
stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._