Hello, everyone! I picked up R several months ago and have adopted it
as my choice for statistical programming. Coming from a Java
background, I can honestly say that R is not only free, it is better
tha S-plus: the lexical scope in R makes it very simple to simulate
Java's object model. For this, I encourage everyone to read the artcle:
Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
Computing", Journal of Computational and Graphical Statistics, 9,
491-508.
I have coded a simulation program which runs at 1/5 the speed of
minimum usability on a Pentium 700 PC (windows me). I have spent quite
some time trying to optimize the program. So my question is: on which
platform does R run the fastest? I would be willing to invest on a
faster platform. Thanks in advance.
My program follows in case someone can help me improve it.
Thanks.
Jason
# Liao and Lipsitz (2001)
# A REML type estimate of variance components in generalized linear
mixed models
# This program runs on R. It does not run on S-plus.
rm(list=ls(all=TRUE))
###small and common functions##################################
glmm.sample.y <- function(x, z, b, D.u)
{
pp <- matrix(0, nrow = m, ncol = n);
D.u.half <- t( chol(D.u) );
zero <- numeric(n.random);
for(i in 1:m)
{
obj <- rmulti.norm(zero, D.u.half);
pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );
}
pp <- exp(pp)/( 1+exp(pp) );
y <- runif(m*n);
dim(y) <- c(m, n);
ifelse(y < pp, 1, 0); #returning vector
}
################
rmulti.norm <- function(mean, var.half)
{
ran <- rnorm( length(mean) );
log.prob <- -sum(ran*ran)/2;
ran <- mean + as.vector( var.half %*% ran );
return(ran, log.prob);
}
#################
my.outer <- function(x) #this is much faster than R's outer()
function
{
dim(x) <- c(length(x), 1);
x %*% t(x);
}
quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }
#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
TT <- matrix(0, n.random, n.random);
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
yy <- glmm.sample.y(x, z, b, D.u); #this is the sampling stage
obj <- mle.b(b, D.u, yy, x, z); #given D.u, solves for the
mle of b
TT <- TT*(1-1/i) + obj$uu.diff/i;
D.u <- D.u + TT;
print(i); print(date());
print("inside REML");
print(D.u);print("TT"); print(TT);
}
return(b, D.u);
}
mle.b <- function(b, D.u, y, x, z) #b here is the initial value
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
if(i==1) uu.first <- obj$uu;
}
uu.diff <- uu.first - obj$uu;
return(b, uu=obj$uu, uu.diff);
}
mle.b.D.u <- function(b, D.u, y, x, z)
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
}
return(b, D.u);
}
sample.u <- function(b, D.u, x, y, z)
{
D.u.inv <- solve(D.u);
uu <- matrix(0, n.random, n.random);
score <- numeric(n.fixed);
Hessian <- matrix(0, n.fixed, n.fixed);
for(i in 1:m)
{
obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
x[i, ,], y[i,], z[i, ,]);
uu <- uu + obj$uu;
score <- score + obj$score;
Hessian <- Hessian + obj$Hessian;
}
uu <- uu/m;
return(uu, score, Hessian);
}
sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y,
z for one subject only
{
func <- one.log.like(D.u.inv, ada.part, y, z); #function to be
maximized
obj <- optim( numeric(n.random), func, control=list(fnscale=-1) )
obj <- optim(obj$par, func, method = c("BFGS"),
control=list(fnscale=-1), hessian = T )
mean <- obj$par;
var.half <- solve(-obj$hessian)*1.2;
var.half <- t( chol(var.half) );
n.simu <- 1000 #sample size of importance sampling
sum.weight <- 0;
uu <- matrix(0, n.random, n.random);
pi <- numeric(n);
product <- numeric(n);
func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);
for(i in 1:n.simu)
{
obj1 <- rmulti.norm(mean, var.half);
obj2 <- func(obj1$ran);
weight <- exp(obj2$log.likelihood - obj1$log.prob);
uu <- uu + my.outer(obj1$ran) * weight;
pi <- pi + obj2$pi * weight;
product <- product + obj2$pi*(1-obj2$pi) * weight;
sum.weight <- sum.weight + weight;
}
uu <- uu/sum.weight;
pi <- pi/sum.weight;
product <- product/sum.weight;
score <- as.vector( (y - pi) %*% x );
Hessian <- -t(x) %*% diag(product) %*% x;
return(uu, score, Hessian);
}
one.log.like <- function(D.u.inv, ada.part, y, z)
{
function(u)
{
pp <- exp( ada.part + as.vector( z %*% u ) );
pp <- pp/(1+pp);
-quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 -
y)*log(1-pp) );
}
}
one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
function(u)
{
pi <- exp( ada.part + as.vector( z %*% u ) );
pi <- pi/(1 + pi);
log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum(
y*log(pi) + (1-y)*log(1-pi) );
return(pi, log.likelihood);
}
}
main <- function()
{
b <- c(.5 , 1, -1, -.5);
if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
D.u <- c(.5, 0, 0, .25);
D.u <- matrix(D.u, nrow=2);
x <- array(0, c(m, n, n.fixed) );
x[, , 1] <- 1;
for(j in 1:n) x[, j, 2] <- j-5;
x[1:trunc(m/2), , 3] <- 0;
x[trunc(m/2+1):m, , 3] <- 1;
x[, , 4] <- x[, , 2]*x[, , 3];
if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
z <- x[, , 1:2];
for(i in 1:50)
{
y <- glmm.sample.y(x, z, b, D.u);
obj <- mle.b.D.u(b, D.u, y, x, z); print("mle");
print(obj$D.u);
write(obj$D.u, file = "simu1.dat", append=T);
obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
print(obj$D.u);
write(obj$D.u, file = "simu2.dat", append=T);
}
}
###################################################
#global variable m, n, n.fixed, n.random
n <- 10;
m <- 25;
n.fixed <- 4;
n.random <- 2;
main();
print(date());
====Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
http://www.geocities.com/jg_liao/index.html
__________________________________________________
Do You Yahoo!?
Get email at your own domain with Yahoo! Mail.
http://personal.mail.yahoo.com/
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at
stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
Hello, everyone! I picked up R several months ago and have adopted it
as my choice for statistical programming. Coming from a Java
background, I can honestly say that R is not only free, it is better
tha S-plus: the lexical scope in R makes it very simple to simulate
Java's object model. For this, I encourage everyone to read the artcle:
Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
Computing", Journal of Computational and Graphical Statistics, 9,
491-508.
I have coded a simulation program which runs at 1/5 the speed of
minimum usability on a Pentium 700 PC (windows me). I have spent quite
some time trying to optimize the program. So my question is: on which
platform does R run the fastest? I would be willing to invest on a
faster platform. Thanks in advance.
My program follows in case someone can help me improve it.
Thanks.
Jason
# Liao and Lipsitz (2001)
# A REML type estimate of variance components in generalized linear
mixed models
# This program runs on R. It does not run on S-plus.
rm(list=ls(all=TRUE))
###small and common functions##################################
glmm.sample.y <- function(x, z, b, D.u)
{
pp <- matrix(0, nrow = m, ncol = n);
D.u.half <- t( chol(D.u) );
zero <- numeric(n.random);
for(i in 1:m)
{
obj <- rmulti.norm(zero, D.u.half);
pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );
}
pp <- exp(pp)/( 1+exp(pp) );
y <- runif(m*n);
dim(y) <- c(m, n);
ifelse(y < pp, 1, 0); #returning vector
}
################
rmulti.norm <- function(mean, var.half)
{
ran <- rnorm( length(mean) );
log.prob <- -sum(ran*ran)/2;
ran <- mean + as.vector( var.half %*% ran );
return(ran, log.prob);
}
#################
my.outer <- function(x) #this is much faster than R's outer()
function
{
dim(x) <- c(length(x), 1);
x %*% t(x);
}
quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }
#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
TT <- matrix(0, n.random, n.random);
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
yy <- glmm.sample.y(x, z, b, D.u); #this is the sampling stage
obj <- mle.b(b, D.u, yy, x, z); #given D.u, solves for the
mle of b
TT <- TT*(1-1/i) + obj$uu.diff/i;
D.u <- D.u + TT;
print(i); print(date());
print("inside REML");
print(D.u);print("TT"); print(TT);
}
return(b, D.u);
}
mle.b <- function(b, D.u, y, x, z) #b here is the initial value
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
if(i==1) uu.first <- obj$uu;
}
uu.diff <- uu.first - obj$uu;
return(b, uu=obj$uu, uu.diff);
}
mle.b.D.u <- function(b, D.u, y, x, z)
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
}
return(b, D.u);
}
sample.u <- function(b, D.u, x, y, z)
{
D.u.inv <- solve(D.u);
uu <- matrix(0, n.random, n.random);
score <- numeric(n.fixed);
Hessian <- matrix(0, n.fixed, n.fixed);
for(i in 1:m)
{
obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
x[i, ,], y[i,], z[i, ,]);
uu <- uu + obj$uu;
score <- score + obj$score;
Hessian <- Hessian + obj$Hessian;
}
uu <- uu/m;
return(uu, score, Hessian);
}
sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y,
z for one subject only
{
func <- one.log.like(D.u.inv, ada.part, y, z); #function to be
maximized
obj <- optim( numeric(n.random), func, control=list(fnscale=-1) )
obj <- optim(obj$par, func, method = c("BFGS"),
control=list(fnscale=-1), hessian = T )
mean <- obj$par;
var.half <- solve(-obj$hessian)*1.2;
var.half <- t( chol(var.half) );
n.simu <- 1000 #sample size of importance sampling
sum.weight <- 0;
uu <- matrix(0, n.random, n.random);
pi <- numeric(n);
product <- numeric(n);
func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);
for(i in 1:n.simu)
{
obj1 <- rmulti.norm(mean, var.half);
obj2 <- func(obj1$ran);
weight <- exp(obj2$log.likelihood - obj1$log.prob);
uu <- uu + my.outer(obj1$ran) * weight;
pi <- pi + obj2$pi * weight;
product <- product + obj2$pi*(1-obj2$pi) * weight;
sum.weight <- sum.weight + weight;
}
uu <- uu/sum.weight;
pi <- pi/sum.weight;
product <- product/sum.weight;
score <- as.vector( (y - pi) %*% x );
Hessian <- -t(x) %*% diag(product) %*% x;
return(uu, score, Hessian);
}
one.log.like <- function(D.u.inv, ada.part, y, z)
{
function(u)
{
pp <- exp( ada.part + as.vector( z %*% u ) );
pp <- pp/(1+pp);
-quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 -
y)*log(1-pp) );
}
}
one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
function(u)
{
pi <- exp( ada.part + as.vector( z %*% u ) );
pi <- pi/(1 + pi);
log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum(
y*log(pi) + (1-y)*log(1-pi) );
return(pi, log.likelihood);
}
}
main <- function()
{
b <- c(.5 , 1, -1, -.5);
if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
D.u <- c(.5, 0, 0, .25);
D.u <- matrix(D.u, nrow=2);
x <- array(0, c(m, n, n.fixed) );
x[, , 1] <- 1;
for(j in 1:n) x[, j, 2] <- j-5;
x[1:trunc(m/2), , 3] <- 0;
x[trunc(m/2+1):m, , 3] <- 1;
x[, , 4] <- x[, , 2]*x[, , 3];
if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
z <- x[, , 1:2];
for(i in 1:50)
{
y <- glmm.sample.y(x, z, b, D.u);
obj <- mle.b.D.u(b, D.u, y, x, z); print("mle");
print(obj$D.u);
write(obj$D.u, file = "simu1.dat", append=T);
obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
print(obj$D.u);
write(obj$D.u, file = "simu2.dat", append=T);
}
}
###################################################
#global variable m, n, n.fixed, n.random
n <- 10;
m <- 25;
n.fixed <- 4;
n.random <- 2;
main();
print(date());
====Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
http://www.geocities.com/jg_liao/index.html
__________________________________________________
Do You Yahoo!?
Get email at your own domain with Yahoo! Mail.
http://personal.mail.yahoo.com/
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at
stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
Hello, everyone! I picked up R several months ago and have adopted it
as my choice for statistical programming. Coming from a Java
background, I can honestly say that R is not only free, it is better
tha S-plus: the lexical scope in R makes it very simple to simulate
Java's object model. For this, I encourage everyone to read the artcle:
Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
Computing", Journal of Computational and Graphical Statistics, 9,
491-508.
I have coded a simulation program which runs at 1/5 the speed of
minimum usability on a Pentium 700 PC (windows me). I have spent quite
some time trying to optimize the program. So my question is: on which
platform does R run the fastest? I would be willing to invest on a
faster platform. Thanks in advance.
My program follows in case someone can help me improve it.
Thanks.
Jason
# Liao and Lipsitz (2001)
# A REML type estimate of variance components in generalized linear
mixed models
# This program runs on R. It does not run on S-plus.
rm(list=ls(all=TRUE))
###small and common functions##################################
glmm.sample.y <- function(x, z, b, D.u)
{
pp <- matrix(0, nrow = m, ncol = n);
D.u.half <- t( chol(D.u) );
zero <- numeric(n.random);
for(i in 1:m)
{
obj <- rmulti.norm(zero, D.u.half);
pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );
}
pp <- exp(pp)/( 1+exp(pp) );
y <- runif(m*n);
dim(y) <- c(m, n);
ifelse(y < pp, 1, 0); #returning vector
}
################
rmulti.norm <- function(mean, var.half)
{
ran <- rnorm( length(mean) );
log.prob <- -sum(ran*ran)/2;
ran <- mean + as.vector( var.half %*% ran );
return(ran, log.prob);
}
#################
my.outer <- function(x) #this is much faster than R's outer()
function
{
dim(x) <- c(length(x), 1);
x %*% t(x);
}
quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }
#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
TT <- matrix(0, n.random, n.random);
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
yy <- glmm.sample.y(x, z, b, D.u); #this is the sampling stage
obj <- mle.b(b, D.u, yy, x, z); #given D.u, solves for the
mle of b
TT <- TT*(1-1/i) + obj$uu.diff/i;
D.u <- D.u + TT;
print(i); print(date());
print("inside REML");
print(D.u);print("TT"); print(TT);
}
return(b, D.u);
}
mle.b <- function(b, D.u, y, x, z) #b here is the initial value
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
if(i==1) uu.first <- obj$uu;
}
uu.diff <- uu.first - obj$uu;
return(b, uu=obj$uu, uu.diff);
}
mle.b.D.u <- function(b, D.u, y, x, z)
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
}
return(b, D.u);
}
sample.u <- function(b, D.u, x, y, z)
{
D.u.inv <- solve(D.u);
uu <- matrix(0, n.random, n.random);
score <- numeric(n.fixed);
Hessian <- matrix(0, n.fixed, n.fixed);
for(i in 1:m)
{
obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
x[i, ,], y[i,], z[i, ,]);
uu <- uu + obj$uu;
score <- score + obj$score;
Hessian <- Hessian + obj$Hessian;
}
uu <- uu/m;
return(uu, score, Hessian);
}
sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y,
z for one subject only
{
func <- one.log.like(D.u.inv, ada.part, y, z); #function to be
maximized
obj <- optim( numeric(n.random), func, control=list(fnscale=-1) )
obj <- optim(obj$par, func, method = c("BFGS"),
control=list(fnscale=-1), hessian = T )
mean <- obj$par;
var.half <- solve(-obj$hessian)*1.2;
var.half <- t( chol(var.half) );
n.simu <- 1000 #sample size of importance sampling
sum.weight <- 0;
uu <- matrix(0, n.random, n.random);
pi <- numeric(n);
product <- numeric(n);
func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);
for(i in 1:n.simu)
{
obj1 <- rmulti.norm(mean, var.half);
obj2 <- func(obj1$ran);
weight <- exp(obj2$log.likelihood - obj1$log.prob);
uu <- uu + my.outer(obj1$ran) * weight;
pi <- pi + obj2$pi * weight;
product <- product + obj2$pi*(1-obj2$pi) * weight;
sum.weight <- sum.weight + weight;
}
uu <- uu/sum.weight;
pi <- pi/sum.weight;
product <- product/sum.weight;
score <- as.vector( (y - pi) %*% x );
Hessian <- -t(x) %*% diag(product) %*% x;
return(uu, score, Hessian);
}
one.log.like <- function(D.u.inv, ada.part, y, z)
{
function(u)
{
pp <- exp( ada.part + as.vector( z %*% u ) );
pp <- pp/(1+pp);
-quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 -
y)*log(1-pp) );
}
}
one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
function(u)
{
pi <- exp( ada.part + as.vector( z %*% u ) );
pi <- pi/(1 + pi);
log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum(
y*log(pi) + (1-y)*log(1-pi) );
return(pi, log.likelihood);
}
}
main <- function()
{
b <- c(.5 , 1, -1, -.5);
if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
D.u <- c(.5, 0, 0, .25);
D.u <- matrix(D.u, nrow=2);
x <- array(0, c(m, n, n.fixed) );
x[, , 1] <- 1;
for(j in 1:n) x[, j, 2] <- j-5;
x[1:trunc(m/2), , 3] <- 0;
x[trunc(m/2+1):m, , 3] <- 1;
x[, , 4] <- x[, , 2]*x[, , 3];
if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
z <- x[, , 1:2];
for(i in 1:50)
{
y <- glmm.sample.y(x, z, b, D.u);
obj <- mle.b.D.u(b, D.u, y, x, z); print("mle");
print(obj$D.u);
write(obj$D.u, file = "simu1.dat", append=T);
obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
print(obj$D.u);
write(obj$D.u, file = "simu2.dat", append=T);
}
}
###################################################
#global variable m, n, n.fixed, n.random
n <- 10;
m <- 25;
n.fixed <- 4;
n.random <- 2;
main();
print(date());
====Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
http://www.geocities.com/jg_liao/index.html
__________________________________________________
Do You Yahoo!?
Get email at your own domain with Yahoo! Mail.
http://personal.mail.yahoo.com/
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at
stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
Hello, everyone! I picked up R several months ago and have adopted it
as my choice for statistical programming. Coming from a Java
background, I can honestly say that R is not only free, it is better
tha S-plus: the lexical scope in R makes it very simple to simulate
Java's object model. For this, I encourage everyone to read the artcle:
Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical
Computing", Journal of Computational and Graphical Statistics, 9,
491-508.
I have coded a simulation program which runs at 1/5 the speed of
minimum usability on a Pentium 700 PC (windows me). I have spent quite
some time trying to optimize the program. So my question is: on which
platform does R run the fastest? I would be willing to invest on a
faster platform. Thanks in advance.
My program follows in case someone can help me improve it.
Thanks.
Jason
# Liao and Lipsitz (2001)
# A REML type estimate of variance components in generalized linear
mixed models
# This program runs on R. It does not run on S-plus.
rm(list=ls(all=TRUE))
###small and common functions##################################
glmm.sample.y <- function(x, z, b, D.u)
{
pp <- matrix(0, nrow = m, ncol = n);
D.u.half <- t( chol(D.u) );
zero <- numeric(n.random);
for(i in 1:m)
{
obj <- rmulti.norm(zero, D.u.half);
pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran );
}
pp <- exp(pp)/( 1+exp(pp) );
y <- runif(m*n);
dim(y) <- c(m, n);
ifelse(y < pp, 1, 0); #returning vector
}
################
rmulti.norm <- function(mean, var.half)
{
ran <- rnorm( length(mean) );
log.prob <- -sum(ran*ran)/2;
ran <- mean + as.vector( var.half %*% ran );
return(ran, log.prob);
}
#################
my.outer <- function(x) #this is much faster than R's outer()
function
{
dim(x) <- c(length(x), 1);
x %*% t(x);
}
quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }
#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
TT <- matrix(0, n.random, n.random);
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
yy <- glmm.sample.y(x, z, b, D.u); #this is the sampling stage
obj <- mle.b(b, D.u, yy, x, z); #given D.u, solves for the
mle of b
TT <- TT*(1-1/i) + obj$uu.diff/i;
D.u <- D.u + TT;
print(i); print(date());
print("inside REML");
print(D.u);print("TT"); print(TT);
}
return(b, D.u);
}
mle.b <- function(b, D.u, y, x, z) #b here is the initial value
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
if(i==1) uu.first <- obj$uu;
}
uu.diff <- uu.first - obj$uu;
return(b, uu=obj$uu, uu.diff);
}
mle.b.D.u <- function(b, D.u, y, x, z)
{
for(i in 1:30)
{
obj <- sample.u(b, D.u, x, y, z);
b <- b - solve(obj$Hessian, obj$score);
D.u <- obj$uu;
}
return(b, D.u);
}
sample.u <- function(b, D.u, x, y, z)
{
D.u.inv <- solve(D.u);
uu <- matrix(0, n.random, n.random);
score <- numeric(n.fixed);
Hessian <- matrix(0, n.fixed, n.fixed);
for(i in 1:m)
{
obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b),
x[i, ,], y[i,], z[i, ,]);
uu <- uu + obj$uu;
score <- score + obj$score;
Hessian <- Hessian + obj$Hessian;
}
uu <- uu/m;
return(uu, score, Hessian);
}
sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y,
z for one subject only
{
func <- one.log.like(D.u.inv, ada.part, y, z); #function to be
maximized
obj <- optim( numeric(n.random), func, control=list(fnscale=-1) )
obj <- optim(obj$par, func, method = c("BFGS"),
control=list(fnscale=-1), hessian = T )
mean <- obj$par;
var.half <- solve(-obj$hessian)*1.2;
var.half <- t( chol(var.half) );
n.simu <- 1000 #sample size of importance sampling
sum.weight <- 0;
uu <- matrix(0, n.random, n.random);
pi <- numeric(n);
product <- numeric(n);
func <- one.score.Hessian(D.u.inv, ada.part, x, y, z);
for(i in 1:n.simu)
{
obj1 <- rmulti.norm(mean, var.half);
obj2 <- func(obj1$ran);
weight <- exp(obj2$log.likelihood - obj1$log.prob);
uu <- uu + my.outer(obj1$ran) * weight;
pi <- pi + obj2$pi * weight;
product <- product + obj2$pi*(1-obj2$pi) * weight;
sum.weight <- sum.weight + weight;
}
uu <- uu/sum.weight;
pi <- pi/sum.weight;
product <- product/sum.weight;
score <- as.vector( (y - pi) %*% x );
Hessian <- -t(x) %*% diag(product) %*% x;
return(uu, score, Hessian);
}
one.log.like <- function(D.u.inv, ada.part, y, z)
{
function(u)
{
pp <- exp( ada.part + as.vector( z %*% u ) );
pp <- pp/(1+pp);
-quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 -
y)*log(1-pp) );
}
}
one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
function(u)
{
pi <- exp( ada.part + as.vector( z %*% u ) );
pi <- pi/(1 + pi);
log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum(
y*log(pi) + (1-y)*log(1-pi) );
return(pi, log.likelihood);
}
}
main <- function()
{
b <- c(.5 , 1, -1, -.5);
if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) );
D.u <- c(.5, 0, 0, .25);
D.u <- matrix(D.u, nrow=2);
x <- array(0, c(m, n, n.fixed) );
x[, , 1] <- 1;
for(j in 1:n) x[, j, 2] <- j-5;
x[1:trunc(m/2), , 3] <- 0;
x[trunc(m/2+1):m, , 3] <- 1;
x[, , 4] <- x[, , 2]*x[, , 3];
if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) );
z <- x[, , 1:2];
for(i in 1:50)
{
y <- glmm.sample.y(x, z, b, D.u);
obj <- mle.b.D.u(b, D.u, y, x, z); print("mle");
print(obj$D.u);
write(obj$D.u, file = "simu1.dat", append=T);
obj <- REML.b.D.u(b, D.u, y, x, z); print("REML");
print(obj$D.u);
write(obj$D.u, file = "simu2.dat", append=T);
}
}
###################################################
#global variable m, n, n.fixed, n.random
n <- 10;
m <- 25;
n.fixed <- 4;
n.random <- 2;
main();
print(date());
====Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
http://www.geocities.com/jg_liao/index.html
__________________________________________________
Do You Yahoo!?
Get email at your own domain with Yahoo! Mail.
http://personal.mail.yahoo.com/
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at
stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
I haven't looked into this with R in particular, but with some textual analysis software I developed in perl, the same job ran about 3.5x faster on the same hardware running linux over running Windows NT. The hardware was much slower than yours (Pentium 150, 96M RAM). The linux variant was Debian potato (2.2r2). YMMV. Andy Perrin ------------------------------------------------------------------ Andrew J Perrin - Ph.D. Candidate, UC Berkeley, Dept. of Sociology Chapel Hill NC 27514 USA | aperrin at socrates.berkeley.edu On Sun, 8 Apr 2001, Jason Liao wrote:> Hello, everyone! I picked up R several months ago and have adopted it > as my choice for statistical programming. Coming from a Java > background, I can honestly say that R is not only free, it is better > tha S-plus: the lexical scope in R makes it very simple to simulate > Java's object model. For this, I encourage everyone to read the artcle: > > Robert Gentleman and Ross Ihaka (2000) "Lexical Scope and Statistical > Computing", Journal of Computational and Graphical Statistics, 9, > 491-508. > > I have coded a simulation program which runs at 1/5 the speed of > minimum usability on a Pentium 700 PC (windows me). I have spent quite > some time trying to optimize the program. So my question is: on which > platform does R run the fastest? I would be willing to invest on a > faster platform. Thanks in advance. > > My program follows in case someone can help me improve it. > > Thanks. > > Jason > > # Liao and Lipsitz (2001) > # A REML type estimate of variance components in generalized linear > mixed models > # This program runs on R. It does not run on S-plus. > > rm(list=ls(all=TRUE)) > > ###small and common functions################################## > glmm.sample.y <- function(x, z, b, D.u) > { > pp <- matrix(0, nrow = m, ncol = n); > D.u.half <- t( chol(D.u) ); > zero <- numeric(n.random); > > for(i in 1:m) > { > obj <- rmulti.norm(zero, D.u.half); > pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran ); > } > pp <- exp(pp)/( 1+exp(pp) ); > > y <- runif(m*n); > dim(y) <- c(m, n); > ifelse(y < pp, 1, 0); #returning vector > } > ################ > rmulti.norm <- function(mean, var.half) > { > ran <- rnorm( length(mean) ); > log.prob <- -sum(ran*ran)/2; > > ran <- mean + as.vector( var.half %*% ran ); > return(ran, log.prob); > } > ################# > my.outer <- function(x) #this is much faster than R's outer() > function > { > dim(x) <- c(length(x), 1); > x %*% t(x); > } > > quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) } > > > ####################################################################### > > REML.b.D.u <- function(b, D.u, y, x, z) > { > TT <- matrix(0, n.random, n.random); > for(i in 1:30) > { > obj <- sample.u(b, D.u, x, y, z); > b <- b - solve(obj$Hessian, obj$score); > D.u <- obj$uu; > > yy <- glmm.sample.y(x, z, b, D.u); #this is the sampling stage > obj <- mle.b(b, D.u, yy, x, z); #given D.u, solves for the > mle of b > > TT <- TT*(1-1/i) + obj$uu.diff/i; > D.u <- D.u + TT; > print(i); print(date()); > print("inside REML"); > print(D.u);print("TT"); print(TT); > } > > return(b, D.u); > } > mle.b <- function(b, D.u, y, x, z) #b here is the initial value > { > for(i in 1:30) > { > obj <- sample.u(b, D.u, x, y, z); > b <- b - solve(obj$Hessian, obj$score); > if(i==1) uu.first <- obj$uu; > } > > uu.diff <- uu.first - obj$uu; > return(b, uu=obj$uu, uu.diff); > } > > mle.b.D.u <- function(b, D.u, y, x, z) > { > for(i in 1:30) > { > obj <- sample.u(b, D.u, x, y, z); > b <- b - solve(obj$Hessian, obj$score); > D.u <- obj$uu; > } > return(b, D.u); > } > > sample.u <- function(b, D.u, x, y, z) > { > D.u.inv <- solve(D.u); > > uu <- matrix(0, n.random, n.random); > score <- numeric(n.fixed); > Hessian <- matrix(0, n.fixed, n.fixed); > > for(i in 1:m) > { > obj <- sample.u.one(D.u.inv, ada.part=as.vector(x[i, ,] %*% b), > x[i, ,], y[i,], z[i, ,]); > uu <- uu + obj$uu; > score <- score + obj$score; > Hessian <- Hessian + obj$Hessian; > } > > uu <- uu/m; > return(uu, score, Hessian); > } > > sample.u.one <- function(D.u.inv, ada.part, x, y, z)#ada.part, x, y, > z for one subject only > { > func <- one.log.like(D.u.inv, ada.part, y, z); #function to be > maximized > obj <- optim( numeric(n.random), func, control=list(fnscale=-1) ) > > obj <- optim(obj$par, func, method = c("BFGS"), > control=list(fnscale=-1), hessian = T ) > > mean <- obj$par; > var.half <- solve(-obj$hessian)*1.2; > var.half <- t( chol(var.half) ); > > n.simu <- 1000 #sample size of importance sampling > > sum.weight <- 0; > uu <- matrix(0, n.random, n.random); > pi <- numeric(n); > product <- numeric(n); > > func <- one.score.Hessian(D.u.inv, ada.part, x, y, z); > for(i in 1:n.simu) > { > obj1 <- rmulti.norm(mean, var.half); > obj2 <- func(obj1$ran); > weight <- exp(obj2$log.likelihood - obj1$log.prob); > > uu <- uu + my.outer(obj1$ran) * weight; > pi <- pi + obj2$pi * weight; > product <- product + obj2$pi*(1-obj2$pi) * weight; > > sum.weight <- sum.weight + weight; > } > > uu <- uu/sum.weight; > pi <- pi/sum.weight; > product <- product/sum.weight; > > score <- as.vector( (y - pi) %*% x ); > Hessian <- -t(x) %*% diag(product) %*% x; > > return(uu, score, Hessian); > } > > one.log.like <- function(D.u.inv, ada.part, y, z) > { > function(u) > { > pp <- exp( ada.part + as.vector( z %*% u ) ); > pp <- pp/(1+pp); > -quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 - > y)*log(1-pp) ); > } > } > > one.score.Hessian <- function(D.u.inv, ada.part, x, y, z) > { > function(u) > { > pi <- exp( ada.part + as.vector( z %*% u ) ); > pi <- pi/(1 + pi); > log.likelihood <- -quadratic.form(D.u.inv, u)/2 + sum( > y*log(pi) + (1-y)*log(1-pi) ); > > return(pi, log.likelihood); > } > } > > main <- function() > { > b <- c(.5 , 1, -1, -.5); > if(n.fixed > 4) b <- c( b, numeric(n.fixed-4) ); > D.u <- c(.5, 0, 0, .25); > D.u <- matrix(D.u, nrow=2); > > x <- array(0, c(m, n, n.fixed) ); > x[, , 1] <- 1; > > for(j in 1:n) x[, j, 2] <- j-5; > x[1:trunc(m/2), , 3] <- 0; > x[trunc(m/2+1):m, , 3] <- 1; > > x[, , 4] <- x[, , 2]*x[, , 3]; > if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed-4) ); > > z <- x[, , 1:2]; > > for(i in 1:50) > { > y <- glmm.sample.y(x, z, b, D.u); > > obj <- mle.b.D.u(b, D.u, y, x, z); print("mle"); print(obj$D.u); > > write(obj$D.u, file = "simu1.dat", append=T); > > obj <- REML.b.D.u(b, D.u, y, x, z); print("REML"); > print(obj$D.u); > write(obj$D.u, file = "simu2.dat", append=T); > } > } > ################################################### > #global variable m, n, n.fixed, n.random > > n <- 10; > m <- 25; > > n.fixed <- 4; > n.random <- 2; > > main(); > print(date()); > > > > > ====> Jason G. Liao > Department of Biometry and Epidemiology > Medical University of South Carolina > 135 Rutledge Ave., STE 1148, Charleston, SC 29425 > phone (843) 876-1114, fax (843) 876-1126 > > http://www.geocities.com/jg_liao/index.html > > __________________________________________________ > Do You Yahoo!? > Get email at your own domain with Yahoo! Mail. > http://personal.mail.yahoo.com/ > -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.- > r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html > Send "info", "help", or "[un]subscribe" > (in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch > _._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._ >-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.- r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch _._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._