Hello, everyone! I picked up R several months ago and have adopted it as my choice for statistical programming. Coming from a Java background, I can honestly say that R is not only free, it is better than S-PLUS: lexical scoping in R makes it very simple to simulate Java's object model (a small closure sketch follows below). For this, I encourage everyone to read the article:

Robert Gentleman and Ross Ihaka (2000), "Lexical Scope and Statistical Computing", Journal of Computational and Graphical Statistics, 9, 491-508.

I have coded a simulation program which runs at about one fifth of the speed I would consider minimally usable, on a Pentium 700 PC (Windows Me). I have spent quite some time trying to optimize the program, so my question is: on which platform does R run the fastest? I would be willing to invest in a faster platform. Thanks in advance.

My program follows in case someone can help me improve it.

Thanks.

Jason
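To illustrate what I mean about lexical scope (a toy sketch only, not part of the program below; the names make.counter, increment and get.count are purely illustrative):

make.counter <- function(count = 0)    # 'count' plays the role of a private field
{
    increment <- function(by = 1)
    {
        count <<- count + by           # <<- assigns in the enclosing environment
        count
    }
    get.count <- function() count
    list(increment = increment, get.count = get.count)   # the "methods"
}

ctr <- make.counter()
ctr$increment()       # 1
ctr$increment(5)      # 6
ctr$get.count()       # 6 -- the state lives on in the closure's environment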
# Liao and Lipsitz (2001)
# A REML-type estimate of variance components in generalized linear mixed models
# This program runs on R. It does not run on S-PLUS.

rm(list = ls(all = TRUE))

### small and common functions ##################################
glmm.sample.y <- function(x, z, b, D.u)
{
    pp <- matrix(0, nrow = m, ncol = n)
    D.u.half <- t( chol(D.u) )
    zero <- numeric(n.random)

    for(i in 1:m)
    {
        obj <- rmulti.norm(zero, D.u.half)
        pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran )
    }
    pp <- exp(pp)/(1 + exp(pp))

    y <- runif(m*n)
    dim(y) <- c(m, n)
    ifelse(y < pp, 1, 0)    # returned value: an m x n matrix of 0/1 responses
}
################
rmulti.norm <- function(mean, var.half)
{
    ran <- rnorm( length(mean) )
    log.prob <- -sum(ran*ran)/2

    ran <- mean + as.vector( var.half %*% ran )
    list(ran = ran, log.prob = log.prob)
}
#################
my.outer <- function(x)    # much faster than R's outer() for short vectors
{
    dim(x) <- c(length(x), 1)
    x %*% t(x)
}

quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }

#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
    TT <- matrix(0, n.random, n.random)
    for(i in 1:30)
    {
        obj <- sample.u(b, D.u, x, y, z)
        b <- b - solve(obj$Hessian, obj$score)
        D.u <- obj$uu

        yy <- glmm.sample.y(x, z, b, D.u)    # this is the sampling stage
        obj <- mle.b(b, D.u, yy, x, z)       # given D.u, solves for the MLE of b

        TT <- TT*(1 - 1/i) + obj$uu.diff/i
        D.u <- D.u + TT
        print(i); print(date())
        print("inside REML")
        print(D.u); print("TT"); print(TT)
    }

    list(b = b, D.u = D.u)
}

mle.b <- function(b, D.u, y, x, z)    # b here is the initial value
{
    for(i in 1:30)
    {
        obj <- sample.u(b, D.u, x, y, z)
        b <- b - solve(obj$Hessian, obj$score)
        if(i == 1) uu.first <- obj$uu
    }

    uu.diff <- uu.first - obj$uu
    list(b = b, uu = obj$uu, uu.diff = uu.diff)
}

mle.b.D.u <- function(b, D.u, y, x, z)
{
    for(i in 1:30)
    {
        obj <- sample.u(b, D.u, x, y, z)
        b <- b - solve(obj$Hessian, obj$score)
        D.u <- obj$uu
    }
    list(b = b, D.u = D.u)
}

sample.u <- function(b, D.u, x, y, z)
{
    D.u.inv <- solve(D.u)

    uu <- matrix(0, n.random, n.random)
    score <- numeric(n.fixed)
    Hessian <- matrix(0, n.fixed, n.fixed)

    for(i in 1:m)
    {
        obj <- sample.u.one(D.u.inv, ada.part = as.vector(x[i, , ] %*% b),
                            x[i, , ], y[i, ], z[i, , ])
        uu <- uu + obj$uu
        score <- score + obj$score
        Hessian <- Hessian + obj$Hessian
    }

    uu <- uu/m
    list(uu = uu, score = score, Hessian = Hessian)
}

sample.u.one <- function(D.u.inv, ada.part, x, y, z)    # ada.part, x, y, z for one subject only
{
    func <- one.log.like(D.u.inv, ada.part, y, z)    # function to be maximized
    obj <- optim( numeric(n.random), func, control = list(fnscale = -1) )
    obj <- optim( obj$par, func, method = "BFGS",
                  control = list(fnscale = -1), hessian = TRUE )

    mean <- obj$par
    var.half <- solve(-obj$hessian)*1.2
    var.half <- t( chol(var.half) )

    n.simu <- 1000    # sample size of the importance sampling

    sum.weight <- 0
    uu <- matrix(0, n.random, n.random)
    pi <- numeric(n)    # note: this local 'pi' shadows the built-in constant
    product <- numeric(n)

    func <- one.score.Hessian(D.u.inv, ada.part, x, y, z)
    for(i in 1:n.simu)
    {
        obj1 <- rmulti.norm(mean, var.half)
        obj2 <- func(obj1$ran)
        weight <- exp(obj2$log.likelihood - obj1$log.prob)

        uu <- uu + my.outer(obj1$ran) * weight
        pi <- pi + obj2$pi * weight
        product <- product + obj2$pi*(1 - obj2$pi) * weight

        sum.weight <- sum.weight + weight
    }

    uu <- uu/sum.weight
    pi <- pi/sum.weight
    product <- product/sum.weight

    score <- as.vector( (y - pi) %*% x )
    Hessian <- -t(x) %*% diag(product) %*% x

    list(uu = uu, score = score, Hessian = Hessian)
}

one.log.like <- function(D.u.inv, ada.part, y, z)
{
    function(u)
    {
        pp <- exp( ada.part + as.vector( z %*% u ) )
        pp <- pp/(1 + pp)
        -quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 - y)*log(1 - pp) )
    }
}

one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
    function(u)
    {
        pi <- exp( ada.part + as.vector( z %*% u ) )
        pi <- pi/(1 + pi)
        log.likelihood <- -quadratic.form(D.u.inv, u)/2 +
            sum( y*log(pi) + (1 - y)*log(1 - pi) )

        list(pi = pi, log.likelihood = log.likelihood)
    }
}

main <- function()
{
    b <- c(.5, 1, -1, -.5)
    if(n.fixed > 4) b <- c( b, numeric(n.fixed - 4) )
    D.u <- c(.5, 0, 0, .25)
    D.u <- matrix(D.u, nrow = 2)

    x <- array(0, c(m, n, n.fixed) )
    x[, , 1] <- 1

    for(j in 1:n) x[, j, 2] <- j - 5
    x[1:trunc(m/2), , 3] <- 0
    x[trunc(m/2 + 1):m, , 3] <- 1

    x[, , 4] <- x[, , 2]*x[, , 3]
    if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed - 4) )

    z <- x[, , 1:2]

    for(i in 1:50)
    {
        y <- glmm.sample.y(x, z, b, D.u)

        obj <- mle.b.D.u(b, D.u, y, x, z); print("mle"); print(obj$D.u)
        write(obj$D.u, file = "simu1.dat", append = TRUE)

        obj <- REML.b.D.u(b, D.u, y, x, z); print("REML"); print(obj$D.u)
        write(obj$D.u, file = "simu2.dat", append = TRUE)
    }
}
###################################################
# global variables m, n, n.fixed, n.random

n <- 10
m <- 25

n.fixed <- 4
n.random <- 2

main()
print(date())

====
Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
geocities.com/jg_liao/index.html
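P.S. One direction I have been experimenting with (an untested sketch, not part of the program above; it assumes the same globals m, n, n.fixed and n.random, and the hypothetical name glmm.sample.y.vec is mine): the per-subject loop in glmm.sample.y can be replaced by whole-array operations, because the subject index varies fastest when the m x n x p arrays are flattened to (m*n) x p matrices.

glmm.sample.y.vec <- function(x, z, b, D.u)
{
    # one row per subject, each row ~ N(0, D.u),
    # since t(R) %*% R = D.u for R = chol(D.u)
    U <- matrix(rnorm(m*n.random), m, n.random) %*% chol(D.u)

    X.flat <- matrix(x, m*n, n.fixed)     # row r belongs to subject ((r-1) %% m) + 1
    Z.flat <- matrix(z, m*n, n.random)

    eta <- as.vector(X.flat %*% b) + rowSums(Z.flat * U[rep(1:m, n), ])
    pp <- matrix( exp(eta)/(1 + exp(eta)), m, n )

    ifelse(matrix(runif(m*n), m, n) < pp, 1, 0)
}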
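P.P.S. Before investing in new hardware I will also try profiling, to see which functions actually eat the time (a sketch using R's built-in profiler; the output file name is arbitrary):

Rprof("glmm.prof")    # start sampling the call stack
main()                # run the simulation as usual
Rprof(NULL)           # stop profiling
# summarize with summaryRprof("glmm.prof"), or with
# "R CMD Rprof glmm.prof" from the shell in older versions of R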
I haven't looked into this with R in particular, but with some textual-analysis software I developed in Perl, the same job ran about 3.5x faster on the same hardware under Linux than under Windows NT. The hardware was much slower than yours (Pentium 150, 96 MB RAM), and the Linux variant was Debian potato (2.2r2). YMMV. If you want numbers for your own workload rather than mine, timing it directly on each candidate machine is easy (see the sketch below).
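A minimal sketch, using main() as defined in your program:

# time one full run; 'elapsed' is wall-clock seconds,
# comparable across machines and operating systems
timing <- system.time(main())
print(timing)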
Andy Perrin

------------------------------------------------------------------
Andrew J Perrin - Ph.D. Candidate, UC Berkeley, Dept. of Sociology
Chapel Hill NC 27514 USA | aperrin at socrates.berkeley.edu

On Sun, 8 Apr 2001, Jason Liao wrote:

> [original message and program quoted in full; snipped]