Hello, everyone! I picked up R several months ago and have adopted it as my choice for statistical programming. Coming from a Java background, I can honestly say that R is not only free, it is better than S-PLUS: lexical scoping in R makes it very simple to simulate Java's object model (a small closure sketch follows below). For this, I encourage everyone to read the article:

Robert Gentleman and Ross Ihaka (2000), "Lexical Scope and Statistical Computing", Journal of Computational and Graphical Statistics, 9, 491-508.

I have coded a simulation program which runs at about one fifth of the speed I would consider minimally usable, on a Pentium 700 PC (Windows Me). I have spent quite some time trying to optimize the program, so my question is: on which platform does R run the fastest? I would be willing to invest in a faster platform. Thanks in advance.

My program follows in case someone can help me improve it.

Thanks.

Jason
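To illustrate what I mean about lexical scope (a toy sketch only, not part of the program below; the names make.counter, increment and get.count are purely illustrative):

make.counter <- function(count = 0)    # 'count' plays the role of a private field
{
    increment <- function(by = 1)
    {
        count <<- count + by           # <<- assigns in the enclosing environment
        count
    }
    get.count <- function() count
    list(increment = increment, get.count = get.count)   # the "methods"
}

ctr <- make.counter()
ctr$increment()       # 1
ctr$increment(5)      # 6
ctr$get.count()       # 6 -- the state lives on in the closure's environment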
# Liao and Lipsitz (2001)
# A REML-type estimate of variance components in generalized linear mixed models
# This program runs on R. It does not run on S-PLUS.

rm(list = ls(all = TRUE))

### small and common functions ##################################
glmm.sample.y <- function(x, z, b, D.u)
{
    pp <- matrix(0, nrow = m, ncol = n)
    D.u.half <- t( chol(D.u) )
    zero <- numeric(n.random)

    for(i in 1:m)
    {
        obj <- rmulti.norm(zero, D.u.half)
        pp[i, ] <- as.vector( x[i, , ] %*% b + z[i, , ] %*% obj$ran )
    }
    pp <- exp(pp)/(1 + exp(pp))

    y <- runif(m*n)
    dim(y) <- c(m, n)
    ifelse(y < pp, 1, 0)    # returned value: an m x n matrix of 0/1 responses
}
################
rmulti.norm <- function(mean, var.half)
{
    ran <- rnorm( length(mean) )
    log.prob <- -sum(ran*ran)/2

    ran <- mean + as.vector( var.half %*% ran )
    list(ran = ran, log.prob = log.prob)
}
#################
my.outer <- function(x)    # much faster than R's outer() for short vectors
{
    dim(x) <- c(length(x), 1)
    x %*% t(x)
}

quadratic.form <- function(A, x) { as.vector(x %*% A %*% x) }

#######################################################################
REML.b.D.u <- function(b, D.u, y, x, z)
{
    TT <- matrix(0, n.random, n.random)
    for(i in 1:30)
    {
        obj <- sample.u(b, D.u, x, y, z)
        b <- b - solve(obj$Hessian, obj$score)
        D.u <- obj$uu

        yy <- glmm.sample.y(x, z, b, D.u)    # this is the sampling stage
        obj <- mle.b(b, D.u, yy, x, z)       # given D.u, solves for the MLE of b

        TT <- TT*(1 - 1/i) + obj$uu.diff/i
        D.u <- D.u + TT
        print(i); print(date())
        print("inside REML")
        print(D.u); print("TT"); print(TT)
    }

    list(b = b, D.u = D.u)
}

mle.b <- function(b, D.u, y, x, z)    # b here is the initial value
{
    for(i in 1:30)
    {
        obj <- sample.u(b, D.u, x, y, z)
        b <- b - solve(obj$Hessian, obj$score)
        if(i == 1) uu.first <- obj$uu
    }

    uu.diff <- uu.first - obj$uu
    list(b = b, uu = obj$uu, uu.diff = uu.diff)
}

mle.b.D.u <- function(b, D.u, y, x, z)
{
    for(i in 1:30)
    {
        obj <- sample.u(b, D.u, x, y, z)
        b <- b - solve(obj$Hessian, obj$score)
        D.u <- obj$uu
    }
    list(b = b, D.u = D.u)
}

sample.u <- function(b, D.u, x, y, z)
{
    D.u.inv <- solve(D.u)

    uu <- matrix(0, n.random, n.random)
    score <- numeric(n.fixed)
    Hessian <- matrix(0, n.fixed, n.fixed)

    for(i in 1:m)
    {
        obj <- sample.u.one(D.u.inv, ada.part = as.vector(x[i, , ] %*% b),
                            x[i, , ], y[i, ], z[i, , ])
        uu <- uu + obj$uu
        score <- score + obj$score
        Hessian <- Hessian + obj$Hessian
    }

    uu <- uu/m
    list(uu = uu, score = score, Hessian = Hessian)
}

sample.u.one <- function(D.u.inv, ada.part, x, y, z)    # ada.part, x, y, z for one subject only
{
    func <- one.log.like(D.u.inv, ada.part, y, z)    # function to be maximized
    obj <- optim( numeric(n.random), func, control = list(fnscale = -1) )
    obj <- optim( obj$par, func, method = "BFGS",
                  control = list(fnscale = -1), hessian = TRUE )

    mean <- obj$par
    var.half <- solve(-obj$hessian)*1.2
    var.half <- t( chol(var.half) )

    n.simu <- 1000    # sample size of the importance sampling

    sum.weight <- 0
    uu <- matrix(0, n.random, n.random)
    pi <- numeric(n)    # note: this local 'pi' shadows the built-in constant
    product <- numeric(n)

    func <- one.score.Hessian(D.u.inv, ada.part, x, y, z)
    for(i in 1:n.simu)
    {
        obj1 <- rmulti.norm(mean, var.half)
        obj2 <- func(obj1$ran)
        weight <- exp(obj2$log.likelihood - obj1$log.prob)

        uu <- uu + my.outer(obj1$ran) * weight
        pi <- pi + obj2$pi * weight
        product <- product + obj2$pi*(1 - obj2$pi) * weight

        sum.weight <- sum.weight + weight
    }

    uu <- uu/sum.weight
    pi <- pi/sum.weight
    product <- product/sum.weight

    score <- as.vector( (y - pi) %*% x )
    Hessian <- -t(x) %*% diag(product) %*% x

    list(uu = uu, score = score, Hessian = Hessian)
}

one.log.like <- function(D.u.inv, ada.part, y, z)
{
    function(u)
    {
        pp <- exp( ada.part + as.vector( z %*% u ) )
        pp <- pp/(1 + pp)
        -quadratic.form(D.u.inv, u)/2 + sum( y*log(pp) + (1 - y)*log(1 - pp) )
    }
}

one.score.Hessian <- function(D.u.inv, ada.part, x, y, z)
{
    function(u)
    {
        pi <- exp( ada.part + as.vector( z %*% u ) )
        pi <- pi/(1 + pi)
        log.likelihood <- -quadratic.form(D.u.inv, u)/2 +
            sum( y*log(pi) + (1 - y)*log(1 - pi) )

        list(pi = pi, log.likelihood = log.likelihood)
    }
}

main <- function()
{
    b <- c(.5, 1, -1, -.5)
    if(n.fixed > 4) b <- c( b, numeric(n.fixed - 4) )
    D.u <- c(.5, 0, 0, .25)
    D.u <- matrix(D.u, nrow = 2)

    x <- array(0, c(m, n, n.fixed) )
    x[, , 1] <- 1

    for(j in 1:n) x[, j, 2] <- j - 5
    x[1:trunc(m/2), , 3] <- 0
    x[trunc(m/2 + 1):m, , 3] <- 1

    x[, , 4] <- x[, , 2]*x[, , 3]
    if( n.fixed > 4 ) x[, , 5:n.fixed] <- rnorm( m*n*(n.fixed - 4) )

    z <- x[, , 1:2]

    for(i in 1:50)
    {
        y <- glmm.sample.y(x, z, b, D.u)

        obj <- mle.b.D.u(b, D.u, y, x, z); print("mle"); print(obj$D.u)
        write(obj$D.u, file = "simu1.dat", append = TRUE)

        obj <- REML.b.D.u(b, D.u, y, x, z); print("REML"); print(obj$D.u)
        write(obj$D.u, file = "simu2.dat", append = TRUE)
    }
}
###################################################
# global variables m, n, n.fixed, n.random

n <- 10
m <- 25

n.fixed <- 4
n.random <- 2

main()
print(date())

====
Jason G. Liao
Department of Biometry and Epidemiology
Medical University of South Carolina
135 Rutledge Ave., STE 1148, Charleston, SC 29425
phone (843) 876-1114, fax (843) 876-1126
geocities.com/jg_liao/index.html
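P.S. One direction I have been experimenting with (an untested sketch, not part of the program above; it assumes the same globals m, n, n.fixed and n.random, and the hypothetical name glmm.sample.y.vec is mine): the per-subject loop in glmm.sample.y can be replaced by whole-array operations, because the subject index varies fastest when the m x n x p arrays are flattened to (m*n) x p matrices.

glmm.sample.y.vec <- function(x, z, b, D.u)
{
    # one row per subject, each row ~ N(0, D.u),
    # since t(R) %*% R = D.u for R = chol(D.u)
    U <- matrix(rnorm(m*n.random), m, n.random) %*% chol(D.u)

    X.flat <- matrix(x, m*n, n.fixed)     # row r belongs to subject ((r-1) %% m) + 1
    Z.flat <- matrix(z, m*n, n.random)

    eta <- as.vector(X.flat %*% b) + rowSums(Z.flat * U[rep(1:m, n), ])
    pp <- matrix( exp(eta)/(1 + exp(eta)), m, n )

    ifelse(matrix(runif(m*n), m, n) < pp, 1, 0)
}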
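P.P.S. Before investing in new hardware I will also try profiling, to see which functions actually eat the time (a sketch using R's built-in profiler; the output file name is arbitrary):

Rprof("glmm.prof")    # start sampling the call stack
main()                # run the simulation as usual
Rprof(NULL)           # stop profiling
# summarize with summaryRprof("glmm.prof"), or with
# "R CMD Rprof glmm.prof" from the shell in older versions of R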
I haven't looked into this with R in particular, but with some textual-analysis software I developed in Perl, the same job ran about 3.5x faster on the same hardware under Linux than under Windows NT. The hardware was much slower than yours (Pentium 150, 96 MB RAM), and the Linux variant was Debian potato (2.2r2). YMMV. If you want numbers for your own workload rather than mine, timing it directly on each candidate machine is easy (see the sketch below).
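A minimal sketch, using main() as defined in your program:

# time one full run; 'elapsed' is wall-clock seconds,
# comparable across machines and operating systems
timing <- system.time(main())
print(timing)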
Andy Perrin

------------------------------------------------------------------
Andrew J Perrin - Ph.D. Candidate, UC Berkeley, Dept. of Sociology
Chapel Hill NC 27514 USA | aperrin at socrates.berkeley.edu

On Sun, 8 Apr 2001, Jason Liao wrote:

> [original message and program quoted in full; snipped]