This message is about a curious difference in timing between two ways of computing the same function. One uses expm, so is expected to be a bit slower, but "a bit" turned out to be a factor of >1000. The code is below. We would be grateful if anyone can point out any egregious bad practice in our code, or enlighten us on why one approach is so much slower than the other.

The problem arose in an activity to develop guidelines for nonlinear modeling in ecology (at NCEAS, Santa Barbara, with worldwide participants), and we will be trying to include suggestions of how to prepare problems like this for efficient and effective solution. The code for nlogL was the "original" from the worker who supplied the problem.
Best,
John Nash
--------------------------------------------------------------------------------------
cat("mineral-timing.R == benchmark MIN functions in R\n")
# J C Nash July 31, 2011
require("microbenchmark")
require("numDeriv")
library(Matrix)
library(optimx)
# dat<-read.table('min.dat', skip=3, header=FALSE)
# t<-dat[,1]
t <- c(0.77, 1.69, 2.69, 3.67, 4.69, 5.71, 7.94, 9.67, 11.77, 17.77,
23.77, 32.77, 40.73, 47.75, 54.90, 62.81, 72.88, 98.77, 125.92, 160.19,
191.15, 223.78, 287.70, 340.01, 340.95, 342.01)
# y<-dat[,2] # ?? tidy up
y<- c(1.396, 3.784, 5.948, 7.717, 9.077, 10.100, 11.263, 11.856, 12.251, 12.699,
12.869, 13.048, 13.222, 13.347, 13.507, 13.628, 13.804, 14.087, 14.185, 14.351,
14.458, 14.756, 15.262, 15.703, 15.703, 15.703)
ones<-rep(1,length(t))
theta<-c(-2,-2,-2,-2)
nlogL<-function(theta){
k<-exp(theta[1:3])
sigma<-exp(theta[4])
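# A: 2x2 rate matrix of the linear compartment ODE dx/dt = A x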
A<-rbind(
c(-k[1], k[2]),
c( k[1], -(k[2]+k[3]))
)
x0<-c(0,100)
sol<-function(tt)100-sum(expm(A*tt)%*%x0) # tt avoids shadowing the global t
pred<-sapply(t,sol) # t and y are defined above; min.dat/dat is commented out
-sum(dnorm(y,mean=pred,sd=sigma, log=TRUE))
}
getpred<-function(theta, t){
k<-exp(theta[1:3])
sigma<-exp(theta[4])
A<-rbind(
c(-k[1], k[2]),
c( k[1], -(k[2]+k[3]))
)
x0<-c(0,100)
sol<-function(tt)100-sum(expm(A*tt)%*%x0)
pred<-sapply(t,sol)
pred # return the predictions explicitly; a trailing assignment returns invisibly
}
Mpred <- function(theta) {
# WARNING: assumes t global
kvec<-exp(theta[1:3])
k1<-kvec[1]
k2<-kvec[2]
k3<-kvec[3]
# MIN problem terbuthylazene disappearance
z<-k1+k2+k3
y<-z*z-4*k1*k3
l1<-0.5*(-z+sqrt(y))
l2<-0.5*(-z-sqrt(y))
val<-100*(1-((k1+k2+l2)*exp(l2*t)-(k1+k2+l1)*exp(l1*t))/(l2-l1))
val # val is a vector when t is a vector; return it explicitly
}
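# Note: l1 and l2 above are the eigenvalues of the 2x2 matrix A built in
# nlogL, i.e. the roots of its characteristic polynomial
#   lambda^2 + (k1+k2+k3)*lambda + k1*k3 = 0,
# so Mpred is the closed-form solution of dx/dt = A x with x0 = c(0,100)
# that nlogL evaluates numerically via expm(A*t).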
negll <- function(theta){
# non expm version JN 110731
pred<-Mpred(theta)
sigma<-exp(theta[4])
-sum(dnorm(y,mean=pred,sd=sigma, log=TRUE))
}
theta<-rep(-2,4)
fand<-nlogL(theta)
fsim<-negll(theta)
cat("Check fn vals: expm =",fand," simple=",fsim,"
diff=",fand-fsim,"\n")
cat("time the function in expm form\n")
tnlogL<-microbenchmark(nlogL(theta), times=100L)
tnlogL
cat("time the function in simpler form\n")
tnegll<-microbenchmark(negll(theta), times=100L)
tnegll
ftimes<-data.frame(texpm=tnlogL$time, tsimp=tnegll$time)
# ftimes
boxplot(log(ftimes))
title("Log times in nanoseconds for matrix exponential and simple MIN
fn")
I think one difference is that negll() is fully vectorized - no loops, whereas nlogL calls the function sol() inside sapply, i.e. a loop.

Michael

On 17 Aug 2011, at 10:27AM, John C Nash wrote:
> [original message snipped]
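A minimal sketch of the loop-versus-vector gap Michael describes, with a plain exponential standing in for the model (the grid and rate below are illustrative, and this isolates only the sapply overhead, not the expm cost):

library(microbenchmark)
tt <- seq(0.5, 350, length.out = 26)  # stand-in for the 26-point time grid
microbenchmark(
  looped     = sapply(tt, function(s) 100 * (1 - exp(-0.1 * s))), # one call per point
  vectorized = 100 * (1 - exp(-0.1 * tt)),                        # one call in total
  times = 100L
)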
John C Nash <nashjc at uottawa.ca> writes:

> This message is about a curious difference in timing between two ways of
> computing the same function. One uses expm, so is expected to be a bit
> slower, but "a bit" turned out to be a factor of >1000. The code is below.
> We would be grateful if anyone can point out any egregious bad practice in
> our code, or enlighten us on why one approach is so much slower than the
> other.

Looks like A*t in expm(A*t) is a "matrix". 'getMethod("expm","matrix")' coerces its arg to a "Matrix", then calls expm(), whose method coerces its arg to a "dMatrix" and calls expm(), whose method coerces its arg to a 'dgeMatrix' and calls expm(), whose method finally calls '.Call(dgeMatrix_exp, x)'

Whew!

The time difference between 'expm( diag(10)+1 )' and 'expm( as( diag(10)+1, "dgeMatrix" ))' is a factor of 10 on my box.

Dunno 'bout the other factor of 100.
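A sketch of that comparison (Matrix and microbenchmark loaded as in John's script; the exact factor will vary by machine):

library(Matrix)
library(microbenchmark)
M   <- diag(10) + 1        # plain "matrix": expm() walks the coercion chain
Mge <- as(M, "dgeMatrix")  # already a "dgeMatrix": the method is called directly
microbenchmark(expm(M), expm(Mge), times = 100L)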
Chuck

--
Charles C. Berry                            cberry at tajo.ucsd.edu