Hello R Help, I am trying solve an MLE convergence problem: I would like to estimate four parameters, p1, p2, mu1, mu2, which relate to the probabilities, P1, P2, P3, of a multinomial (trinomial) distribution. I am using the mle2() function and feeding it a time series dataset composed of four columns: time point, number of successes in category 1, number of successes in category 2, and number of success in category 3. The column headers are: t, n1, n2, and n3. The mle2() function converges occasionally, and I need to improve the rate of convergence when used in a stochastic simulation, with multiple stochastically generated datasets. When mle2() does not converge, it returns an error: "Error in optim(par = c(2, 2, 0.001, 0.001), fn = function (p) : L-BFGS-B needs finite values of 'fn'." I am using the L-BFGS-B optimization method with a lower box constraint of zero for all four parameters. While I do not know any theoretical upper limit(s) to the parameter values, I have not seen any parameter estimates above 2 when using empirical data. It seems that when I start with certain 'true' parameter values, the rate of convergence is quite high, whereas other "true" parameter values are very difficult to estimate. For example, the true parameter values p1 = 2, p2 = 2, mu1 = 0.001, mu2 = 0.001 causes convergence problems, but the parameter values p1 = 0.3, p2 = 0.3, mu1 = 0.08, mu2 = 0.08 lead to high convergence rate. I've chosen these two sets of values because they represent the upper and lower estimates of parameter values derived from graphical methods. First, do you have any suggestions on how to improve the rate of convergence and avoid the "finite values of 'fn'" error? Perhaps it has to do with the true parameter values being so close to the boundary? If so, any suggestions on how to estimate parameter values that are near zero? Here is reproducible and relevant code from my stochastic simulation: ######################################################################## library(bbmle) library(combinat) # define multinomial distribution dmnom2 <- function(x,prob,log=FALSE) { r <- lgamma(sum(x) + 1) + sum(x * log(prob) - lgamma(x + 1)) if (log) r else exp(r) } # vector of time points tv <- 1:20 # Negative log likelihood function NLL.func <- function(p1, p2, mu1, mu2, y){ t <- y$tv n1 <- y$n1 n2 <- y$n2 n3 <- y$n3 P1 <- (p1*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*((-mu2)*(mu2 - p1 + p2) + mu1*(mu2 + 2*p2)) - mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) - exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu2* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))) P2 <- (p2*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*(-mu1^2 + 2*mu2*p1 + mu1*(mu2 - p1 + p2)) - mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) - exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu1* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))) P3 <- 1 - P1 - P2 p.all <- c(P1, P2, P3) -sum(dmnom2(c(n1, n2, n3), prob = p.all, log = TRUE)) } ## Generate simulated data # Model equations as expressions, P1 <- expression((p1*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*((-mu2)*(mu2 - p1 + p2) + mu1*(mu2 + 2*p2)) - mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) - exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu2* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))) P2 <- expression((p2*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*(-mu1^2 + 2*mu2*p1 + mu1*(mu2 - p1 + p2)) - mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) - exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu1* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))) # True parameter values p1t = 2; p2t = 2; mu1t = 0.001; mu2t = 0.001 # Function to calculate probabilities from 'true' parameter values psim <- function(x){ params <- list(p1 = p1t, p2 = p2t, mu1 = mu1t, mu2 = mu2t, t = x) eval.P1 <- eval(P1, params) eval.P2 <- eval(P2, params) P3 <- 1 - eval.P1 - eval.P2 c(x, matrix(c(eval.P1, eval.P2, P3), ncol = 3)) } pdat <- sapply(tv, psim, simplify = TRUE) Pdat <- as.data.frame(t(pdat)) names(Pdat) <- c("time", "P1", "P2", "P3") # Generate simulated data set from probabilities n = rep(20, length(tv)) p = as.matrix(Pdat[,2:4]) y <- as.data.frame(rmultinomial(n,p)) yt <- cbind(tv, y) names(yt) <- c("tv", "n1", "n2", "n3") # mle2 call mle.fit <- mle2(NLL.func, data = list(y = yt), start = list(p1 = p1t, p2 = p2t, mu1 = mu1t, mu2 = mu2t), control = list(maxit = 5000, factr = 1e-10, lmm = 17), method = "L-BFGS-B", skip.hessian = TRUE, lower = list(p1 = 0, p2 = 0, mu1 = 0, mu2 = 0)) ########################################################################### I interpret the error as having to do with the finite difference approximation failing. If so, perhaps a gradient function would help? If you agree, I've described my unsuccessful attempt at writing a gradient function below. If a gradient function is unnecessary, ignore the remainder of this message. My gradient function: I derived the gradient function by taking the derivative of my NLL equation with respect to each parameter. My NLL equation is the probability mass function of the trinomial distribution. Thus the gradient equation for, say, parameter p1 would be: gr.p1 <- deriv(log(P1^n1), p1) + deriv(log(P2^n2), p1) + deriv(log(P3^n3), p1) This produces a very large equation, which I won't reproduce here. Let's say that the four gradient equations for the four parameters are defined as gr.p1, gr.p2, gr.mu1, gr.mu2, and all are derived as described above for gr.p1. These gradient equations are functions of p1, p2, mu1, mu2, t, n1, n2, and n3. My current gradient function is: grr <- function(p1, p2, mu1, mu2, y){ t <- y[,1] n1 <- y[,2] n2 <- y[,3] n3 <- y[,4] gr.p1 <- ....... gr.p2 <- ....... gr.mu1 <- ....... gr.mu2 <- ....... c(gr.p1, gr.p2, gr.mu1, gr.mu2) } The problem is that I need to supply values for t, n1, n2, and n3 to the gradient function, which are from the dataset yt, above. When I supply the dataset yt, the function produces a vector of length 4*nrow(yt) = 80. When I include it in my mle2() function, I get an error that mle2 (optim) requires a vector of length 4. How do I write my gradient function to work in mle2()? Any help would be much appreciated. Adam Zeilinger -- Adam Zeilinger Post Doctoral Scholar Department of Entomology University of California Riverside www.linkedin.com/in/adamzeilinger
Presumably you checked out the CRAN Optimization task view to see if you could find any joy there, right? (I doubt there is, but ...) -- Bert On Thu, Oct 4, 2012 at 10:12 PM, Adam Zeilinger <zeil0006 at umn.edu> wrote:> Hello R Help, > > I am trying solve an MLE convergence problem: I would like to estimate four > parameters, p1, p2, mu1, mu2, which relate to the probabilities, P1, P2, P3, > of a multinomial (trinomial) distribution. I am using the mle2() function > and feeding it a time series dataset composed of four columns: time point, > number of successes in category 1, number of successes in category 2, and > number of success in category 3. The column headers are: t, n1, n2, and n3. > > The mle2() function converges occasionally, and I need to improve the rate > of convergence when used in a stochastic simulation, with multiple > stochastically generated datasets. When mle2() does not converge, it > returns an error: "Error in optim(par = c(2, 2, 0.001, 0.001), fn = function > (p) : L-BFGS-B needs finite values of 'fn'." I am using the L-BFGS-B > optimization method with a lower box constraint of zero for all four > parameters. While I do not know any theoretical upper limit(s) to the > parameter values, I have not seen any parameter estimates above 2 when using > empirical data. It seems that when I start with certain 'true' parameter > values, the rate of convergence is quite high, whereas other "true" > parameter values are very difficult to estimate. For example, the true > parameter values p1 = 2, p2 = 2, mu1 = 0.001, mu2 = 0.001 causes convergence > problems, but the parameter values p1 = 0.3, p2 = 0.3, mu1 = 0.08, mu2 > 0.08 lead to high convergence rate. I've chosen these two sets of values > because they represent the upper and lower estimates of parameter values > derived from graphical methods. > > First, do you have any suggestions on how to improve the rate of convergence > and avoid the "finite values of 'fn'" error? Perhaps it has to do with the > true parameter values being so close to the boundary? If so, any > suggestions on how to estimate parameter values that are near zero? > > Here is reproducible and relevant code from my stochastic simulation: > > ######################################################################## > library(bbmle) > library(combinat) > > # define multinomial distribution > dmnom2 <- function(x,prob,log=FALSE) { > r <- lgamma(sum(x) + 1) + sum(x * log(prob) - lgamma(x + 1)) > if (log) r else exp(r) > } > > # vector of time points > tv <- 1:20 > > # Negative log likelihood function > NLL.func <- function(p1, p2, mu1, mu2, y){ > t <- y$tv > n1 <- y$n1 > n2 <- y$n2 > n3 <- y$n3 > P1 <- (p1*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*((-mu2)*(mu2 - p1 + p2) + > mu1*(mu2 + 2*p2)) - mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu2* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))) > P2 <- (p2*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*(-mu1^2 + 2*mu2*p1 + > mu1*(mu2 - p1 + p2)) - mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu1* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))) > P3 <- 1 - P1 - P2 > p.all <- c(P1, P2, P3) > -sum(dmnom2(c(n1, n2, n3), prob = p.all, log = TRUE)) > } > > ## Generate simulated data > # Model equations as expressions, > P1 <- expression((p1*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*((-mu2)*(mu2 - p1 + p2) + > mu1*(mu2 + 2*p2)) - mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu2* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))) > > P2 <- expression((p2*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*(-mu1^2 + 2*mu2*p1 + > mu1*(mu2 - p1 + p2)) - mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu1* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))) > > # True parameter values > p1t = 2; p2t = 2; mu1t = 0.001; mu2t = 0.001 > > # Function to calculate probabilities from 'true' parameter values > psim <- function(x){ > params <- list(p1 = p1t, p2 = p2t, mu1 = mu1t, mu2 = mu2t, t = x) > eval.P1 <- eval(P1, params) > eval.P2 <- eval(P2, params) > P3 <- 1 - eval.P1 - eval.P2 > c(x, matrix(c(eval.P1, eval.P2, P3), ncol = 3)) > } > pdat <- sapply(tv, psim, simplify = TRUE) > Pdat <- as.data.frame(t(pdat)) > names(Pdat) <- c("time", "P1", "P2", "P3") > > # Generate simulated data set from probabilities > n = rep(20, length(tv)) > p = as.matrix(Pdat[,2:4]) > y <- as.data.frame(rmultinomial(n,p)) > yt <- cbind(tv, y) > names(yt) <- c("tv", "n1", "n2", "n3") > > # mle2 call > mle.fit <- mle2(NLL.func, data = list(y = yt), > start = list(p1 = p1t, p2 = p2t, mu1 = mu1t, mu2 = mu2t), > control = list(maxit = 5000, factr = 1e-10, lmm = 17), > method = "L-BFGS-B", skip.hessian = TRUE, > lower = list(p1 = 0, p2 = 0, mu1 = 0, mu2 = 0)) > > ########################################################################### > > I interpret the error as having to do with the finite difference > approximation failing. If so, perhaps a gradient function would help? If > you agree, I've described my unsuccessful attempt at writing a gradient > function below. If a gradient function is unnecessary, ignore the remainder > of this message. > > My gradient function: I derived the gradient function by taking the > derivative of my NLL equation with respect to each parameter. My NLL > equation is the probability mass function of the trinomial distribution. > Thus the gradient equation for, say, parameter p1 would be: > > gr.p1 <- deriv(log(P1^n1), p1) + deriv(log(P2^n2), p1) + deriv(log(P3^n3), > p1) > > This produces a very large equation, which I won't reproduce here. Let's say > that the four gradient equations for the four parameters are defined as > gr.p1, gr.p2, gr.mu1, gr.mu2, and all are derived as described above for > gr.p1. These gradient equations are functions of p1, p2, mu1, mu2, t, n1, > n2, and n3. My current gradient function is: > > grr <- function(p1, p2, mu1, mu2, y){ > t <- y[,1] > n1 <- y[,2] > n2 <- y[,3] > n3 <- y[,4] > gr.p1 <- ....... > gr.p2 <- ....... > gr.mu1 <- ....... > gr.mu2 <- ....... > c(gr.p1, gr.p2, gr.mu1, gr.mu2) > } > > The problem is that I need to supply values for t, n1, n2, and n3 to the > gradient function, which are from the dataset yt, above. When I supply the > dataset yt, the function produces a vector of length 4*nrow(yt) = 80. When > I include it in my mle2() function, I get an error that mle2 (optim) > requires a vector of length 4. How do I write my gradient function to work > in mle2()? > > Any help would be much appreciated. > > Adam Zeilinger > > -- > Adam Zeilinger > Post Doctoral Scholar > Department of Entomology > University of California Riverside > www.linkedin.com/in/adamzeilinger > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.-- Bert Gunter Genentech Nonclinical Biostatistics Internal Contact Info: Phone: 467-7374 Website: http://pharmadevelopment.roche.com/index/pdb/pdb-functional-groups/pdb-biostatistics/pdb-ncb-home.htm
Berend Hasselman
2012-Oct-05 10:06 UTC
[R] problem with convergence in mle2/optim function
On 05-10-2012, at 07:12, Adam Zeilinger wrote:> Hello R Help, > > I am trying solve an MLE convergence problem: I would like to estimate four parameters, p1, p2, mu1, mu2, which relate to the probabilities, P1, P2, P3, of a multinomial (trinomial) distribution. I am using the mle2() function and feeding it a time series dataset composed of four columns: time point, number of successes in category 1, number of successes in category 2, and number of success in category 3. The column headers are: t, n1, n2, and n3. > > The mle2() function converges occasionally, and I need to improve the rate of convergence when used in a stochastic simulation, with multiple stochastically generated datasets. When mle2() does not converge, it returns an error: "Error in optim(par = c(2, 2, 0.001, 0.001), fn = function (p) : L-BFGS-B needs finite values of 'fn'." I am using the L-BFGS-B optimization method with a lower box constraint of zero for all four parameters. While I do not know any theoretical upper limit(s) to the parameter values, I have not seen any parameter estimates above 2 when using empirical data. It seems that when I start with certain 'true' parameter values, the rate of convergence is quite high, whereas other "true" parameter values are very difficult to estimate. For example, the true parameter values p1 = 2, p2 = 2, mu1 = 0.001, mu2 = 0.001 causes convergence problems, but the parameter values p1 = 0.3, p2 = 0.3, mu1 = 0.08, mu2 = 0.08 lead to high convergence rate. I've chosen these two sets of values because they represent the upper and lower estimates of parameter values derived from graphical methods. > > First, do you have any suggestions on how to improve the rate of convergence and avoid the "finite values of 'fn'" error? Perhaps it has to do with the true parameter values being so close to the boundary? If so, any suggestions on how to estimate parameter values that are near zero? > > Here is reproducible and relevant code from my stochastic simulation: > > ######################################################################## > library(bbmle) > library(combinat) > > # define multinomial distribution > dmnom2 <- function(x,prob,log=FALSE) { > r <- lgamma(sum(x) + 1) + sum(x * log(prob) - lgamma(x + 1)) > if (log) r else exp(r) > } > > # vector of time points > tv <- 1:20 > > # Negative log likelihood function > NLL.func <- function(p1, p2, mu1, mu2, y){ > t <- y$tv > n1 <- y$n1 > n2 <- y$n2 > n3 <- y$n3 > P1 <- (p1*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*((-mu2)*(mu2 - p1 + p2) + > mu1*(mu2 + 2*p2)) - mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu2* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))) > P2 <- (p2*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*(-mu1^2 + 2*mu2*p1 + > mu1*(mu2 - p1 + p2)) - mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu1* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))) > P3 <- 1 - P1 - P2 > p.all <- c(P1, P2, P3) > -sum(dmnom2(c(n1, n2, n3), prob = p.all, log = TRUE)) > } > > ## Generate simulated data > # Model equations as expressions, > P1 <- expression((p1*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*((-mu2)*(mu2 - p1 + p2) + > mu1*(mu2 + 2*p2)) - mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu2*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu2* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))) > > P2 <- expression((p2*((-1 + exp(sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2)))*t))*(-mu1^2 + 2*mu2*p1 + > mu1*(mu2 - p1 + p2)) - mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))) - > exp(sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))*t)* > mu1*sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))) + > 2*exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)*mu1* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2)))))/ > exp((1/2)*(mu1 + mu2 + p1 + p2 + sqrt((mu1 + mu2 + p1 + p2)^2 - > 4*(mu2*p1 + mu1*(mu2 + p2))))*t)/(2*(mu2*p1 + mu1*(mu2 + p2))* > sqrt((mu1 + mu2 + p1 + p2)^2 - 4*(mu2*p1 + mu1*(mu2 + p2))))) > > # True parameter values > p1t = 2; p2t = 2; mu1t = 0.001; mu2t = 0.001 > > # Function to calculate probabilities from 'true' parameter values > psim <- function(x){ > params <- list(p1 = p1t, p2 = p2t, mu1 = mu1t, mu2 = mu2t, t = x) > eval.P1 <- eval(P1, params) > eval.P2 <- eval(P2, params) > P3 <- 1 - eval.P1 - eval.P2 > c(x, matrix(c(eval.P1, eval.P2, P3), ncol = 3)) > } > pdat <- sapply(tv, psim, simplify = TRUE) > Pdat <- as.data.frame(t(pdat)) > names(Pdat) <- c("time", "P1", "P2", "P3") > > # Generate simulated data set from probabilities > n = rep(20, length(tv)) > p = as.matrix(Pdat[,2:4]) > y <- as.data.frame(rmultinomial(n,p)) > yt <- cbind(tv, y) > names(yt) <- c("tv", "n1", "n2", "n3") > > # mle2 call > mle.fit <- mle2(NLL.func, data = list(y = yt), > start = list(p1 = p1t, p2 = p2t, mu1 = mu1t, mu2 = mu2t), > control = list(maxit = 5000, factr = 1e-10, lmm = 17), > method = "L-BFGS-B", skip.hessian = TRUE, > lower = list(p1 = 0, p2 = 0, mu1 = 0, mu2 = 0)) > > ########################################################################### > > I interpret the error as having to do with the finite difference approximation failing. If so, perhaps a gradient function would help? If you agree, I've described my unsuccessful attempt at writing a gradient function below. If a gradient function is unnecessary, ignore the remainder of this message. >After playing with your function, I can't agree with your interpretation of what could be wrong. During optim iterations your function is dmnom2 is getting negative values for prob and that leads to the error messages. I checked this by inserting the following lines in NLL.func after the assignment to p.all: cat("NLL.func p.all {P1,P2,P3}\n") print(matrix(p.all, ncol=3)) At some stage entries for P1, P2, P3 become negative (which ones and how many depends on the random number generator). Try set.seed(1), set.seed(11) and set.seed(413) to see what happens. The expressions are too complicated for further analysis. Assuming your expressions are correct, you will need to restrict P1,P2,P3 to take on valid values. Berend