The source code listed below (just before the system information) makes "optim" to ignore the bounds on one of the parameters; this is detected by the function to be optimized (sn.dev) and the program stops. On my computer the numeric optput which I obtain is: sn.dev: (cp,dev)=[1] -1.3222 0.3738 0.3263 42.1824 sn.dev.gh: gradient=[1] 2.092 4.059 -4.039 sn.dev: (cp,dev)=[1] -2.2611 0.2060 0.6266 686.3262 sn.dev.gh: gradient=[1] -1208.1 -5719.7 -287.1 sn.dev: (cp,dev)=[1] -1.3632 0.3665 0.3394 42.6260 sn.dev.gh: gradient=[1] -27.758 1.067 -4.431 sn.dev: (cp,dev)=[1] -1.3279 0.3728 0.3281 42.1705 sn.dev.gh: gradient=[1] -1.976 3.989 -4.090 sn.dev: (cp,dev)=[1] -1.3279 0.3667 0.3348 42.1461 sn.dev.gh: gradient=[1] -0.4494 -4.9931 -4.0685 sn.dev: (cp,dev)=[1] -1.3280 0.3671 0.3403 42.1219 sn.dev.gh: gradient=[1] -0.5089 -4.4250 -4.0727 sn.dev: (cp,dev)=[1] -1.3282 0.3686 0.3623 42.0268 sn.dev.gh: gradient=[1] -0.7684 -2.2082 -4.0932 sn.dev: (cp,dev)=[1] -1.3290 0.3748 0.4506 41.6734 sn.dev.gh: gradient=[1] -2.128 5.823 -4.226 sn.dev: (cp,dev)=[1] -1.3341 0.3927 0.9953 25934.9488 sn.dev.gh: gradient=[1] 219013 -312643 441647332 [1] -1.3546 0.4645 3.1741 Error in fn(par, ...) : cp outside bounds The code looks somewhat involved.. however, what it does is to set up ingredients to run the final statement: opt<- optim(cp, fn=sn.dev, gr=sn.dev.gh, method="L-BFGS-B", lower=c(-rep(Inf,m),1e-10, -0.99527), upper=c(rep(Inf,m), Inf, 0.99527), control=list(iter.max=100, abs.tol=1e-5), X=X, y=y, trace=TRUE, hessian=FALSE) The above sequence stoped because the last component of vector cp is 3.1741, which exceeds the bound 0.99527. Notice that the problem occurs "with 0 probability", in the sense that 1. if you change some of the data below (in vector monica2), then it is most likely to run fine; 2. if you change the starting point, then again it works; specifically I changed cp <- c(qr.coef(qrX,y), s, gamma1) to cp <- c(-1.322168500, 0.373810085, 0.326283975) which differ by [1] 2.666e-10 -3.154e-10 -3.033e-10 and it worked from the second starting vector; sn.dev: (cp,dev)=[1] -1.3222 0.3738 0.3263 42.1824 sn.dev.gh: gradient=[1] 2.092 4.059 -4.039 sn.dev: (cp,dev)=[1] -2.2611 0.2060 0.6266 686.3263 sn.dev.gh: gradient=[1] -1208.1 -5719.7 -287.1 sn.dev: (cp,dev)=[1] -1.3632 0.3665 0.3394 42.6260 sn.dev.gh: gradient=[1] -27.758 1.067 -4.431 sn.dev: (cp,dev)=[1] -1.3279 0.3728 0.3281 42.1705 sn.dev.gh: gradient=[1] -1.976 3.989 -4.090 sn.dev: (cp,dev)=[1] -1.3279 0.3667 0.3348 42.1461 sn.dev.gh: gradient=[1] -0.4494 -4.9931 -4.0685 sn.dev: (cp,dev)=[1] -1.3280 0.3671 0.3403 42.1219 sn.dev.gh: gradient=[1] -0.5089 -4.4250 -4.0727 sn.dev: (cp,dev)=[1] -1.3282 0.3686 0.3623 42.0268 sn.dev.gh: gradient=[1] -0.7684 -2.2082 -4.0932 sn.dev: (cp,dev)=[1] -1.3290 0.3748 0.4506 41.6734 sn.dev.gh: gradient=[1] -2.128 5.823 -4.226 sn.dev: (cp,dev)=[1] -1.3341 0.3927 0.9953 25934.8747 sn.dev.gh: gradient=[1] 219013 -312642 441646067 sn.dev: (cp,dev)=[1] -1.3307 0.3808 0.6321 40.9462 sn.dev.gh: gradient=[1] -2.806 8.579 -4.256 sn.dev: (cp,dev)=[1] -1.3324 0.3867 0.8137 40.3288 <..snip..> sn.dev: (cp,dev)=[1] -1.3505 0.4184 0.9953 35.7271 sn.dev.gh: gradient=[1] 12.18 -13.92 -13349.90> opt$par [1] -1.3505 0.4184 0.9953 $value [1] 35.73 $counts function gradient 44 44 $convergence [1] 0 $message [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH" 3. when I run the same code on my MS-windows 2000 laptop (R 1.7.0), again it runs fine. Hope to have provided a complete description. With best regards, Adelchi Azzalini <azzalini@stat.unipd.it> Dipart.Scienze Statistiche, Università di Padova, Italia http://azzalini.stat.unipd.it/ #-------------------------------------------- # code which produced the above output, you can just source() it rm(list=ls()) sn.dev <- function(cp, X, y, trace=FALSE) { # -2*logL for centred parameters m <- ncol(X) if(abs(cp[length(cp)])> 0.99527) {print(cp); stop("cp outside bounds")} dp <- as.vector(cp.to.dp(cp)) location <- X %*% as.matrix(dp[1:m]) scale <- dp[m+1] # AVOID: logL <- sum(log(dsn(y,location,dp[m+1],dp[m+2]))) z <- (y-location)/scale nlogL <- (length(y)*log(2.506628274631*scale) + 0.5*sum(z^2) - sum(zeta(0,dp[m+2]*z))) if(trace) {cat("sn.dev: (cp,dev)="); print(c(cp,2*nlogL))} return(2*nlogL) } sn.dev.gh <- function(cp, X, y, trace=FALSE, hessian=FALSE) { # computes gradient and hessian of dev=-2*logL for centred parameters # (and observed information matrix); m <- ncol(X) n <- nrow(X) np <- m+2 score <- rep(NA,np) info <- matrix(NA,np,np) beta <- cp[1:m] sigma <- cp[m+1] gamma1 <- cp[m+2] lambda <- gamma1.to.lambda(gamma1) # dp<-cp.to.dp(c(beta,sigma,gamma1)) # info.dp <- sn.info(dp,y)$info.dp mu <- as.vector(X %*% as.matrix(beta)) d <- y-mu r <- d/sigma E.Z<- lambda*sqrt(2/(pi*(1+lambda^2))) s.Z<- sqrt(1-E.Z^2) z <- E.Z+s.Z*r p1 <- as.vector(zeta(1,lambda*z)) p2 <- as.vector(zeta(2,lambda*z)) omega<- sigma/s.Z w <- lambda*p1-E.Z DE.Z <- sqrt(2/pi)/(1+lambda^2)^1.5 Ds.Z <- (-E.Z/s.Z)*DE.Z Dz <- DE.Z + r*Ds.Z DDE.Z<- (-3)*E.Z/(1+lambda^2)^2 DDs.Z<- -((DE.Z*s.Z-E.Z*Ds.Z)*DE.Z/s.Z^2+E.Z*DDE.Z/s.Z) DDz <- DDE.Z + r*DDs.Z score[1:m] <- omega^(-2)*t(X) %*% as.matrix(y-mu-omega*w) score[m+1] <- (-n)/sigma+s.Z*sum(d*(z-p1*lambda))/sigma^2 score[m+2] <- score.l <- n*Ds.Z/s.Z-sum(z*Dz)+sum(p1*(z+lambda*Dz)) Dg.Dl <-1.5*(4-pi)*E.Z^2*(DE.Z*s.Z-E.Z*Ds.Z)/s.Z^4 R <- E.Z/s.Z T <- sqrt(2/pi-(1-2/pi)*R^2) Dl.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi)) R. <- 2/(3*R^2 * (4-pi)) T. <- (-R)*R.*(1-2/pi)/T DDl.Dg <- (-2/(3*(4-pi))) * (T./(R*T)^2+2*R./(T*R^3)+3*(1-2/pi)*T./T^4) score[m+2] <- score[m+2]/Dg.Dl # convert deriv wrt lamda to gamma1 gradient <- (-2)*score if(hessian){ info[1:m,1:m] <- omega^(-2) * t(X) %*% ((1-lambda^2*p2)*X) info[1:m,m+1] <- info[m+1,1:m] <- s.Z* t(X) %*% as.matrix((z-lambda*p1)+d*(1-lambda^2*p2)* s.Z/sigma)/sigma^2 info[m+1,m+1] <- (-n)/sigma^2+2*s.Z*sum(d*(z-lambda*p1))/sigma^3 + s.Z^2*sum(d*(1-lambda^2*p2)*d)/sigma^4 info[1:m,m+2] <- info[m+2,1:m] <- t(X)%*%(-2*Ds.Z*d/omega+Ds.Z*w+s.Z*(p1+lambda*p2*(z+lambda*Dz) -DE.Z))/sigma info[m+1,m+2] <- info[m+2,m+1] <- -sum(d*(Ds.Z*(z-lambda*p1)+s.Z*(Dz-p1-p2*lambda*(z+lambda*Dz)) ))/sigma^2 info[m+2,m+2] <- (n*(-DDs.Z*s.Z+Ds.Z^2)/s.Z^2+sum(Dz^2+z*DDz)- sum(p2*(z+lambda*Dz)^2)- sum(p1*(2*Dz+lambda*DDz))) info[np,] <- info[np,]/Dg.Dl # convert info wrt lamda to gamma1 info[,np] <- info[,np]*Dl.Dg # an equivalent form of the above info[np,np] <- info[np,np]-score.l*DDl.Dg } attr(gradient,"hessian") <- 2*info if(trace) {cat("sn.dev.gh: gradient="); print(-2*score)} return(gradient) } cp.to.dp <- function(param){ # converts centred parameters cp=(mu,sigma,gamma1) # to direct parameters dp=(xi,omega,lambda) # Note: mu can be m-dimensional, the other must be scalars b <- sqrt(2/pi) m <- length(param)-2 gamma1 <- param[m+2] if(abs(gamma1)>0.9952719) stop("abs(gamma1)>0.9952719 ") A <- sign(gamma1)*(abs(2*gamma1/(4-pi)))^(1/3) delta <- A/(b*sqrt(1+A^2)) lambda <- delta/sqrt(1-delta^2) E.Z <- b*delta sd.Z <- sqrt(1-E.Z^2) location <- param[1:m] location[1] <- param[1]-param[m+1]*E.Z/sd.Z scale <- param[m+1]/sd.Z dp <- c(location,scale,lambda) names(dp)[(m+1):(m+2)] <- c("scale","shape") if(m==1) names(dp)[1] <- "location" dp } zeta <- function(k,x){# k integer \in (0,4) k <- as.integer(k) na <- is.na(x) x <- replace(x,na,0) if(any(abs(x)==Inf)) stop("Inf not allowed") # funzionerebbe per k=0 e 1, ma non per k>1 ok <- (-35<x) if(k>2 & sum(!ok)>0) na<- (na | x < (-35)) if(k==0) {ax <- (-x[!ok]) ay <- (-0.918938533204673)-0.5*ax^2-log(ax) y <- rep(NA,length(x)) y[ok] <- log(2*pnorm(x[ok])) y[!ok]<- ay } else {if(k==1) {y <- (-x)*(1+1/x^2) y[ok]<-dnorm(x[ok])/pnorm(x[ok]) } else { if(k==2) y<-(-zeta(1,x)*(x+zeta(1,x))) else{ if(k==3) y<-(-zeta(2,x)*(x+zeta(1,x))-zeta(1,x)*(1+zeta(2,x))) else{ if(k==4) y<-(-zeta(3,x)*(x+2*zeta(1,x))-2*zeta(2,x)*(1+zeta(2,x))) else stop("k must be integer in (0,4)") }}}} replace(y,na,NA) } gamma1.to.lambda<- function(gamma1){ max.gamma1 <- 0.5*(4-pi)*(2/(pi-2))^1.5 na <- (abs(gamma1)>max.gamma1) if(any(na)) warning("NAs generated") gamma1<-replace(gamma1,na,NA) a <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333 delta<- sqrt(pi/2)*a/sqrt(1+a^2) lambda<-delta/sqrt(1-delta^2) as.vector(lambda) } monica2 <- structure(c(-1.13886379255452, -1.49413719201167, -1.87841064955904, -0.513738594928947, -1.00896950224601, -1.6532307897784, -1.30938967543405, -1.06041025293133, -1.81471688461199, -1.33766295196981, -1.64330728995432, -1.06469899752166, -1.00001063084463, -1.39630202884595, -0.69585045438959, -1.69203796703315, -1.52865988854611, -1.62709073708234, -0.934436685805195, -1.11123590657362, -1.67332502980791, -1.16134216754217, -1.27483304830845, -1.83766242751329, -1.56429386551254, -0.70663591112117, -0.806879736670856, -1.39706539761330, -1.48444503984765, -1.48192041745309, -0.763350185813698, -1.42340943250413, -1.5034850496403, -0.916057843936642, -0.797650717731944, -1.90218776624194, -1.89573584905280, -1.55031254908193, -1.55488970245028, -1.17137569123985, -1.81490209565657, -1.82974300222698, -0.797407302025425, -1.16514651844490, -1.31007112485982, -1.71325589588157, -1.54253707834837, -1.28190616471927, -1.28990272862791, -0.593534374170964), parameters = c(-1.71631422551004, 0.5, 2)) #---- # cp <- c(-1.322168500, 0.373810085, 0.326283975) y <- monica2 X <- matrix(1, nrow=length(monica2)) n <- length(y) m <- ncol(X) qrX <- qr(X) s <- sqrt(sum(qr.resid(qrX, y)^2)/n) gamma1 <- sum(qr.resid(qrX, y)^3)/(n*s^3) if(abs(gamma1)>0.99527) gamma1<- sign(gamma1)*0.95 cp <- c(qr.coef(qrX,y), s, gamma1) opt<- optim(cp, fn=sn.dev, gr=sn.dev.gh, method="L-BFGS-B", lower=c(-rep(Inf,m),1e-10, -0.99527), upper=c(rep(Inf,m), Inf, 0.99527), control=list(iter.max=100, abs.tol=1e-5), X=X, y=y, trace=TRUE, hessian=FALSE) --please do not edit the information below-- Version: platform = i686-pc-linux-gnu arch = i686 os = linux-gnu system = i686, linux-gnu status = major = 1 minor = 7.0 year = 2003 month = 04 day = 16 language = R Search Path: .GlobalEnv, package:methods, package:ctest, package:mva, package:modreg, package:nls, package:ts, file:/home/aa/SW/R-misc/local-def.save, Autoloads, package:base