Dear all,
I am using the "rq.fit.hogg" function from the "quantreg"
package. I have
two problems with it.
First (less importantly), it gives an error at its default values with
error message "Error in if (n2 != length(r)) stop("R and r of
incompatible
dimension") : argument is of length zero". I solve this by commenting
four
lines in the code. I.e. I define a new function "rq.fit.hogg2" that is
the
same as "rq.fit.hogg" but with four lines commented. You will see this
in
my code at the end of this post. I understand why this solution works, so
it is not really a problem right now.
Second (importantly), "rq.fit.hogg" crashes frequently. The message I
get
is "R for Windows GUI front-end has stopped working. A problem cause the
program to stop working correctly. Windows will close the program and
notify you if a solution is available."
I don't know how to provide a reproducible example of the crash because the
function seems to crash at random. That is, I generate some data (with a
fixed seed so that I can replicate it exactly), input into the function,
and sometimes it crashes but sometimes not. If I create a loop over
different seeds and run the function once in each iteration, sometimes it
crashes on the first iteration while sometimes it goes fine for some 20
iterations and crashes only then.
I am including the code with the loop that should eventually produce a
crash. If it does not, try running it again; that worked every time for me.
I am also including session info and locale info.
Please help me. Thank you!
Kind regards,
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Here are my session details etc.:
> Sys.getlocale()
[1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United
States.1252;LC_MONETARY=English_United
States.1252;LC_NUMERIC=C;LC_TIME=English_United
States.1252"
> sessionInfo()
R version 3.4.0 (2017-04-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 14393)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] quantreg_5.33 SparseM_1.77 MASS_7.3-47
loaded via a namespace (and not attached):
[1] compiler_3.4.0 Matrix_1.2-10 MatrixModels_0.4-1 grid_3.4.0
[5] lattice_0.20-35
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Here is the code that crashes:
#-----Here I redefine the function as described in my first paragraph
rq.fit.hogg2 = function (x, y, taus = c(0.1, 0.3, 0.5), weights = c(0.7,
0.2, 0.1), R = NULL, r = NULL, beta = 0.99995, eps = 1e-06)
{
n <- length(y)
n2 <- nrow(R)
m <- length(taus)
p <- ncol(x) + m
if (n != nrow(x))
stop("x and y don't match n")
if (m != length(weights))
stop("taus and weights differ in length")
if (any(taus < eps) || any(taus > 1 - eps))
stop("taus outside (0,1)")
W <- diag(weights)
if (m == 1)
W <- weights
x <- as.matrix(x)
X <- cbind(kronecker(W, rep(1, n)), kronecker(weights, x))
y <- kronecker(weights, y)
rhs <- c(weights * (1 - taus) * n, sum(weights * (1 - taus)) *
apply(x, 2, sum))
# if (n2 != length(r))
# stop("R and r of incompatible dimension")
# if (ncol(R) != p)
# stop("R and X of incompatible dimension")
d <- rep(1, m * n)
u <- rep(1, m * n)
if (length(r)) {
wn1 <- rep(0, 10 * m * n)
wn1[1:(m * n)] <- 0.5
wn2 <- rep(0, 6 * n2)
wn2[1:n2] <- 1
z <- .Fortran("rqfnc", as.integer(m * n), as.integer(n2),
as.integer(p), a1 = as.double(t(as.matrix(X))), c1 as.double(-y),
a2 = as.double(t(as.matrix(R))), c2 = as.double(-r),
rhs = as.double(rhs), d1 = double(m * n), d2 = double(n2),
as.double(u), beta = as.double(beta), eps = as.double(eps),
wn1 = as.double(wn1), wn2 = as.double(wn2), wp = double((p +
3) * p), it.count = integer(3), info = integer(1))
}
else {
wn <- rep(0, 10 * m * n)
wn[1:(m * n)] <- 0.5
z <- .Fortran("rqfnb", as.integer(m * n), as.integer(p),
a = as.double(t(as.matrix(X))), c = as.double(-y),
rhs = as.double(rhs), d = as.double(d), as.double(u),
beta = as.double(beta), eps = as.double(eps), wn as.double(wn),
wp = double((p + 3) * p), it.count = integer(2),
info = integer(1))
}
if (z$info != 0)
warning(paste("Info = ", z$info, "in stepy: singular
design:
iterations ",
z$it.count[1]))
coefficients <- -z$wp[1:p]
if (any(is.na(coefficients)))
stop("NA coefs: infeasible problem?")
list(coefficients = coefficients, nit = z$it.count, flag = z$info)
}
#----- Here is the main program that crashes
library(MASS)
library(quantreg)
M=1e3; n=1e3; p=15; type=8; K=10 # no. of replications, sample size,
dimension of beta, type of quantile estimator, number of quantiles
distributions=c("norm","t1","t2","t3","t5","logistic","
exponential","Weibull")
method.wqr="fn" # interior point method; if "fn" then
roughly matches with
method.cqr="ip"
# Create the covariance matrix of X
Sigma=matrix(NA,p,p); for(i in 1:p) for(j in 1:p) Sigma[i,j]=0.5^(abs(i-j))
# Generate X (common across all simulations)
set.seed(0); X=mvrnorm(n=n,mu=rep(0,p),Sigma=Sigma)
Binvlist=list()
for(k in 1:K){
tau=cumsum(rep(1/(k+1),k))
Ai=matrix(rep(tau,k),nrow=k,ncol=k,byrow=TRUE)
Aj=matrix(rep(tau,k),nrow=k,ncol=k,byrow=FALSE)
Amin=pmin(Ai,Aj) # Amin=Ai; Amin[Ai>Aj]=Aj[Ai>Aj]
Ax=tau %*% t(tau)
B=Amin-Ax
Binvlist[[k]]=solve(B)
}
for(m in 1:M){
mse_wqr_list=mse_cqr_list=list()
j=0; for(distr in distributions){
j=j+1
#print(distributions[col])
mse_wqr_list[[j]]=mse_cqr_list[[j]]=matrix(NA,M,K)
set.seed(m); beta=rnorm(p)
set.seed(m+1000000000)
if(distr=="norm" ) eps=rnorm(n)
if(substr(x=distr,start=1,stop=1)=="t"){
df=as.numeric(substr(x=distr,start=2,stop=nchar(distr)));
eps=rt(n,df=df) }
if(distr=="logistic" ) eps=rlogis(n)
if(distr=="exponential") eps=rexp(n)
if(distr=="Weibull" ) eps=rweibull(n,shape=1.5)
y=X%*%beta+eps
model=lm(y~X); res=model$res; d=density(res)
mse_wqr=mse_cqr=rep(NA,K)
for(k in 1:K){
#----- Find estimated optimal weights
tau=cumsum(rep(1/(k+1),k))
Binv=Binvlist[[k]]
q = quantile(res,probs=tau,type=type) # Hyndman recommends `type=8` for
the `quantile` function
tmp=as.numeric(c(d$x,q)); ranks=rank(tmp);
below=tail(ranks,length(q))-c(1:length(q));
above=below+1
v = d$y[below] * (q-d$x[below])/(d$x[above]-d$x[below]) + d$y[above] *
(d$x[above]-q)/(d$x[above]-d$x[below])
if(k==1) V = matrix(v,1,1) else V = diag(v)
w_CQR = Binv %*% v; w_CQR = w_CQR / sum(w_CQR)
#----- Use the estimated optimal weights to actually estimate beta (with
EQR and CQR) and evaluate how well it does
print(paste(m, distr, k)); readline()
################ THE NEXT LINE CRASHES AT RANDOM:
fit_cqr=try( rq.fit.hogg2(x=cbind(X),y=y,taus=tau,weights=c(w_CQR)) )
} # for(k in 1:K)
} # for(distr in distributions)
} # for(m in 1:M)
[[alternative HTML version deleted]]