Andreas Klein
2013-Mar-10 20:45 UTC
[R] Function ar() in package stats: AIC procedure and time series length
Dear R users, I took a closer look to the AIC-procedure for determining the order m of an VAR(m) process of the function ar() in package stats. From the original code (source: http://cran.r-project.org/src/base/R-2/R-2.15.3.tar.gz -> R-2.15.3\src\library\stats\R\ar.R -> lines 95 to 98) [...] cal.aic <- function() { # omits mean params, that is constant adj ??????????? det <- abs(prod(diag(qr(EA)$qr))) ??????????? return(n.used * log(det) + 2 * m * nser * nser) } [...] With: nser: number of time series n.used: number of observations in one of nser time series m: number of parameters in one single equation out of nser equations of the VAR(m) model This function computes "n.used * log(det)..." but in my oppinion it has to be "nser * n.used * log(det)...". Do I understand here something completely wrong? #Example to show that it makes a difference (I use the original code but shorten it a bit to the multivariate case): #Read in functions var.yw.aic.original() and var.yw.aic.modified() var.yw.aic.original <- function (x) { ??? x <- as.matrix(x) ??? xm <- colMeans(x) ??? x <- sweep(x, 2L, xm, check.margin=FALSE) ??? n.used <- nrow(x) ??? nser <- ncol(x) ??? order.max <- floor(10 * log10(n.used)) ??? xacf <- acf(x, type="covariance", lag.max=order.max, plot=FALSE, demean=TRUE)$acf ??? snames <- colnames(x) ??? A <- B <- array(0, dim = c(order.max + 1L, nser, nser)) ??? A[1L, , ] <- B[1L, , ] <- diag(nser) ??? EA <- EB <- xacf[1L, , , drop = TRUE] ??? xaic <- numeric(order.max + 1L) ??? solve.yw <- function(m) { ??? ??? betaA <- betaB <- 0 ??? ??? for (i in 0L:m) { ??? ??? ??? betaA <- betaA + A[i + 1L, , ] %*% xacf[m + 2L - i, , ] ??? ??? ??? betaB <- betaB + B[i + 1L, , ] %*% t(xacf[m + 2L - i, , ]) ??? ??? } ??? ??? KA <- -t(qr.solve(t(EB), t(betaA))) ??? ??? KB <- -t(qr.solve(t(EA), t(betaB))) ??? ??? EB <<- (diag(nser) - KB %*% KA) %*% EB ??? ??? EA <<- (diag(nser) - KA %*% KB) %*% EA ??? ??? Aold <- A ??? ??? Bold <- B ??? ??? for (i in seq_len(m + 1L)) { ??? ??? ??? A[i + 1L, , ] <<- Aold[i + 1L, , ] + KA %*% Bold[m + 2L - i, , ] ??? ??? ??? B[i + 1L, , ] <<- Bold[i + 1L, , ] + KB %*% Aold[m + 2L - i, , ] ??? ??? } ??? } ??? cal.aic <- function() { ??? ??? det <- abs(prod(diag(qr(EA)$qr))) ??? ??? return(n.used * log(det) + 2 * m * nser * nser) ??? } ??? cal.resid <- function() { ??? ??? resid <- array(0, dim = c(n.used - order, nser)) ??? ??? for (i in 0L:order) resid <- resid + x[(order - i + 1L):(n.used - i), , drop = FALSE] %*% t(ar[i + 1L, , ]) ??? ??? return(rbind(matrix(NA, order, nser), resid)) ??? } ??? order <- 0L ??? for (m in 0L:order.max) { ??? ??? xaic[m + 1L] <- cal.aic() ??? ??? if(xaic[m + 1L] == min(xaic[seq_len(m + 1L)])) { ??? ??? ??? ar <- A ??? ??? ??? order <- m ??? ??? ??? var.pred <- EA * n.used/(n.used - nser * (m + 1L)) ??? ??? } ??? ??? if(m < order.max) solve.yw(m) ??? } ??? xaic <- xaic - min(xaic) ??? names(xaic) <- 0L:order.max ??? resid <- cal.resid() ??? if(order) { ??? ??? ar <- -ar[2L:(order + 1L), , , drop = FALSE] ??? ??? dimnames(ar) <- list(seq_len(order), snames, snames) ??? } else ar <- array(0, dim=c(0L, nser, nser), dimnames=list(NULL, snames, snames)) ??? colnames(resid) <- colnames(x) ??? order } var.yw.aic.modified <- function (x) { ??? x <- as.matrix(x) ??? xm <- colMeans(x) ??? x <- sweep(x, 2L, xm, check.margin=FALSE) ??? n.used <- nrow(x) ??? nser <- ncol(x) ??? order.max <- floor(10 * log10(n.used)) ??? xacf <- acf(x, type="covariance", lag.max=order.max, plot=FALSE, demean=TRUE)$acf ??? snames <- colnames(x) ??? A <- B <- array(0, dim = c(order.max + 1L, nser, nser)) ??? A[1L, , ] <- B[1L, , ] <- diag(nser) ??? EA <- EB <- xacf[1L, , , drop = TRUE] ??? xaic <- numeric(order.max + 1L) ??? solve.yw <- function(m) { ??? ??? betaA <- betaB <- 0 ??? ??? for (i in 0L:m) { ??? ??? ??? betaA <- betaA + A[i + 1L, , ] %*% xacf[m + 2L - i, , ] ??? ??? ??? betaB <- betaB + B[i + 1L, , ] %*% t(xacf[m + 2L - i, , ]) ??? ??? } ??? ??? KA <- -t(qr.solve(t(EB), t(betaA))) ??? ??? KB <- -t(qr.solve(t(EA), t(betaB))) ??? ??? EB <<- (diag(nser) - KB %*% KA) %*% EB ??? ??? EA <<- (diag(nser) - KA %*% KB) %*% EA ??? ??? Aold <- A ??? ??? Bold <- B ??? ??? for (i in seq_len(m + 1L)) { ??? ??? ??? A[i + 1L, , ] <<- Aold[i + 1L, , ] + KA %*% Bold[m + 2L - i, , ] ??? ??? ??? B[i + 1L, , ] <<- Bold[i + 1L, , ] + KB %*% Aold[m + 2L - i, , ] ??? ??? } ??? } ??? cal.aic <- function() { ??? ??? det <- abs(prod(diag(qr(EA)$qr))) ??? ??? return(nser * n.used * log(det) + 2 * m * nser * nser) ??? } ??? cal.resid <- function() { ??? ??? resid <- array(0, dim = c(n.used - order, nser)) ??? ??? for (i in 0L:order) resid <- resid + x[(order - i + 1L):(n.used - i), , drop = FALSE] %*% t(ar[i + 1L, , ]) ??? ??? return(rbind(matrix(NA, order, nser), resid)) ??? } ??? order <- 0L ??? for (m in 0L:order.max) { ??? ??? xaic[m + 1L] <- cal.aic() ??? ??? if(xaic[m + 1L] == min(xaic[seq_len(m + 1L)])) { ??? ??? ??? ar <- A ??? ??? ??? order <- m ??? ??? ??? var.pred <- EA * n.used/(n.used - nser * (m + 1L)) ??? ??? } ??? ??? if(m < order.max) solve.yw(m) ??? } ??? xaic <- xaic - min(xaic) ??? names(xaic) <- 0L:order.max ??? resid <- cal.resid() ??? if(order) { ??? ??? ar <- -ar[2L:(order + 1L), , , drop = FALSE] ??? ??? dimnames(ar) <- list(seq_len(order), snames, snames) ??? } else ar <- array(0, dim=c(0L, nser, nser), dimnames=list(NULL, snames, snames)) ??? colnames(resid) <- colnames(x) ??? order } #Data for the example x <- c(-0.00658,? 0.00802,? 0.00642,? 0.00412, -0.00618,? 0.00462, -0.00168, -0.00148, -0.00108, -0.00608, -0.00058, -0.00318, -0.00528, -0.00218, -0.00688,? 0.00022,? 0.00472, -0.00028,? 0.00362,? 0.00122,? 0.00852, -0.00118) y <- c(-0.0046,? 0.0018,? 0.0088,? 0.0051, -0.0020,? 0.0018,? 0.0083, -0.0012,? 0.0096, -0.0014, -0.0035, -0.0107, -0.0052, -0.0027, -0.0095,? 0.0008,? 0.0070, -0.0018,? 0.0031, -0.0019,? 0.0097, -0.0022) #Calculations ar(cbind(x, y))$order #m=0 var.yw.aic.original(cbind(x, y)) #m=0 var.yw.aic.modified(cbind(x, y)) #m=13 Is my understanding of the function calc.aic() inside ar() wrong? I hope someone can help me to understand why ar() uses "n.used" instead of "nser * n.used"? Best Andreas