b-h@mevik.net
2004-Jan-30 20:25 UTC
[Rd] Two apparent bugs in aov(y~ *** -1 + Error(***)), with suggested (PR#6510)
I think there are two bugs in aov() that shows up when the right hand side of `formula' contains both `-1' and an Error() term, e.g., aov(y ~ a + b - 1 + Error(c), ...). Without `-1' or `Error()' there is no problem. I've included and example, and the source of aov() with suggested fixes below. The first bug (labeled BUG 1 below) creates an extra, empty stratum inside `Within' in the result list (se example below). The second bug (BUG 2) assigns the fit of each stratum at one index too high in the result list; i.e., the fit of stratum 1 is assigned to `Stratum 2' etc. `Stratum 1' is NULL. (Also the extra stratum added by BUG 1, is NULL.) *** *** VERSION INFORMATION ***> version_ platform i386-pc-linux-gnu arch i386 os linux-gnu system i386, linux-gnu status major 1 minor 8.1 year 2003 month 11 day 21 language R *** *** EXAMPLE ***> test.df <- data.frame (y=rnorm(8), a=gl(2,1,8), b=gl(2,3,8), c=gl(2,4,8)) > ## With intercept; no problem: > aov(y~a+b+Error(c), data=test.df)Call: aov(formula = y ~ a + b + Error(c), data = test.df) Grand Mean: -0.4105379 Stratum 1: c Terms: b Sum of Squares 0.3602348 Deg. of Freedom 1 Estimated effects are balanced Stratum 2: Within Terms: a b Residuals Sum of Squares 0.0142722 2.7349479 1.7883294 Deg. of Freedom 1 1 4 Residual standard error: 0.6686422 Estimated effects may be unbalanced> ## Without intercept; incorrect strata assignments: > aov(y~a+b-1+Error(c), data=test.df)Call: aov(formula = y ~ a + b - 1 + Error(c), data = test.df) Stratum 1: c NULL Stratum 2: Within Terms: a b Sum of Squares 1.3483306 0.3602348 Deg. of Freedom 1 1 1 out of 3 effects not estimable Estimated effects may be unbalanced Stratum 3: NA NULL> ## Fixed version: > fixaov(y~a+b-1+Error(c), data=test.df)Call: fixaov(formula = y ~ a + b - 1 + Error(c), data = test.df) Stratum 1: c Terms: a b Sum of Squares 1.3483306 0.3602348 Deg. of Freedom 1 1 1 out of 3 effects not estimable Estimated effects may be unbalanced Stratum 2: Within Terms: a b Residuals Sum of Squares 0.0142722 2.7349479 1.7883294 Deg. of Freedom 1 1 4 Residual standard error: 0.6686422 1 out of 3 effects not estimable Estimated effects may be unbalanced>*** *** aov() code with suggested fixes: *** fixaov <- function(formula, data = NULL, projections = FALSE, qr = TRUE, contrasts = NULL, ...) { Terms <- if(missing(data)) terms(formula, "Error") else terms(formula, "Error", data = data) indError <- attr(Terms, "specials")$Error if(length(indError) > 1) stop(paste("There are", length(indError), "Error terms: only 1 is allowed")) lmcall <- Call <- match.call() lmcall[[1]] <- as.name("lm") lmcall$singular.ok <- TRUE # not currently used in R if(projections) qr <- lmcall$qr <- TRUE lmcall$projections <- NULL if(is.null(indError)) { ## no Error term fit <- eval(lmcall, parent.frame()) if(projections) fit$projections <- proj(fit) class(fit) <- if(inherits(fit, "mlm")) c("maov", "aov", oldClass(fit)) else c("aov", oldClass(fit)) fit$call <- Call return(fit) } else { ## helmert contrasts can be helpful: do we want to force them? ## this version does for the Error model. opcons <- options("contrasts") options(contrasts=c("contr.helmert", "contr.poly")) on.exit(options(opcons)) allTerms <- Terms errorterm <- attr(Terms, "variables")[[1 + indError]] eTerm <- deparse(errorterm[[2]], width = 500, backtick = TRUE) intercept <- attr(Terms, "intercept") ecall <- lmcall ecall$formula <- as.formula(paste(deparse(formula[[2]], width = 500, backtick = TRUE), "~", eTerm, if(!intercept) "- 1"), env=environment(formula)) ecall$method <- "qr" ecall$qr <- TRUE ecall$contrasts <- NULL er.fit <- eval(ecall, parent.frame()) options(opcons) nmstrata <- attr(terms(er.fit), "term.labels") ## remove backticks from simple labels for strata (only) nmstrata <- sub("^`(.*)`$", "\\1", nmstrata) if(intercept) nmstrata <- c("(Intercept)", nmstrata) qr.e <- er.fit$qr rank.e <- er.fit$rank qty <- er.fit$resid maov <- is.matrix(qty) asgn.e <- er.fit$assign[qr.e$piv[1:rank.e]] ## we want this to label the rows of qtx, not cols of x. nobs <- NROW(qty) if(nobs > rank.e) { ## APPARENT BUG 1: # result <- vector("list", max(asgn.e) + 2) # asgn.e[(rank.e+1):nobs] <- max(asgn.e) + 1 ## SUGGESTED FIX: asgn.e[(rank.e+1):nobs] <- max(asgn.e) + 1 result <- vector("list", length (unique(asgn.e))) ## END FIX nmstrata <- c(nmstrata, "Within") } else result <- vector("list", max(asgn.e) + 1) names(result) <- nmstrata lmcall$formula <- form <- update(formula, paste(". ~ .-", deparse(errorterm, width = 500, backtick = TRUE))) Terms <- terms(form) lmcall$method <- "model.frame" mf <- eval(lmcall, parent.frame()) xvars <- as.character(attr(Terms, "variables"))[-1] if ((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[-yvar] if (length(xvars) > 0) { xlev <- lapply(mf[xvars], levels) xlev <- xlev[!sapply(xlev, is.null)] } else xlev <- NULL resp <- model.response(mf) qtx <- model.matrix(Terms, mf, contrasts) cons <- attr(qtx, "contrasts") dnx <- colnames(qtx) asgn.t <- attr(qtx, "assign") if(length(wts <- model.weights(mf))) { wts <- sqrt(wts) resp <- resp * wts qtx <- qtx * wts } qty <- as.matrix(qr.qty(qr.e, resp)) if((nc <- ncol(qty)) > 1) { dny <- colnames(resp) if(is.null(dny)) dny <- paste("Y", 1:nc, sep="") dimnames(qty) <- list(seq(nrow(qty)), dny) } else dimnames(qty) <- list(seq(nrow(qty)), NULL) qtx <- qr.qty(qr.e, qtx) dimnames(qtx) <- list(seq(nrow(qtx)) , dnx) for(i in seq(along=nmstrata)) { ## APPARENT BUG 2: # select <- asgn.e==(i-1) ## SUGGESTED FIX: select <- asgn.e==unique(asgn.e)[i] ## END FIX ni <- sum(select) if(!ni) next ## helpful to drop constant columns. xi <- qtx[select, , drop = FALSE] cols <- colSums(xi^2) > 1e-5 if(any(cols)) { xi <- xi[, cols, drop = FALSE] attr(xi, "assign") <- asgn.t[cols] fiti <- lm.fit(xi, qty[select,,drop=FALSE]) fiti$terms <- Terms } else { y <- qty[select,,drop=FALSE] fiti <- list(coefficients = numeric(0), residuals = y, fitted.values = 0 * y, weights = wts, rank = 0, df.residual = NROW(y)) } if(projections) fiti$projections <- proj(fiti) class(fiti) <- c(if(maov) "maov", "aov", oldClass(er.fit)) result[[i]] <- fiti } class(result) <- c("aovlist", "listof") if(qr) attr(result, "error.qr") <- qr.e attr(result, "call") <- Call if(length(wts)) attr(result, "weights") <- wts attr(result, "terms") <- allTerms attr(result, "contrasts") <- cons attr(result, "xlevels") <- xlev result } } -- Sincerely, Bj?rn-Helge Mevik, dr.scient.