Hi,
While exploring how summary.lm generated its output I came across a section
that left me puzzled.
at around line 57
R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
se <- sqrt(diag(R) * resvar)
I'm hoping somebody could explain the logic of these to steps or
alternatively point me in the direction of a text that will explain these
steps.
In particular I'm puzzled what is the relationship between QR factorization
and the cholesky factorization such that one can give a (sort of ) R matrix
as a parameter of chol2inv(). I say sort of R matrix as the matrix
generated by Qr$qr[p1, p1, drop = FALSE] has a lower triangle with non zero
entries although the upper triangle corresponds to the values in the R matrix.
Thank you
Dominic
summary.lm
function (object, correlation = FALSE, symbolic.cor = FALSE,
...)
{
z <- object
p <- z$rank
if (p == 0) {
r <- z$residuals
n <- length(r)
w <- z$weights
if (is.null(w)) {
rss <- sum(r^2)
}
else {
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/(n - p)
ans <- z[c("call", "terms")]
class(ans) <- "summary.lm"
ans$aliased <- is.na(coef(object))
ans$residuals <- r
ans$df <- c(0, n, length(ans$aliased))
ans$coefficients <- matrix(NA, 0, 4)
dimnames(ans$coefficients) <- list(NULL, c("Estimate",
"Std. Error", "t value",
"Pr(>|t|)"))
ans$sigma <- sqrt(resvar)
ans$r.squared <- ans$adj.r.squared <- 0
return(ans)
}
Qr <- object$qr
if (is.null(z$terms) || is.null(Qr))
stop("invalid 'lm' object: no terms nor qr
component")
n <- NROW(Qr$qr)
rdf <- n - p
if (rdf != z$df.residual)
warning("inconsistent residual degrees of freedom. -- please
report!")
p1 <- 1:p
r <- z$residuals
f <- z$fitted
w <- z$weights
if (is.null(w)) {
mss <- if (attr(z$terms, "intercept"))
sum((f - mean(f))^2)
else sum(f^2)
rss <- sum(r^2)
}
else {
mss <- if (attr(z$terms, "intercept")) {
m <- sum(w * f/sum(w))
sum(w * (f - m)^2)
}
else sum(w * f^2)
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf
R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
se <- sqrt(diag(R) * resvar)
est <- z$coefficients[Qr$pivot[p1]]
tval <- est/se
ans <- z[c("call", "terms")]
ans$residuals <- r
ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval),
rdf, lower.tail = FALSE))
dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]],
c("Estimate", "Std. Error", "t value",
"Pr(>|t|)"))
ans$aliased <- is.na(coef(object))
ans$sigma <- sqrt(resvar)
ans$df <- c(p, rdf, NCOL(Qr$qr))
if (p != attr(z$terms, "intercept")) {
df.int <- if (attr(z$terms, "intercept"))
1
else 0
ans$r.squared <- mss/(mss + rss)
ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n -
df.int)/rdf)
ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
numdf = p - df.int, dendf = rdf)
}
else ans$r.squared <- ans$adj.r.squared <- 0
ans$cov.unscaled <- R
dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,
1)]
if (correlation) {
ans$correlation <- (R * resvar)/outer(se, se)
dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
ans$symbolic.cor <- symbolic.cor
}
class(ans) <- "summary.lm"
ans
}
<environment: namespace:base>