A recent stackoverflow post <http://stackoverflow.com/questions/31134985> "How do you make R poly() evaluate (or ?predict?) multivariate new data (orthogonal or raw)?" made me look at poly and polym again. predict.poly doesn't work with multivariate data because for such data poly calls polym which does not: a) return an object inheriting from class poly; b) return the coefficients needed to make predictions; c) accept coefficients as an argument or include code to make predictions. This does lead to some wrong answers without warnings. e.g. ################## vanilla poly and polym ########### library(datasets) alm <- lm(stack.loss ~ poly(Air.Flow, Water.Temp, degree=3), stackloss) # "correct" prediction values [1:10] alm$fitted.values[1:10] # predict(alldata)[1:10] gives correct values predict(alm, stackloss)[1:10] # predict(partdata) gives wrong values predict(alm, stackloss[1:10,]) ######### I guess - but haven't confirmed - that with multivariate newdata predict.lm(alm, newdata) calculates new orthogonal polynomials based on newdata rather than applying the original coefficients. Below I append versions of: a) polym edited to address the three points above; b) poly slightly edited to reflect the changes in polym; c) predict.poly unaltered, just to get it in the same environment as polym and poly for testing. After implementing these the three sets of predictions above all agree. I'm very ready to believe that I've got the wrong end of the stick and/or my suggestions can be improved so I welcome correction. Otherwise, how do I go about getting these changes implemented? I see stats is maintained by R Core Team. Are they likely to pick it up from here, or do I need to take any other action? Best regards Keith Jewell ### polym ############################################## polym <- function (..., degree = 1, coefs = NULL, raw = FALSE) # add coefs argument { if(is.null(coefs)) { dots <- list(...) nd <- length(dots) if (nd == 0) stop("must supply one or more vectors") if (nd == 1) return(poly(dots[[1L]], degree, raw = raw)) n <- sapply(dots, length) if (any(n != n[1L])) stop("arguments must have the same length") z <- do.call("expand.grid", rep.int(list(0:degree), nd)) s <- rowSums(z) ind <- (s > 0) & (s <= degree) z <- z[ind, ] s <- s[ind] aPoly <- poly(dots[[1L]], degree, raw = raw) # avoid 2 calcs res <- cbind(1, aPoly)[, 1 + z[, 1]] # attribute "coefs" = list of coefs from individual variables if (!raw) coefs <- list(attr(aPoly, "coefs")) for (i in 2:nd) { aPoly <- poly(dots[[i]], degree, raw = raw) res <- res * cbind(1, aPoly)[, 1 + z[, i]] if (!raw) coefs <- c(coefs, list(attr(aPoly, "coefs"))) } colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = ".")) attr(res, "degree") <- as.vector(s) if (!raw) attr(res, "coefs") <- coefs class(res) <- c("poly", "matrix") # add poly class res } else { nd <- length(coefs) # number of variables newdata <- as.data.frame(list(...)) # new data if (nd != ncol(newdata)) stop("wrong number of columns in newdata") z <- do.call("expand.grid", rep.int(list(0:degree), nd)) s <- rowSums(z) ind <- (s > 0) & (s <= degree) z <- z[ind, ] res <- cbind(1, poly(newdata[[1]], degree=degree, coefs=coefs[[1]]))[, 1 + z[, 1]] for (i in 2:nd) res <- res*cbind(1, poly(newdata[[i]], degree=degree, coefs=coefs[[i]]))[, 1 + z[, i]] colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = ".")) res } } ###################### #### poly ################## poly <- function (x, ..., degree = 1, coefs = NULL, raw = FALSE) { dots <- list(...) if (nd <- length(dots)) { if (nd == 1 && length(dots[[1L]]) == 1L) degree <- dots[[1L]] # pass coefs argument as well else return(polym(x, ..., degree = degree, coefs=coefs, raw = raw)) } if (is.matrix(x)) { m <- unclass(as.data.frame(cbind(x, ...))) # pass coefs argument as well return(do.call("polym", c(m, degree = degree, raw = raw, list(coefs=coefs)))) } if (degree < 1) stop("'degree' must be at least 1") if (anyNA(x)) stop("missing values are not allowed in 'poly'") n <- degree + 1 if (raw) { Z <- outer(x, 1L:degree, "^") colnames(Z) <- 1L:degree attr(Z, "degree") <- 1L:degree class(Z) <- c("poly", "matrix") return(Z) } if (is.null(coefs)) { if (degree >= length(unique(x))) stop("'degree' must be less than number of unique points") xbar <- mean(x) x <- x - xbar X <- outer(x, seq_len(n) - 1, "^") QR <- qr(X) if (QR$rank < degree) stop("'degree' must be less than number of unique points") z <- QR$qr z <- z * (row(z) == col(z)) raw <- qr.qy(QR, z) norm2 <- colSums(raw^2) alpha <- (colSums(x * raw^2)/norm2 + xbar)[1L:degree] Z <- raw/rep(sqrt(norm2), each = length(x)) colnames(Z) <- 1L:n - 1L Z <- Z[, -1, drop = FALSE] attr(Z, "degree") <- 1L:degree attr(Z, "coefs") <- list(alpha = alpha, norm2 = c(1, norm2)) class(Z) <- c("poly", "matrix") } else { alpha <- coefs$alpha norm2 <- coefs$norm2 Z <- matrix(, length(x), n) Z[, 1] <- 1 Z[, 2] <- x - alpha[1L] if (degree > 1) for (i in 2:degree) Z[, i + 1] <- (x - alpha[i]) * Z[, i] - (norm2[i + 1]/norm2[i]) * Z[, i - 1] Z <- Z/rep(sqrt(norm2[-1L]), each = length(x)) colnames(Z) <- 0:degree Z <- Z[, -1, drop = FALSE] attr(Z, "degree") <- 1L:degree attr(Z, "coefs") <- list(alpha = alpha, norm2 = norm2) class(Z) <- c("poly", "matrix") } Z } ################################################ ### predict.poly #### predict.poly <- function (object, newdata, ...) { if (missing(newdata)) return(object) if (is.null(attr(object, "coefs"))) poly(newdata, degree = max(attr(object, "degree")), raw = TRUE) else poly(newdata, degree = max(attr(object, "degree")), coefs = attr(object, "coefs")) } ######################