You can try this: data <- cbind("a"=sample(1:100000), "b"=sample(1:100000)) fact <- sample(rep(1:10000, each=10)) system.time(std <- by(data, fact, colSums)) by.matrix <- function (data, INDICES, FUN, ...) { if (!is.list(INDICES)) { IND <- vector("list", 1) IND[[1]] <- INDICES names(IND) <- deparse(substitute(INDICES))[1] } else IND <- INDICES FUNx <- function(x) FUN(data[x, , drop = FALSE], ...) nd <- nrow(data) ans <- eval(substitute(tapply(1:nd, IND, FUNx)), as.data.frame (data)) attr(ans, "call") <- match.call() class(ans) <- "by" ans } system.time(mod <- by(data, fact, colSums)) all.equal(std, mod) I get a 30x speed up (I'm not sure why the attributes differ, but I'm sure this can be fixed...)