Sorry, this is *related* to a recent post, but not the same. I'd
appreciate your help in getting the same results with the two methods below:
the first uses plyr and the second uses dplyr. The former works, but the
latter does not.
### Sample data
# Reproducible toy data: 100 rows split evenly between two model labels.
# The draws are made in the same order as the columns are declared
# (pred first, then y) so the random stream is consumed identically.
set.seed(4)
pred_draws <- rnorm(100)
y_draws <- sample(c(0, 1), 100, replace = TRUE)
model_labels <- gl(
  n = 2, k = 50, length = 100,
  labels = c("model1", "model2")
)
df <- data.frame(pred = pred_draws, y = y_draws, models = model_labels)
### Using plyr
# Bin the rows of `x` by their `pred` value and total `y` within each bin.
#
# x: a data frame with a numeric `pred` column and a `y` column that
#    sum() can add up.
#
# Returns the dplyr summary with one row per occupied bin: `bin` (factor
# produced by cut()) and `sumY` (sum of `y` in that bin).
fooFun <- function(x) {
  # The distinct prediction values themselves serve as the interval edges.
  edges <- unique(x$pred)
  x[["bin"]] <- cut(x$pred, breaks = edges, include.lowest = TRUE)
  by_bin <- dplyr::group_by(x, bin)
  dplyr::summarize(by_bin, sumY = sum(y))
}
# Split df by model, apply fooFun to each piece, and preview the
# recombined result.
head(
  plyr::ddply(
    .data = df,
    .variables = plyr::.(models),
    .fun = fooFun
  )
)
### Using dplyr
# Vector-argument variant of the binning summary: bin `pred` by its own
# distinct values and total `y` within each bin.
#
# pred: numeric vector of predictions.
# y:    vector of the same length as `pred` that sum() can add up.
#
# Returns the dplyr summary with one row per occupied bin: `bin` (factor
# produced by cut()) and `sumY` (sum of `y` in that bin).
fooFun2 <- function(pred, y) {
  # The distinct prediction values themselves serve as the interval edges.
  edges <- unique(pred)
  binned <- data.frame(
    bin = cut(pred, breaks = edges, include.lowest = TRUE),
    pred = pred,
    y = y
  )
  dplyr::summarize(dplyr::group_by(binned, bin), sumY = sum(y))
}
# mutate() requires each group's result to have one row per input row (or a
# single row), but fooFun2() returns one row per bin, so mutate() errors
# here. reframe() (dplyr >= 1.1.0) accepts a per-group data frame of any
# size and unpacks its columns, yielding models / bin / sumY columns like
# the plyr::ddply() call. The result is returned ungrouped.
# With an older dplyr, an alternative is
#   dplyr::group_modify(dplyr::group_by(df, models), ~ fooFun(.x))
res_dplyr <- dplyr::reframe(dplyr::group_by(df, models), fooFun2(pred, y))
head(res_dplyr)
Thanks
Axel.
[[alternative HTML version deleted]]