Dear list,
I would like to do some calculation using different grouping variables.
My 'df' looks like this:
# Some data
set.seed(345)
id <- seq(200,400, by=10)
ids <- sample(substr(id,1,1))
group1 <- rep(1:3, each=7)
group2 <- rep(1:2, c(10,11))
group3 <- rep(1:4, c(5,5,5,6))
df <- data.frame(id, ids, group1, group2, group3)
df <- rbind(df, df, df)
df$time <- seq(2009, 2011, each=3)
df$x1 <- sample(0:100, 63)
df$x2 <- sample(44:234, 63)
head(df)
## For group1
d1 <- aggregate(cbind(x1, x2) ~
group1 + ids + time, data = df, sum)
d1$l_pct <- with(d1, ave(x1, list(group1, time),
FUN = function(x) round(prop.table(x) * 100, 1) ) )
op1 <- xtabs(l_pct ~ group1 + ids + time, data = d1)
ftable(op1, row.vars=c(1,3))
## For group2
d2 <- aggregate(cbind(x1, x2) ~
group2 + ids + time, data = df, sum)
d2$l_pct <- with(d2, ave(x1, list(group2, time),
FUN = function(x) round(prop.table(x) * 100, 1) ) )
op2 <- xtabs(l_pct ~ group2 + ids + time, data = d2)
ftable(op2, row.vars=c(1,3))
## and for group3...
## To have a more flexible solution I wrote this function:
myfun <- function(xdf, xvar) {
fo1 <- "cbind(x1, x2) ~ "
fo2 <- paste(fo1, xvar, "+ ids + time", sep="")
formular <- as.formula(fo2)
d2 <- do.call(aggregate, list(formular, data = xdf, FUN = sum))
d2$l_pct <- with(d2, ave(x1, list(eval(as.name(xvar)), time),
FUN = function(x) round(prop.table(x) * 100, 1) ) )
op2 <- xtabs(l_pct ~ eval(as.name(xvar)) + ids + time, data = d2)
fop2 <- ftable(op2, row.vars=c(1,3))
out <- list(d2, fop2)
return(out)
}
( out_gr1 <- myfun(df, "group1") )
( out_gr2 <- myfun(df, "group2") )
( out_gr3 <- myfun(df, "group3") )
This seems to work ok, but I am not really familiar with 'as.formula',
'eval' and 'as.name'. So I would like to know, if my solution is
ok or
if there are maybe better ways to solve this task.
Thanks for any help!!
Patrick
On Apr 30, 2011, at 7:06 AM, Patrick Hausmann wrote:> Dear list, > > I would like to do some calculation using different grouping > variables. My 'df' looks like this: > > # Some data > set.seed(345) > id <- seq(200,400, by=10) > ids <- sample(substr(id,1,1)) > group1 <- rep(1:3, each=7) > group2 <- rep(1:2, c(10,11)) > group3 <- rep(1:4, c(5,5,5,6)) > df <- data.frame(id, ids, group1, group2, group3) > df <- rbind(df, df, df) > df$time <- seq(2009, 2011, each=3) > df$x1 <- sample(0:100, 63) > df$x2 <- sample(44:234, 63) > > head(df) > > ## For group1 > d1 <- aggregate(cbind(x1, x2) ~ > group1 + ids + time, data = df, sum) > > d1$l_pct <- with(d1, ave(x1, list(group1, time), > FUN = function(x) round(prop.table(x) * 100, 1) ) ) > > op1 <- xtabs(l_pct ~ group1 + ids + time, data = d1) > ftable(op1, row.vars=c(1,3)) > > ## For group2 > d2 <- aggregate(cbind(x1, x2) ~ > group2 + ids + time, data = df, sum) > > d2$l_pct <- with(d2, ave(x1, list(group2, time), > FUN = function(x) round(prop.table(x) * 100, 1) ) ) > > op2 <- xtabs(l_pct ~ group2 + ids + time, data = d2) > ftable(op2, row.vars=c(1,3)) > > ## and for group3... > ## To have a more flexible solution I wrote this function: > > myfun <- function(xdf, xvar) { > > fo1 <- "cbind(x1, x2) ~ " > fo2 <- paste(fo1, xvar, "+ ids + time", sep="") > formular <- as.formula(fo2) > > d2 <- do.call(aggregate, list(formular, data = xdf, FUN = sum)) > > d2$l_pct <- with(d2, ave(x1, list(eval(as.name(xvar)), time), > FUN = function(x) round(prop.table(x) * 100, 1) ) ) > op2 <- xtabs(l_pct ~ eval(as.name(xvar)) + ids + time, data = d2) > fop2 <- ftable(op2, row.vars=c(1,3)) > out <- list(d2, fop2) > return(out) > > } > > ( out_gr1 <- myfun(df, "group1") ) > ( out_gr2 <- myfun(df, "group2") ) > ( out_gr3 <- myfun(df, "group3") ) > > This seems to work ok, but I am not really familiar with > 'as.formula', 'eval' and 'as.name'. So I would like to know, if my > solution is ok or if there are maybe better ways to solve this task.The do.call to aggregate looks unnecessarily complex and could be changed to: d2 <- aggregate(formular, data = xdf, FUN = sum) -- David Winsemius, MD West Hartford, CT
Reasonably Related Threads
- BUG: Standard Time v. DST calculated wrong by Samba Server
- How to use lm() output for systemfit() 'Seemingly unrelated regression'
- Samba bug: 1 out of 387 files off by 1 hour (DST/ST bug)
- Extreme AIC in glm(), perfect separation, svm() tuning
- Send QueueMemberAdded Event via AMI