Hi Justin:
I'm not dead certain this is what you were after, but try this (it uses the plyr package, so run library(plyr) first):
## Simulated example data: 5 ids with 200 rows each; `state` takes the
## values 1-3 with unequal probabilities, V1 is uniform on [1, 10] and
## V2 is standard normal.
dat <- local({
  n_rows <- 1000
  ids <- rep(1:5, each = 200)
  ## random draws are made in the same order as the columns appear
  states <- sample(1:3, size = n_rows, replace = TRUE,
                   prob = c(0.7, 0.05, 0.25))
  unif_draws <- runif(n_rows, min = 1, max = 10)
  norm_draws <- rnorm(n_rows)
  data.frame(id = ids, state = states, V1 = unif_draws, V2 = norm_draws)
})
## Summarise consecutive runs of `state` within one data frame.
##
## Input:  d, a data frame with columns `state`, `V1` and `V2`; runs are
##         taken in the row order of d.
## Output: a data frame with one row per run of identical `state` values:
##           index  - run number within d
##           state  - the state value of the run
##           from   - first row of the run
##           to     - last row of the run
##           v1mean - mean of V1 over rows from:to
##           v2sum  - sum of V2 over rows from:to
##         A zero-row d yields a zero-row result with the same columns.
loopFun <- function(d) {
  rle.dat <- rle(d$state)
  run_len <- rle.dat$lengths
  n_runs <- length(run_len)

  ## last and first row of every run; deriving `from` as to - length + 1
  ## (instead of shifting `to` by one position) keeps both vectors the
  ## same length even when d has no rows
  to <- cumsum(run_len)
  from <- to - run_len + 1

  ## run membership of every row of d; explicit levels keep the groups
  ## in run order
  run_id <- factor(rep(seq_len(n_runs), times = run_len),
                   levels = seq_len(n_runs))

  ## per-run summaries computed on the columns directly, so no data
  ## frame subset is built for each run
  v1mean <- vapply(split(d$V1, run_id), mean, numeric(1),
                   USE.NAMES = FALSE)
  v2sum <- vapply(split(d$V2, run_id), sum, numeric(1),
                  USE.NAMES = FALSE)

  data.frame(index = seq_len(n_runs),
             state = rle.dat$values,
             from = from,
             to = to,
             v1mean = v1mean,
             v2sum = v2sum)
}
## apply loopFun to each id's rows and stack the per-id results
w <- ddply(.data = dat, .variables = "id", .fun = loopFun)
My result looks like:
> head(w)
id index state from to v1mean v2sum
1 1 1 2 1 1 6.077463 -0.9980917
2 1 2 3 2 2 5.013528 -0.4693002
3 1 3 1 3 10 6.066698 3.3607026
4 1 4 3 11 11 3.235843 0.8072452
5 1 5 1 12 13 3.385865 1.5167562
6 1 6 3 14 14 4.236730 -1.4976861
> tail(w)
id index state from to v1mean v2sum
430 5 85 3 177 178 7.327400 0.4125103
431 5 86 1 179 185 5.084396 -0.3874377
432 5 87 2 186 187 2.558208 -1.2045609
433 5 88 1 188 192 6.180575 2.2682108
434 5 89 3 193 193 5.606389 1.2107051
435 5 90 1 194 200 5.307754 -0.8947832
HTH,
Dennis
On Fri, Jun 17, 2011 at 3:55 PM, Justin Haynes <jtor14 at gmail.com> wrote:
> I think I need to do something like this:
>
> dat<-data.frame(state=sample(id=rep(1:5,each=200),1:3, 1000,
> replace=T,prob=c(0.7,0.05,0.25)),V1=runif(1,10,1000),V2=rnorm(1000))
> rle.dat<-rle(dat$state)
> temp<-1
> out<-data.frame(id=1:length(rle.dat$length))
> for(i in 1:length(rle.dat$length)){
>        temp2<-temp+rle.dat$length[[i]]
>        out$V1[i]<-mean(dat$V1[temp:temp2])
>        out$V2[i]<-sum(dat$V2[temp:temp2])
>        out$state[i]<-rle.dat$value[[i]]
>        temp<-temp2
> }
>
> to a very large dataset. I want to apply a few summary functions to
> some variables within a data.frame for given states. To complicate
> things, I'd like to use plyr and split on the id variable before I do
> any of this...
>
> loop.func<-function(dat){
>  rle.dat<-rle(dat$state)
>  temp<-1
>  out<-data.frame(id=1:length(rle.dat$length))
>  for(i in 1:length(rle.dat$length)){
>        temp2<-temp+rle.dat$length[[i]]
>        out$V1[i]<-mean(dat$V1[temp:temp2])
>        out$V2[i]<-sum(dat$V2[temp:temp2])
>        out$state[i]<-rle.dat$value[[i]]
>        temp<-temp2
>  }
> ?return(out)
> }
> out<-ddply(dat,.(id),loop.func)
>
> mostly, i just don't understand how to use a list (especially in this
> instance) in a plyr/apply statement...
>
>
> Thanks,
>
> Justin
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>