arun
2013-Jun-21 23:23 UTC
[R] extracting data coincident with the beginning and end of multiple streaks (rle)
# Hi,
# Maybe this helps:

library(plyr)

# Added more lines of fake data.
# The header row has one fewer field than the data rows, so read.table()
# uses the first field of each row (585, 586, ...) as the row names.
fn_hp <- read.table(text = "
    id date        wl_m       wet cuml_day
585 fn 2012-03-03   0.1527048 1   1
586 fn 2012-03-04   0.2121408 1   2
587 fn 2012-03-05   0.1877568 1   3
588 fn 2012-03-06   0.1709928 1   4
589 fn 2012-03-07   0.1642872 1   5
598 fn 2012-03-16   0.0182880 0   1
599 fn 2012-03-17  -0.0076200 0   2
600 fn 2012-03-18  -0.0067056 0   3
601 fn 2012-03-19  -0.0097536 0   4
602 fn 2012-03-20   0.0015240 0   5
603 fn 2012-03-21  -0.0067056 0   6
604 fn 2012-03-22   0.0003048 0   7
605 fn 2012-03-23   0.0024384 0   8
606 fn 2012-03-24  -0.0054864 0   9
607 fn 2012-03-25  -0.0067056 1   1
608 fn 2012-03-26   0.0003048 1   2
609 fn 2012-03-27   0.0024384 1   3
610 fn 2012-03-28  -0.0054864 1   4
", sep = "", header = TRUE, stringsAsFactors = FALSE)

# Untouched copy of the input for the plyr-based alternatives below.
fn_hp1 <- fn_hp

# Number the streaks 1, 2, 3, ... from a within-streak day counter
# (1, 2, ..., k, 1, 2, ...).
#
# A new streak starts at the first row and wherever the counter fails to
# increase by exactly 1. Comparing the step against 1 (rather than looking
# for a drop larger than 1) also catches the boundary after a one- or
# two-day streak, where the counter stays at 1 or falls from 2 to 1.
#
# counter: numeric vector of within-streak day counts.
# Returns an integer vector of streak ids, the same length as `counter`.
streak_id <- function(counter) {
  if (length(counter) == 0L) {
    return(integer(0))
  }
  cumsum(c(TRUE, diff(counter) != 1))
}

# --- Option 1: base R (split / lapply / unsplit) --------------------------

fn_hp$DESIRED.col <- NA
fn_hp$IDNew <- streak_id(fn_hp$cuml_day)

# Write the streak length (the last cuml_day of the streak) into the first
# and last rows of one streak; interior rows keep NA.
mark_streak_ends <- function(x) {
  n <- nrow(x)
  x$DESIRED.col[c(1, n)] <- x$cuml_day[n]
  x
}

res1 <- unsplit(
  lapply(split(fn_hp, fn_hp$IDNew), mark_streak_ends),
  fn_hp$IDNew
)
# Drop the helper id by name rather than by position.
res1 <- res1[, names(res1) != "IDNew"]
res1[!is.na(res1$DESIRED.col), ]
#     id       date       wl_m wet cuml_day DESIRED.col
# 585 fn 2012-03-03  0.1527048   1        1           5
# 589 fn 2012-03-07  0.1642872   1        5           5
# 598 fn 2012-03-16  0.0182880   0        1           9
# 606 fn 2012-03-24 -0.0054864   0        9           9
# 607 fn 2012-03-25 -0.0067056   1        1           4
# 610 fn 2012-03-28 -0.0054864   1        4           4

# --- Option 2: plyr::ddply with mutate ------------------------------------

fn_hp1$IDNew <- streak_id(fn_hp1$cuml_day)

# replace() fills positions 1 and n of an all-NA vector, so a one-day
# streak (n == 1) works too; rep(NA, n - 2) would fail there.
res2 <- ddply(
  fn_hp1, .(IDNew), mutate,
  DESIRED.col = replace(
    rep(NA, length(cuml_day)),
    c(1, length(cuml_day)),
    tail(cuml_day, 1)
  )
)
# Remove only the helper id; DESIRED.col must survive into the output.
res2 <- res2[, names(res2) != "IDNew"]
# ddply() returns the groups in IDNew order, which is the original row
# order here, so the original row names can be restored one-to-one.
row.names(res2) <- row.names(fn_hp1)
res2[!is.na(res2$DESIRED.col), ]
# Same rows and columns as the res1 output above.

# --- Option 3: if `DESIRED.col` is not needed -----------------------------

# Keep just the first and last row of every streak. A one-day streak
# yields its single row twice (its start and end coincide).
res3 <- ddply(fn_hp1, .(IDNew), function(x) x[c(1, nrow(x)), ])
res3 <- res3[, names(res3) != "IDNew"]
res3
#   id       date       wl_m wet cuml_day
# 1 fn 2012-03-03  0.1527048   1        1
# 2 fn 2012-03-07  0.1642872   1        5
# 3 fn 2012-03-16  0.0182880   0        1
# 4 fn 2012-03-24 -0.0054864   0        9
# 5 fn 2012-03-25 -0.0067056   1        1
# 6 fn 2012-03-28 -0.0054864   1        4

# A.K.

# ----- Original message ---------------------------------------------------
#
# Good day:
#
# I used rle to calculate the wet and dry duration (cuml_day) of wetlands
# using the "wet" variable from the sample data below.
#
# > cum_day <- unlist(lapply(rle(fn_hp$wet)$lengths, seq_len))
# >   ### counts consecutive 1 and 0 ###
# > fn_hp <- cbind(fn_hp, cum_day)
# >   ### bind cumul. days to org dataframe
#
# I would now like to extract the rows of data that correspond to the
# beginning and end of each streak so I can look at both the duration of
# the streak and the date ranges where it occurred (to see if wet periods
# coincide with amphibian breeding periods).
#
# - An alternative solution would be to add the streak length from rle to
#   each row that was included in the particular streak (DESIRED.col)
#
# I am a relatively new R user and not sure the best way to approach this.
# Any insight is appreciated.
#
# -Jeff
#
#     id date        wl_m       wet cuml_day DESIRED.col
# 585 fn 2012-03-03   0.1527048 1   1        5
# 586 fn 2012-03-04   0.2121408 1   2        .
# 587 fn 2012-03-05   0.1877568 1   3        .
# 588 fn 2012-03-06   0.1709928 1   4        .
# 589 fn 2012-03-07   0.1642872 1   5        5
# 598 fn 2012-03-16   0.0182880 0   1        9
# 599 fn 2012-03-17  -0.0076200 0   2        .
# 600 fn 2012-03-18  -0.0067056 0   3        .
# 601 fn 2012-03-19  -0.0097536 0   4        .
# 602 fn 2012-03-20   0.0015240 0   5        .
# 603 fn 2012-03-21  -0.0067056 0   6        .
# 604 fn 2012-03-22   0.0003048 0   7        .
# 605 fn 2012-03-23   0.0024384 0   8        .
# 606 fn 2012-03-24  -0.0054864 0   9        9