# HI, You can do this:
dat1 <- read.csv(
  "dat7.csv",
  header = TRUE, stringsAsFactors = FALSE, sep = "\t"
)
# Rows without an evnmt_brutal value are dropped before the analysis;
# fun2() itself does not handle NA in that column.
dat.bru <- dat1[!is.na(dat1$evnmt_brutal), ]

# Extract, per patient, every run of three consecutive rows whose
# evnmt_brutal values read 0, 0, 1, and average basdai_d over the middle
# "0" rows and over the closing "1" rows.
#
# Args:
#   dat: data frame with columns patient_id, number, basdai_d and
#        evnmt_brutal (0/1, no NA).
#
# Returns:
#   A list of length two:
#   [[1]] a list of data frames named by patient_id, each holding the
#         stacked three-row windows found for that patient;
#   [[2]] a matrix with one row per patient in [[1]] and columns
#         Mean_Middle0 (mean basdai_d of the 2nd row of each window) and
#         Mean_1 (mean basdai_d of the 3rd row of each window).
#   Patients without any 0, 0, 1 window are omitted from both elements.
fun2 <- function(dat) {
  lst1 <- split(dat, dat$patient_id)

  # Per patient, discard everything before the first evnmt_brutal == 0.
  lst2 <- lapply(lst1, function(x) x[cumsum(x$evnmt_brutal == 0) > 0, ])

  # Empty out patients whose remaining flags are all 1 or all 0 (this also
  # covers patients with no rows left, since all() of nothing is TRUE).
  lst3 <- lapply(lst2, function(x) {
    x[!(all(x$evnmt_brutal == 1) | all(x$evnmt_brutal == 0)), ]
  })
  lst4 <- lst3[vapply(lst3, NROW, integer(1)) != 0]

  lst5 <- lapply(seq_along(lst4), function(i) {
    grp <- lst4[[i]]
    do.call(rbind, lapply(which(grp$evnmt_brutal == 1), function(x) {
      # Candidate window: the row flagged 1 and the two rows before it.
      x1 <- c(x - 2, x - 1, x)
      # A window reaching index 0 starts before the first row: drop it whole.
      x2 <- x1[!any(x1 == 0)]
      x3 <- grp[x2, ]
      # Keep the window only when its flags read exactly 0, 0, 1.
      x4 <- x3[!is.na(match(paste(x3$evnmt_brutal, collapse = ""), "001")), ]
      # Reject windows containing a repeated `number`. Index rows explicitly:
      # a single-index `[` on a data frame would select columns instead.
      x4[!any(duplicated(x4$number)), , drop = FALSE]
    }))
  })
  lst6 <- lst5[vapply(lst5, NROW, integer(1)) != 0]
  names(lst6) <- unlist(lapply(lst6, function(x) unique(x$patient_id)))

  # Each patient's frame is a stack of three-row windows, so position
  # modulo 3 identifies the middle 0 (== 2) and the closing 1 (== 0).
  Mean_01 <- do.call(rbind, lapply(lst6, function(x) {
    pos <- seq_len(nrow(x)) %% 3
    cbind(
      Mean_Middle0 = mean(x[pos == 2, "basdai_d"]),
      Mean_1 = mean(x[pos == 0, "basdai_d"])
    )
  }))
  rownames(Mean_01) <- names(lst6)

  # To preview both parts instead: lapply(lst7, head, 2)
  lst7 <- list(lst6, Mean_01)
  lst7
}

fun2(dat.bru)
head(fun2(dat.bru)[[1]], 3)
#$`2`
#    X patient_id number responsed_at  t basdai_d evnmt_brutal
#13 13          2     12   2011-07-05 12     -1.0            0
#14 14          2     13   2011-08-07 13      0.9            0
#15 15          2     14   2011-09-11 14     -0.8            1
#
#$`5`
#    X patient_id number responsed_at t basdai_d evnmt_brutal
#52 52          5      8   2011-01-11 7     -2.8            0
#53 53          5      9   2011-02-13 8      0.0            0
#54 54          5     10   2011-03-19 9     -1.2            1
#
#$`6`
#    X patient_id number responsed_at
t basdai_d evnmt_brutal
#74 74          6      9   2011-02-05  8     0.80            0
#75 75          6     10   2011-03-09  9     0.15            0
#76 76          6     11   2011-04-11 10    -0.45            1

head(fun2(dat.bru)[[2]],3)
#  Mean_Middle0 Mean_1
#2         0.90  -0.80
#5         0.00  -1.20
#6         0.15  -0.45

res1<- fun2(dat.bru)[[1]]
lapply(res1,function(x) tail(x,-1))[1:3]
#$`2`
#    X patient_id number responsed_at  t basdai_d evnmt_brutal
#14 14          2     13   2011-08-07 13      0.9            0
#15 15          2     14   2011-09-11 14     -0.8            1
#
#$`5`
#    X patient_id number responsed_at t basdai_d evnmt_brutal
#53 53          5      9   2011-02-13 8      0.0            0
#54 54          5     10   2011-03-19 9     -1.2            1
#
#$`6`
#    X patient_id number responsed_at  t basdai_d evnmt_brutal
#75 75          6     10   2011-03-09  9     0.15            0
#76 76          6     11   2011-04-11 10    -0.45            1

#or
res11<-do.call(rbind,lapply(res1,function(x) tail(x,-1)))
row.names(res11)<-1:nrow(res11)

A.K.

________________________________
From: GUANGUAN LUO <guanguanluo at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Tuesday, June 4, 2013 2:10 PM
Subject: choose the lines2

13 2 12 2011/7/5 12 -1 0
14 2 13 2011/8/7 13 0.9 0
15 2 14 2011/9/11 14 -0.8 1
52 5 8 2011/1/11 7 -2.8 0
53 5 9 2011/2/13 8 0 0
54 5 10 2011/3/19 9 -1.2 1
74 6 9 2011/2/5 8 0.8 0
75 6 10 2011/3/9 9 0.15 0
76 6 11 2011/4/11 10 -0.45 1

Those are the results that I want, and then I want to select the lines like this:

14 2 13 2011/8/7 13 0.9 0
15 2 14 2011/9/11 14 -0.8 1
53 5 9 2011/2/13 8 0 0
54 5 10 2011/3/19 9 -1.2 1
75 6 10 2011/3/9 9 0.15 0
76 6 11 2011/4/11 10 -0.45 1

so that I can calculate the mean of the lines with evnmt_brutal == 0 and compare it with the mean of the lines with evnmt_brutal == 1.