Hi,
Try this:
final3New<-read.table(file="real_data_cecilia.txt",sep="\t")
dim(final3New)
#[1] 5369??? 5
#Inside the split within split, dummy==1 for the first row.? For lists that have
many rows, I selected the row with dummy==0 (from the rest) using the #condition
that the absolute difference between the dimensions of those rows and the first
row dimension was minimum (after I applied the first #constraint).? I guess you
wanted to select only a pair of rows (dummy=0 and dummy=1) for each lists.
fun1<- function(dat,percent,number){???
??? lst1<- split(dat,list(dat$year,dat$industry))
??? lst1New<- lapply(lst1,function(x) x[!(all(x$dummy==0)|all(x$dummy==1)),])
??? lst2<- lst1New[lapply(lst1New,nrow)>0]
??? lst3<- lapply(lst2,function(x){
??? ??? ??? ??? lapply(x$dimension,function(y){
??? ??? ??? ??? ? x1<- x[(y < (x$dimension+(x$dimension*percent))) &
(y > (x$dimension-(x$dimension*percent))),]??? ???
??? ??? ??? ??? })???
??? ??? ??? ??? })
??? lst4<- lapply(lst3,function(x){
??? ??? ??? ???? ?? lst<- lapply(x,function(y){
??? ??? ??? ??? ??? ??? ??? y[!all(y$dummy==0),]
??? ??? ??? ??? ??? ??? ??? ???? })
??? ??? ??? ??? ?? lstNew<- lst[lapply(lst,nrow)>1]
??? ??? ??? ??? ?? lstNew1<- unique(lstNew)
??? ??? ??? ??? ?? })
???????? lst5<- lst4[lapply(lst4,length)>0]
??? ?lst6<- lapply(lst5,function(x) {
??? ??? ??? ??? ??? lst<- lapply(x,function(y){
??? ??? ??? ??? ??? ??? ? y[!all(y$dummy==1),]
??? ??? ??? ??? ??? ??? ?? })
??? ??? ??? ??? ??? lst[lapply(lst,nrow)>0]
??? ??? ??? ??? ??? })
???????? lst7<- lapply(lst6,function(x){
??? ??? ??? ??? ??? lst<- lapply(x,function(y) {
??? ??? ??? ??? ??? ??? x1<- y[1,]
??? ??? ??? ??? ??? ??? x2<- y[-1,]
??? ??? ??? ??? ??? ??? x3<- subset(x2,dummy==0)
??? ??? ??? ??? ??? ??? x4<- x3[which.min(abs(x1$dimension-x3$dimension)),]
??? ??? ??? ??? ??? ??? rbind(x1,x4)
??? ??? ??? ??? ??? ??? })
??? ??? ??? ??? ??? lstNew<-unique(lst)
??? ??? ??? ??? ??? lstNew1<- lapply(lstNew,function(x){
??? ??? ??? ??? ??? ??? x[abs(diff(x$dimension)) < number,]
??? ??? ??? ??? ??? ??? })
??? ??? ??? ??? ??? ?lstNew1[lapply(lstNew1,nrow)>0]
??? ??? ??? ??? ??? ??? })
??? ?lst8<- lst7[lapply(lst7,length)>0]
??? ?res<- do.call(rbind,lapply(lst8,function(x){
??? ??? ??? ??? ??? ??? ?do.call(rbind,x)
??? ??? ??? ??? ??? ??? ??? })
??? ??? ??? ??? ??? ??? ??????? )
??? ?row.names(res)<- 1:nrow(res)
??? ?res}??? ??? ??? ??? ???? ??? ???
res10Percent<- fun1(final3New,0.1,200)
dim(res10Percent)
#[1] 508?? 5
?nrow(subset(res10Percent,dummy==0))
#[1] 254
?nrow(subset(res10Percent,dummy==1))
#[1] 254
?head(res10Percent)
#?????? firm year industry dummy dimension
#1 500622043 2004??????? 1???? 1????? 1198
#2 501611886 2004??????? 1???? 0????? 1208
#3 501164600 2005??????? 1???? 1????? 1332
#4 504243349 2005??????? 1???? 0????? 1455
#5 500862893 2006??????? 1???? 1????? 5324
#6 501744860 2006??????? 1???? 0????? 5453
res5Percent<- fun1(final3New,0.05,200)
?dim(res5Percent)
#[1] 548?? 5
?nrow(subset(res5Percent,dummy==0))
#[1] 274
? nrow(subset(res5Percent,dummy==1))
#[1] 274
res5percent1<-fun1(final3New,0.05,50)
?dim(res5percent1)
#[1] 302?? 5
?nrow(subset(res5percent1,dummy==0))
#[1] 151
?nrow(subset(res5percent1,dummy==1))
#[1] 151
Hope it helps.
A.K.
________________________________
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Sent: Friday, June 7, 2013 7:30 PM
Subject: data
I'm sending the data.
Thank you very much.
Cec?lia
The code
final3<-read.table(file="real data cecilia.txt",sep="\t")
lst1<-split(final3,list(final3$year,final3$industry))
lst2<-lst1[lapply(lst1,nrow)>0]
lst3<-lapply(lst2,function(x) lapply(x$dimension,function(y) x[(y<
(x$dimension+x$dimension*0.10)) & (y> (x$dimension-x$dimension*0.10)),]))
lst4<-lapply(lst3,function(x) x[lapply(x,nrow)==2])
lst5<-lapply(lst4,function(x)x[!duplicated(x)])
lst6<-lst5[lapply(lst5,length)>0]
lst7<-lapply(lst6,function(x) {lst<-lapply(x,function(y)
y[sum(y$dummy)==1,]);lst[lapply(lst,nrow)>0]})
res<-do.call(rbind,lapply(lst7,function(x) do.call(rbind,x)))
row.names(res)<-1:nrow(res)
nrow(subset(res,res$dummy==1))
nrow(subset(res,res$dummy==0))??