Sorry, I forgot to paste some lines and change the names:
# Build the matched sample; fun1() and final3New are defined elsewhere.
res10Percent <- fun1(final3New, 0.1, 200)

# Every row that has an identical copy somewhere in res10Percent (checking from
# both ends flags all copies, not just the later ones), keeping dummy == 1.
res10PercentSub1 <- subset(
  res10Percent[
    duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE),
  ],
  dummy == 1
)
indx1 <- as.numeric(row.names(res10PercentSub1))

# Sort by dimension and give each consecutive pair of sorted rows a shared id
# (1, 1, 2, 2, ...); presumably the two rows of a pair are copies of each other.
res10PercentSub2 <- res10PercentSub1[order(res10PercentSub1$dimension), ]
indx11 <- as.numeric(row.names(res10PercentSub2))
names(indx11) <- (seq_along(indx11) - 1) %/% 2 + 1

# Pull each flagged row plus the row right after it. The numeric row labels are
# used as row positions here -- assumes default 1..n row names; TODO confirm.
res10PercentSub3 <- res10Percent[c(indx11, indx11 + 1), ]
res10PercentSub3$id <- names(c(indx11, indx11 + 1))

# Within each id group, pick the pair to discard: the following row whose
# dimension is farthest from the flagged dimension, together with the
# dummy == 1 row whose row label is closest to it.
res10PercentSub4 <- do.call(
  rbind,
  lapply(
    split(res10PercentSub3, res10PercentSub3$id),
    function(grp) {
      # Drop the first row; the new first row supplies the reference dimension.
      rest <- grp[-1, ]
      gaps <- abs(rest$dimension[1] - rest$dimension[-1])
      # +1 maps a position within rest$dimension[-1] back to a row of `rest`.
      farthest <- rest[which.max(gaps) + 1, ]
      ones <- grp[grp$dummy == 1, ]
      labelGap <- abs(
        as.numeric(row.names(ones)) - as.numeric(row.names(farthest))
      )
      nearestOne <- ones[which.min(labelGap), ]
      rbind(nearestOne, farthest)
    }
  )
)

# Strip everything up to the last "." from the row names so that only the part
# after the prefix added by do.call(rbind, ...) remains.
row.names(res10PercentSub4) <- gsub(".*\\.", "", row.names(res10PercentSub4))
#####forgot
################################################
# Every row that has an identical copy somewhere in res10Percent, this time
# keeping dummy == 0.
res10PercentSub0 <- subset(
  res10Percent[
    duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE),
  ],
  dummy == 0
)
indx0 <- as.numeric(row.names(res10PercentSub0))

# Sort by dimension and give each consecutive pair of sorted rows a shared id
# (1, 1, 2, 2, ...).
res10PercentSub20 <- res10PercentSub0[order(res10PercentSub0$dimension), ]
indx00 <- as.numeric(row.names(res10PercentSub20))
names(indx00) <- (seq_along(indx00) - 1) %/% 2 + 1

# Pull each flagged row plus the row right before it. The numeric row labels
# are used as row positions -- assumes default 1..n row names; TODO confirm.
res10PercentSub30 <- res10Percent[c(indx00 - 1, indx00), ]
res10PercentSub30$id <- names(c(indx00 - 1, indx00))

# Within each id group, pick the pair to discard: the dummy == 1 row whose
# dimension is farthest from the dummy == 0 dimension, together with the
# dummy == 0 row whose row label is closest to it.
res10PercentSub40 <- do.call(
  rbind,
  lapply(
    split(res10PercentSub30, res10PercentSub30$id),
    function(grp) {
      ones <- subset(grp, dummy == 1)
      zeros <- subset(grp, dummy == 0)
      gaps <- abs(ones$dimension - unique(zeros$dimension))
      farthestOne <- ones[which.max(gaps), ]
      labelGap <- abs(
        as.numeric(row.names(farthestOne)) - as.numeric(row.names(zeros))
      )
      nearestZero <- zeros[which.min(labelGap), ]
      rbind(farthestOne, nearestZero)
    }
  )
)

# Strip everything up to the last "." from the row names so that only the part
# after the prefix added by do.call(rbind, ...) remains.
row.names(res10PercentSub40) <- gsub(".*\\.", "", row.names(res10PercentSub40))
# Numeric row labels of every pair selected for removal by the dummy == 1 pass
# (res10PercentSub4) and the dummy == 0 pass (res10PercentSub40).
indxNew <- sort(as.numeric(c(
  row.names(res10PercentSub4),
  row.names(res10PercentSub40)
)))

# Negative indexing drops those rows; the row labels are used as positions.
res10PercentFinal <- res10Percent[-indxNew, ]

# Sanity checks (outputs as reported in the original post).
dim(res10PercentFinal)
#[1] 454   5
nrow(subset(res10PercentFinal, dummy == 0))
#[1] 227
nrow(subset(res10PercentFinal, dummy == 1))
#[1] 227
nrow(unique(res10PercentFinal))
A.K.
----- Original Message -----
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Cc:
Sent: Monday, June 10, 2013 5:48 PM
Subject: RE: please check this
Error message:
Error in row.names(res10PercentSub5) :
  object 'res10PercentSub5' not found
________________________________________
De: arun [smartpink111 at yahoo.com]
Enviado: segunda-feira, 10 de Junho de 2013 22:05
Para: Cecilia Carmo
Cc: R help
Assunto: Re: please check this
Hi,
Try this:
# Build the matched sample; fun1() and final3New are defined elsewhere.
res10Percent <- fun1(final3New, 0.1, 200)

# Every row that has an identical copy somewhere in res10Percent (checking from
# both ends flags all copies, not just the later ones), keeping dummy == 1.
res10PercentSub1 <- subset(
  res10Percent[
    duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE),
  ],
  dummy == 1
)
indx1 <- as.numeric(row.names(res10PercentSub1))

# Sort by dimension and give each consecutive pair of sorted rows a shared id
# (1, 1, 2, 2, ...); presumably the two rows of a pair are copies of each other.
res10PercentSub2 <- res10PercentSub1[order(res10PercentSub1$dimension), ]
indx11 <- as.numeric(row.names(res10PercentSub2))
names(indx11) <- (seq_along(indx11) - 1) %/% 2 + 1

# Pull each flagged row plus the row right after it. The numeric row labels are
# used as row positions here -- assumes default 1..n row names; TODO confirm.
res10PercentSub3 <- res10Percent[c(indx11, indx11 + 1), ]
res10PercentSub3$id <- names(c(indx11, indx11 + 1))

# Within each id group, pick the pair to discard: the following row whose
# dimension is farthest from the flagged dimension, together with the
# dummy == 1 row whose row label is closest to it.
res10PercentSub4 <- do.call(
  rbind,
  lapply(
    split(res10PercentSub3, res10PercentSub3$id),
    function(grp) {
      # Drop the first row; the new first row supplies the reference dimension.
      rest <- grp[-1, ]
      gaps <- abs(rest$dimension[1] - rest$dimension[-1])
      # +1 maps a position within rest$dimension[-1] back to a row of `rest`.
      farthest <- rest[which.max(gaps) + 1, ]
      ones <- grp[grp$dummy == 1, ]
      labelGap <- abs(
        as.numeric(row.names(ones)) - as.numeric(row.names(farthest))
      )
      nearestOne <- ones[which.min(labelGap), ]
      rbind(nearestOne, farthest)
    }
  )
)
################################################
# Every row that has an identical copy somewhere in res10Percent, this time
# keeping dummy == 0.
res10PercentSub0 <- subset(
  res10Percent[
    duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE),
  ],
  dummy == 0
)
indx0 <- as.numeric(row.names(res10PercentSub0))

# Sort by dimension and give each consecutive pair of sorted rows a shared id
# (1, 1, 2, 2, ...).
res10PercentSub20 <- res10PercentSub0[order(res10PercentSub0$dimension), ]
indx00 <- as.numeric(row.names(res10PercentSub20))
names(indx00) <- (seq_along(indx00) - 1) %/% 2 + 1

# Pull each flagged row plus the row right before it. The numeric row labels
# are used as row positions -- assumes default 1..n row names; TODO confirm.
res10PercentSub30 <- res10Percent[c(indx00 - 1, indx00), ]
res10PercentSub30$id <- names(c(indx00 - 1, indx00))

# Within each id group, pick the pair to discard: the dummy == 1 row whose
# dimension is farthest from the dummy == 0 dimension, together with the
# dummy == 0 row whose row label is closest to it.
res10PercentSub40 <- do.call(
  rbind,
  lapply(
    split(res10PercentSub30, res10PercentSub30$id),
    function(grp) {
      ones <- subset(grp, dummy == 1)
      zeros <- subset(grp, dummy == 0)
      gaps <- abs(ones$dimension - unique(zeros$dimension))
      farthestOne <- ones[which.max(gaps), ]
      labelGap <- abs(
        as.numeric(row.names(farthestOne)) - as.numeric(row.names(zeros))
      )
      nearestZero <- zeros[which.min(labelGap), ]
      rbind(farthestOne, nearestZero)
    }
  )
)

# Strip everything up to the last "." from the row names so that only the part
# after the prefix added by do.call(rbind, ...) remains.
row.names(res10PercentSub40) <- gsub(".*\\.", "", row.names(res10PercentSub40))
# res10PercentSub5 must exist before indxNew is computed; without these two
# statements the script stops with "object 'res10PercentSub5' not found".
# First strip everything up to the last "." from the row names of
# res10PercentSub4 (the prefix added by do.call(rbind, ...)), then order the
# rows by their numeric group id.
row.names(res10PercentSub4) <- gsub(".*\\.", "", row.names(res10PercentSub4))
res10PercentSub5 <- res10PercentSub4[order(as.numeric(res10PercentSub4$id)), ]

# Numeric row labels of every pair selected for removal by the dummy == 1 pass
# (res10PercentSub5) and the dummy == 0 pass (res10PercentSub40).
indxNew <- sort(as.numeric(c(
  row.names(res10PercentSub5),
  row.names(res10PercentSub40)
)))

# Negative indexing drops those rows; the row labels are used as positions.
res10PercentFinal <- res10Percent[-indxNew, ]

# Sanity checks (outputs as reported in the original post).
dim(res10PercentFinal)
#[1] 454   5
nrow(subset(res10PercentFinal, dummy == 0))
#[1] 227
nrow(subset(res10PercentFinal, dummy == 1))
#[1] 227
nrow(unique(res10PercentFinal))
#[1] 454
# Positions of every row of res10Percent that has an identical copy
# (flagged from both ends so all copies are listed).
which(duplicated(res10Percent) | duplicated(res10Percent, fromLast = TRUE))
# [1] 113 117 123 125 153 157 187 189 207 213 223 235 265 267 269 275 276 278 279
#[20] 283 293 301 303 305 309 317 327 331 335 339 341 343 347 351 367 369 371 379
#[39] 385 399 407 413 415 417 429 437 441 453 459 461 471 473 477 479 501 505

# Before: rows 113 and 117 are identical, each followed by its own partner row.
res10Percent[c(113:114, 117:118), ]
#         firm year industry dummy dimension
#113 500221723 2005       26     1      3147
#114 500601429 2005       26     0      3076
#117 500221723 2005       26     1      3147
#118 502668920 2005       26     0      3249

# After: deleted the duplicated row and the accompanying pair with the maximum
# difference.
res10PercentFinal[c(113:114, 117:118), ]
#         firm year industry dummy dimension
#113 500221723 2005       26     1      3147
#114 500601429 2005       26     0      3076
#119 500115362 2006       26     1      6239
#120 500060223 2006       26     0      6208
A.K.
# These two statements belong between the construction of res10PercentSub4 and
# the `indxNew<-` assignment earlier in this message, which reads
# row.names(res10PercentSub5).
# Strip everything up to the last "." from the row names, then order the rows
# by their numeric group id.
row.names(res10PercentSub4) <- gsub(".*\\.", "", row.names(res10PercentSub4))
res10PercentSub5 <- res10PercentSub4[
  order(as.numeric(res10PercentSub4[["id"]])),
]
----- Original Message -----
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Cc:
Sent: Monday, June 10, 2013 1:41 PM
Subject: RE: please check this
I think it could be better to eliminate that one.
If you could do it I appreciate.
Cecília
________________________________________
De: arun [smartpink111 at yahoo.com]
Enviado: segunda-feira, 10 de Junho de 2013 18:14
Para: Cecilia Carmo
Assunto: Re: please check this
If you wanted to eliminate the duplicate rows that have the pair with the
maximum difference, it is possible.
Just informing you.
----- Original Message -----
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Cc:
Sent: Monday, June 10, 2013 10:51 AM
Subject: RE: please check this
I think it is ok now.
Thanks
Cecília
________________________________________
De: arun [smartpink111 at yahoo.com]
Enviado: segunda-feira, 10 de Junho de 2013 15:39
Para: Cecilia Carmo
Cc: R help
Assunto: Re: please check this
Hi,
Try this:
# Positions of the second and later copies of any repeated row.
which(duplicated(res10Percent))
# [1] 117 125 157 189 213 235 267 275 278 293 301 327 331 335 339 367 369 371 379
#[20] 413 415 417 441 459 461 477 479 505

# Split the repeated rows by dummy; most of the duplicated are dummy == 1.
res10PercentSub1 <- subset(
  res10Percent[which(duplicated(res10Percent)), ],
  dummy == 1
)
res10PercentSub0 <- subset(
  res10Percent[which(duplicated(res10Percent)), ],
  dummy == 0
)

# A repeated dummy == 1 row is dropped together with the row after it, a
# repeated dummy == 0 row together with the row before it. The numeric row
# labels are used as row positions -- assumes default 1..n row names.
indx1 <- as.numeric(row.names(res10PercentSub1))
indx11 <- sort(c(indx1, indx1 + 1))
indx0 <- as.numeric(row.names(res10PercentSub0))
indx00 <- sort(c(indx0, indx0 - 1))
indx10 <- sort(c(indx11, indx00))

nrow(res10Percent[-indx10, ])
#[1] 452
res10PercentNew <- res10Percent[-indx10, ]

# Sanity checks (outputs as reported in the original post).
nrow(subset(res10PercentNew, dummy == 1))
#[1] 226
nrow(subset(res10PercentNew, dummy == 0))
#[1] 226
nrow(unique(res10PercentNew))
#[1] 452
A.K.
----- Original Message -----
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Cc:
Sent: Monday, June 10, 2013 10:19 AM
Subject: RE: please check this
But I don't want it like this.
Once a firm is paired with another, these two firms should not be paired again.
Could you solve this?
Thanks,
Cecília
________________________________________
De: arun [smartpink111 at yahoo.com]
Enviado: segunda-feira, 10 de Junho de 2013 15:12
Para: Cecilia Carmo
Assunto: Re: please check this
I did look into that.
If you look at nrow() in each category, it will be different. It means that
the duplicates are not pairwise, but in the whole `result`. The explanation is
again the multiple matches. Here we selected the one with `dummy==0` that most
closely matches the dimension of one `dummy==1`. Suppose the value of dimension
with `dummy==1` is `2554` and it got a match with `dummy==0` with `2580`. Now
consider another case with dimension `2570` and `dummy==1` (which also falls
within the same split group). Then it also got a match with `2580` with
`dummy==0`. I guess it was based on the way in which it was tested.
________________________________
From: Cecilia Carmo <cecilia.carmo at ua.pt>
To: arun <smartpink111 at yahoo.com>
Sent: Monday, June 10, 2013 10:02 AM
Subject: please check this
When I do
# Build the matched sample; fun1() and final3New are defined elsewhere.
res10Percent <- fun1(final3New, 0.1, 200)
dim(res10Percent)
#[1] 508   5

# Both dummy groups have the same number of rows.
nrow(subset(res10Percent, dummy == 0))
#[1] 254
nrow(subset(res10Percent, dummy == 1))
#[1] 254

# Count the distinct rows.
testingDuplicates <- unique(res10Percent)
nrow(testingDuplicates)
#[1] 480
# This should be 508; if it is not, there are duplicated rows, or not?
Thanks
Cecilia