Hi,
DataA<- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L), Status = c("A", "B", "A",
"B", "A", "B", "A",
"A", "B", "B", "A", "A",
"A", "A", "B", "A", "B",
"A", "A", "A",
"B", "A", "B", "A", "B",
"A", "B", "A", "A", "B",
"A", "B", "A",
"B", "A", "B", "A", "A",
"B", "A", "B", "A", "B",
"A", "B", "A"
), Date1 = c("03-Feb-01", "15-May-01",
"16-May-01", "06-Sep-01",
"13-Sep-01", "26-Feb-04", "08-Mar-04",
"09-Feb-01", "24-May-01",
"25-May-01", "06-Mar-02", "18-Mar-02",
"20-Mar-02", "01-Apr-01",
"05-May-01", "08-Jul-01", "09-Feb-01",
"13-Jun-01", "14-Jun-01",
"19-Jun-01", "05-Jul-01", "29-Oct-01",
"13-Nov-01", "19-Jan-02",
"05-Feb-02", "18-Mar-02", "01-Apr-02",
"30-Oct-02", "21-Nov-02",
"26-Mar-03", "06-Jun-03", "17-Jun-03",
"05-Nov-03", "12-Nov-03",
"13-Nov-03", "01-Apr-02", "30-Oct-02",
"21-Nov-02", "26-Mar-03",
"06-Jun-03", "17-Jun-03", "05-Nov-03",
"12-Nov-03", "13-Nov-03",
"12-Nov-03", "13-Nov-03"), Date2 = c("15-May-01",
"16-May-01",
"06-Sep-01", "13-Sep-01", "26-Feb-04",
"08-Mar-04", "01-Aug-04",
"24-May-01", "25-May-01", "06-Mar-02",
"18-Mar-02", "20-Mar-02",
"27-Mar-02", "08-May-02", "14-Jun-02",
"07-Sep-02", "13-Jun-01",
"14-Jun-01", "19-Jun-01", "05-Jul-01",
"29-Oct-01", "13-Nov-01",
"19-Jan-02", "05-Feb-02", "18-Mar-02",
"01-Apr-02", "30-Oct-02",
"21-Nov-02", "26-Mar-03", "06-Jun-03",
"17-Jun-03", "05-Nov-03",
"12-Nov-03", "13-Nov-03", "21-Nov-03",
"30-Oct-02", "21-Nov-02",
"26-Mar-03", "06-Jun-03", "17-Jun-03",
"05-Nov-03", "12-Nov-03",
"13-Nov-03", "21-Nov-03", "13-Nov-03",
"21-Nov-03")), .Names = c("ID",
"Status", "Date1", "Date2"), class =
"data.frame", row.names = c(NA,
-46L))
DataB<- structure(list(ID = c(1L, 1L, 2L, 8L, 8L, 8L, 8L, 8L), Date.Accident
= c("06-Sep-01",
"20-Jan-05", "06-Mar-02", "25-Jun-01",
"29-Oct-01", "18-Mar-02",
"06-Jun-03", "16-Jan-04")), .Names = c("ID",
"Date.Accident"), row.names = c(NA,
-8L), class = "data.frame")
library(stringr)
DataB[,2]<- str_trim(DataB[,2])???
DataA<-
read.csv("FarnooshDataA.csv",sep=",",stringsAsFactors=FALSE)
DataA[,3]<- str_trim(DataA[,3])
DataA[,4]<- str_trim(DataA[,4])
dataNew1<-do.call(rbind,lapply(unique(DataA$ID),function(i) {x1<-
DataA[DataA$ID==i,]; x2<-DataB[DataB$ID==i,];
do.call(rbind,lapply(seq_len(nrow(x2)),function(i){x3<-
unlist(mapply(function(x,y) which(x%in% y),x1[,3:4],x2[i,2]));
x3<-if(is.matrix(x3)) x3[1,] else x3; x4<- if(length(x3)==2)
x1[x3[which.min(x3)],!names(x1)%in% names(x3[which.max(x3)])] else
if(length(x3)==1) x1[x3,c("ID","Status",names(x3))] else
NULL; if(length(x4)>0){colnames(x4)[3]<- colnames(DataB)[2];x4} else
NULL})) }))
DataA1<- DataA
DataA1$Date1<-as.Date(DataA1$Date1,format="%d-%b-%y")
?DataA1$Date2<-as.Date(DataA1$Date2,format="%d-%b-%y")
DataB1<- DataB
DataB1$Date.Accident<-
as.Date(DataB1$Date.Accident,format="%d-%b-%y")
dataNew2<-do.call(rbind,lapply(unique(DataA1$ID),function(i) { x1<-
DataA1[DataA1$ID==i,] ;x1New<- t(x1); x2<-DataB1[DataB1$ID==i,];
do.call(rbind,lapply(seq_len(nrow(x2)),function(i){x3<-x1[x2[i,2]>x1New[3,]
& x2[i,2]< x1New[4,] ,]; x4<-x2[i,][nrow(x3)>0];merge(x3[,1:2],x4)
}))}))
dataNew2$Date.Accident<-format(dataNew2$Date.Accident,"%d-%b-%y")
library(plyr)
res<-join(rbind(dataNew1,dataNew2),DataB,by=c("ID","Date.Accident"),type="right")
res
#? ID Date.Accident Status
#1? 1???? 06-Sep-01????? A
#2? 1???? 20-Jan-05?? <NA>
#3? 2???? 06-Mar-02????? B
#4? 8???? 25-Jun-01????? A
#5? 8???? 29-Oct-01????? B
#6? 8???? 18-Mar-02????? B
#7? 8???? 06-Jun-03????? B
#8? 8???? 16-Jan-04?? <NA>
dat2<- data.frame(ID=unique(DataA$ID))
?res2<-join(dat2,res,by="ID",type="full")
?res2$Status[is.na(res2$Date.Accident) & is.na(res2$Status)]<-0
?res2
#? ID Date.Accident Status
#1? 1???? 06-Sep-01????? A
#2? 1???? 20-Jan-05?? <NA>
#3? 2???? 06-Mar-02????? B
#4? 3????????? <NA>????? 0
#5? 8???? 25-Jun-01????? A
#6? 8???? 29-Oct-01????? B
#7? 8???? 18-Mar-02????? B
#8? 8???? 06-Jun-03????? B
#9? 8???? 16-Jan-04?? <NA>
This is similar to the output you wanted in the attached excel sheet except that
the status for "29-Oct-01" should be B as per the rules.
A.K.
ID Date.Accident Result
06-Sep-01 A
1 20-Jan-05 NA
2 06-Mar-02 B
25-Jun-01 A
29-Oct-01 A
8 18-Mar-02 B
06-Jun-03 B
16-Jan-04 NA
________________________________
From: farnoosh sheikhi <farnoosh_81 at yahoo.com>
To: arun <smartpink111 at yahoo.com>
Sent: Saturday, April 13, 2013 3:39 PM
Subject: Re: Comparison of Date format
Hi Arun,
Thanks you so much for your help and effort.?
I have attached an excel file that I put the conditions in it. It might help for
coding.
I basically want to check if the dateAccident is between date1 and
date2--->return(status in that row)
If dateAccident =date1 and date2 ---> return (Status for Previous row)
If dateAccident=date1 and itsNOT equal to date2 --->return the status for
date1
If ID in DateA not found in ID in DateB---> (I'm not sure what I can
return here) I can return only ID with status 0(?)
Thanks again.
?
Best,Farnoosh Sheikhi
________________________________
From: arun <smartpink111 at yahoo.com>
To: farnoosh sheikhi <farnoosh_81 at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Friday, April 12, 2013 10:19 PM
Subject: Re: Comparison of Date format
HI,
In cases like below:
DataA<- read.table(text="
ID,Status,Date1,Date2 ??? ??? ??????
1,A,3-Feb-01,15-May-01 ??? ???
1,B,15-May-01,16-May-01 ??? ???
1,A,16-May-01,3-Sep-01 ??? ??? ??? ??? ???
1,B,3-Sep-01,13-Sep-01 ??? ??? ??? ??? ???
1,C,13-Sep-01,26-Feb-04 ??? ??? ??? ??? ???
2,A,9-Feb-01,24-May-01 ??? ???
2,B,24-May-01,25-May-01 ??? ??? ???
??? ???
2,A,25-May-01,16-Mar-02 ??? ??? ??? ??? ???
2,A,6-Mar-02,18-Mar-02
2,A,14-Sep-01,6-Mar-02???
",sep=",",header=TRUE,stringsAsFactors=FALSE)
library(stringr)
DataA[,3]<- str_trim(DataA[,3])
DataA[,4]<- str_trim(DataA[,4])
DataB<- read.table(text="
ID ??? Date.Accident ??? ???
1?????? 3-Sep-01?
1 ??? 20-Jan-05
1?????? 26-Feb-04
2?????? 25-May-01??? ???
2 ??? 6-Mar-02
",sep="",header=TRUE,stringsAsFactors=FALSE)
DataB[,2]<- str_trim(DataB[,2])
#Needed modification in the 2nd method:
dataNew2<-do.call(rbind,lapply(unique(DataA$ID),function(i){x1<-
DataA[DataA$ID==i,]; x2<- DataB[DataB$ID==i,];
do.call(rbind,lapply(seq_len(nrow(x2)),function(i) {x3<-
unlist(mapply(function(x,y) which(x==y), x1[,3:4],x2[i,2])); x4<-
if(length(x3)==2) x1[x3[which.min(x3)],!names(x1)%in%names(x3[which.max(x3)])]
else if(length(x3)==1) x1[x3,c("ID","Status",names(x3))]
else NULL; if(length(x4)>0) {colnames(x4)[3]<- colnames(DataB)[2]; x4}
else NULL})) }))
library(plyr)
res2<-
join(dataNew2,DataB,by=c("Date.Accident","ID"),type="right")
res2
#? Date.Accident ID Status
#1????? 3-Sep-01? 1????? A
#2???? 20-Jan-05? 1?? <NA>
#3???? 26-Feb-04? 1????? C
#4???? 25-May-01? 2????? B
#5????? 6-Mar-02? 2????? A
Using the first
method:
lst1<-lapply(seq_len(nrow(DataB)),function(i)
{x1<-unlist(mapply(function(x,y)
which(x==y),DataA[,3:4],DataB[i,2]));x2<-if(length(x1)==2)
DataA[x1[which.min(x1)],!names(DataA)%in%names(x1[which.max(x1)])] else
if(length(x1)==1) DataA[x1,c("ID","Status",names(x1))] else
NULL})
?lst2<-lapply(lst1,data.frame)
lst2<-lst2[lapply(lst2,nrow)!=0]
library(plyr)
?dataNew<-do.call(rbind,lapply(lst2,function(x) {colnames(x)[3]<-
colnames(DataB)[2];x}))
res<-join(dataNew,DataB,by=c("Date.Accident","ID"),type="right")
?identical(res,res2)
#[1] TRUE
A.K.
----- Original Message -----
From: arun <smartpink111 at yahoo.com>
To: farnoosh sheikhi <farnoosh_81 at yahoo.com>
Cc: R help <r-help at r-project.org>
Sent: Saturday, April 13, 2013 12:41 AM
Subject: Re: Comparison of Date format
Hi,
?In the example you provided, it looks like the dates in Date2 happens first.?
So, I changed it a bit.?
DataA<- read.table(text="
ID,Status,Date1,Date2 ??? ??? ??????
1,A,3-Feb-01,15-May-01 ??? ???
1,B,15-May-01,16-May-01 ??? ???
1,A,16-May-01,3-Sep-01 ??? ??? ??? ??? ???
1,B,3-Sep-01,13-Sep-01 ??? ??? ??? ??? ???
1,C,13-Sep-01,26-Feb-04 ??? ??? ??? ??? ???
2,A,9-Feb-01,24-May-01 ??? ???
2,B,24-May-01,25-May-01 ??? ??? ??? ??? ???
2,A,25-May-01,16-Mar-02 ??? ??? ??? ??? ???
2,A,6-Mar-02,18-Mar-02
2,A,14-Sep-01,6-Mar-02 ??? ???
",sep=",",header=TRUE,stringsAsFactors=FALSE)
library(stringr)
DataA[,3]<- str_trim(DataA[,3])
DataA[,4]<- str_trim(DataA[,4])
DataB<- read.table(text="
ID ??? Date.Accident ??? ???
1?????? 3-Sep-01?
1 ??? 20-Jan-05
1?????? 26-Feb-04??? ???
2 ???
6-Mar-02
",sep="",header=TRUE,stringsAsFactors=FALSE)
?
lst1<-lapply(seq_len(nrow(DataB)),function(i)
{x1<-unlist(mapply(function(x,y)
which(x==y),DataA[,3:4],DataB[i,2]));x2<-if(length(x1)==2)
DataA[x1[which.min(x1)],!names(DataA)%in%names(x1[which.max(x1)])] else
if(length(x1)==1) DataA[x1,c("ID","Status",names(x1))] else
NULL})
?lst2<-lapply(lst1,data.frame)
lst2<-lst2[lapply(lst2,nrow)!=0]
?lst2
#[[1]]
#? ID Status??? Date2
#3? 1????? A 3-Sep-01
#[[2]]
#? ID Status???? Date2
#5? 1????? C 26-Feb-04
#[[3]]
#? ID Status??? Date1
#9? 2????? A 6-Mar-02
library(plyr)
?dataNew<-do.call(rbind,lapply(lst2,function(x) {colnames(x)[3]<-
colnames(DataB)[2];x}))
res<-join(dataNew,DataB,by=c("Date.Accident","ID"),type="right")
?res
#? Date.Accident ID Status
#1????? 3-Sep-01? 1????? A
#2???? 20-Jan-05? 1?? <NA>
#3???? 26-Feb-04? 1????? C
#4????? 6-Mar-02? 2????? A
#or you can split by ID
lst1New<-lapply(unique(DataA$ID),function(i){x1<- DataA[DataA$ID==i,];
x2<- DataB[DataB$ID==i,]; do.call(rbind,lapply(seq_len(nrow(x2)),function(i)
{x3<- unlist(mapply(function(x,y) which(x==y), x1[,3:4],x2[i,2])); x4<-
if(length(x3)==2) x1[x3[which.min(x3)],!names(x1)%in%names(x3[which.max(x3)])]
else if(length(x3)==1) x1[x3,c("ID","Status",names(x3))]
else NULL})) })
?lst1New
#[[1]]
?# ID
Status???? Date2
#3? 1????? A? 3-Sep-01
#5? 1????? C 26-Feb-04
#[[2]]
?# ID Status??? Date1
#9? 2????? A 6-Mar-02
?dataNew1<- do.call(rbind,lapply(lst1New,function(x) {colnames(x)[3]<-
colnames(DataB)[2];x}))
?res1<-
join(dataNew1,DataB,by=c("Date.Accident","ID"),type="right")
?res1
#? Date.Accident ID Status
#1????? 3-Sep-01? 1????? A
#2???? 20-Jan-05? 1?? <NA>
#3???? 26-Feb-04? 1????? C
#4????? 6-Mar-02? 2????? A
A.K.
________________________________
From: farnoosh sheikhi <farnoosh_81 at yahoo.com>
To: "smartpink111 at yahoo.com" <smartpink111 at yahoo.com>
Sent: Friday, April 12, 2013 5:40 PM
Subject: Comparison of Date format
?Hi there,
Hope all is well.
I have a complicated data and I need to create a new variable based on the date
and description of the data.
I really?appreciate?if you can help me.
Here is how data look like:
DataA
DataB
? ? ? ?ID Status Date1 Date2
? ? ? ?ID Date.Accident
Result
1 ? A 3-Feb-01 15-May-01
1 3-Sep-01
A
1 ? ?B 15-May-01 16-May-01
1 20-Jan-05
NA
1 ? ?A 16-May-01 3-Sep-01
1 ?
?B 3-Sep-01 13-Sep-01
1 ? ? C 13-Sep-01 26-Feb-04
2 ? ?A 9-Feb-01 24-May-01
2 6-Mar-02
A
2 ? ?B 24-May-01 25-May-01
2 ? ?A 25-May-01 6-Mar-02
2 ? ? A 6-Mar-02 18-Mar-02
I want to compare dataA to B for each ID. if Date 1 or Date 2 matches to
Date.Accident return the result as status in dataA as a new result in Data B.
The trick here is I have two dates that are matched, but I want the status of
the one that happen first. The sample size of each data is not the same.
I really appreciate your time and help.
Thanks.