Hello
I would like to transform the dataframe df1 into df2 (ie copy the data
from several lines for a men/women to only one line by individu men/women)
dput(df1)
structure(list(Mat = c(934L, 934L, 934L, 935L, 935L, 936L, 936L,
936L, 936L, 937L, 937L, 937L, 937L), Nom = structure(c(2L, 2L,
2L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L), .Label = c("TTTT",
"XXXX", "YYYY", "ZZZZ"), class =
"factor"), Sexe = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Masculin",
class = "factor"),
Cat = c(6L, 7L, 8L, 7L, 8L, 6L, 7L, 8L, 9L, 3L, 4L, 6L, 7L
), D?but = structure(c(5L, 7L, 2L, 12L, 8L, 3L, 4L, 10L,
11L, 13L, 1L, 6L, 9L), .Label = c("01/01/1990",
"01/01/2011",
"01/02/1986", "01/02/1990", "01/07/1986",
"01/07/1993", "01/07/1994",
"01/07/1996", "01/10/2003", "01/11/2002",
"01/11/2011", "13/01/1986",
"23/01/1986"), class = "factor"), Fin = structure(c(2L,
9L,
10L, 3L, 10L, 5L, 6L, 7L, 10L, 8L, 1L, 4L, 10L), .Label =
c("30/06/1993",
"30/06/1994", "30/06/1996", "30/09/2003",
"31/01/1990", "31/10/2002",
"31/10/2011", "31/12/1989", "31/12/2010",
"31/12/4712"), class =
"factor")), .Names = c("Mat",
"Nom", "Sexe", "Cat", "D?but",
"Fin"), class = "data.frame", row.names =
c(NA,
-13L))
dput(df2)
structure(list(Mat = 934:937, Nom = structure(c(2L, 3L, 4L, 1L
), .Label = c("TTTT", "XXXX", "YYYY",
"ZZZZ"), class = "factor"),
Sexe = structure(c(1L, 1L, 1L, 1L), .Label = "Masculin", class =
"factor"),
Cat4 = c(NA, NA, NA, 4L), D?but4 = structure(c(1L, 1L, 1L,
2L), .Label = c("", "01/01/1990"), class =
"factor"), Fin4 =
structure(c(1L,
1L, 1L, 2L), .Label = c("", "30/06/1993"), class =
"factor"),
Cat5 = c(NA, NA, NA, NA), D?but5 = c(NA, NA, NA, NA), Fin5 = c(NA,
NA, NA, NA), Cat6 = c(6L, NA, 6L, 6L), D?but6 = structure(c(3L,
1L, 2L, 4L), .Label = c("", "01/02/1986",
"01/07/1986", "01/07/1993"
), class = "factor"), Fin6 = structure(c(2L, 1L, 4L, 3L), .Label
=
c("",
"30/06/1994", "30/09/2003", "31/01/1990"),
class = "factor"),
Cat7 = c(7L, 7L, 7L, 7L), D?but7 = structure(c(2L, 4L, 1L,
3L), .Label = c("01/02/1990", "01/07/1994",
"01/10/2003",
"13/01/1986"), class = "factor"), Fin7 =
structure(c(3L,
1L, 2L, 4L), .Label = c("30/06/1996", "31/10/2002",
"31/12/2010",
"31/12/4712"), class = "factor"), Cat8 = c(8L, 8L, 8L,
NA
), D?but8 = structure(c(2L, 3L, 4L, 1L), .Label = c("",
"01/01/2011",
"01/07/1996", "01/11/2002"), class =
"factor"), Fin8 = structure(c(3L,
3L, 2L, 1L), .Label = c("", "31/10/2011",
"31/12/4712"), class =
"factor"),
Cat9 = c(NA, NA, 9L, NA), D?but9 = structure(c(1L, 1L, 2L,
1L), .Label = c("", "01/11/2011"), class =
"factor"), Fin9 =
structure(c(1L,
1L, 2L, 1L), .Label = c("", "31/12/4712"), class =
"factor")),
.Names = c("Mat",
"Nom", "Sexe", "Cat4", "D?but4",
"Fin4", "Cat5", "D?but5", "Fin5",
"Cat6", "D?but6", "Fin6", "Cat7",
"D?but7", "Fin7", "Cat8", "D?but8",
"Fin8", "Cat9", "D?but9", "Fin9"), class
= "data.frame", row.names = c(NA,
-4L))
Any idea ?
Thank you
--
Michel ARNAUD
Charg? de mission aupr?s du DRH
DGDRD-Drh - TA 174/04
Av Agropolis 34398 Montpellier cedex 5
tel : 04.67.61.75.38
fax : 04.67.61.57.87
port: 06.47.43.55.31
Hello,
The following does what you want but the order of columns is different.
reshape(df1, v.names = c("Cat", "D?but", "Fin"),
idvar = "Mat", timevar = "Cat", direction =
"wide")
Hope this helps,
Rui Barradas
Em 29-06-2013 10:26, Arnaud Michel escreveu:> Hello
>
> I would like to transform the dataframe df1 into df2 (ie copy the data
> from several lines for a men/women to only one line by individu men/women)
>
> dput(df1)
> structure(list(Mat = c(934L, 934L, 934L, 935L, 935L, 936L, 936L,
> 936L, 936L, 937L, 937L, 937L, 937L), Nom = structure(c(2L, 2L,
> 2L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L), .Label = c("TTTT",
> "XXXX", "YYYY", "ZZZZ"), class =
"factor"), Sexe = structure(c(1L,
> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label =
"Masculin",
> class = "factor"),
> Cat = c(6L, 7L, 8L, 7L, 8L, 6L, 7L, 8L, 9L, 3L, 4L, 6L, 7L
> ), D?but = structure(c(5L, 7L, 2L, 12L, 8L, 3L, 4L, 10L,
> 11L, 13L, 1L, 6L, 9L), .Label = c("01/01/1990",
"01/01/2011",
> "01/02/1986", "01/02/1990",
"01/07/1986", "01/07/1993", "01/07/1994",
> "01/07/1996", "01/10/2003",
"01/11/2002", "01/11/2011", "13/01/1986",
> "23/01/1986"), class = "factor"), Fin =
structure(c(2L, 9L,
> 10L, 3L, 10L, 5L, 6L, 7L, 10L, 8L, 1L, 4L, 10L), .Label >
c("30/06/1993",
> "30/06/1994", "30/06/1996",
"30/09/2003", "31/01/1990", "31/10/2002",
> "31/10/2011", "31/12/1989",
"31/12/2010", "31/12/4712"), class >
"factor")), .Names = c("Mat",
> "Nom", "Sexe", "Cat", "D?but",
"Fin"), class = "data.frame", row.names > c(NA,
> -13L))
>
>
> dput(df2)
> structure(list(Mat = 934:937, Nom = structure(c(2L, 3L, 4L, 1L
> ), .Label = c("TTTT", "XXXX", "YYYY",
"ZZZZ"), class = "factor"),
> Sexe = structure(c(1L, 1L, 1L, 1L), .Label = "Masculin",
class > "factor"),
> Cat4 = c(NA, NA, NA, 4L), D?but4 = structure(c(1L, 1L, 1L,
> 2L), .Label = c("", "01/01/1990"), class =
"factor"), Fin4 > structure(c(1L,
> 1L, 1L, 2L), .Label = c("", "30/06/1993"), class =
"factor"),
> Cat5 = c(NA, NA, NA, NA), D?but5 = c(NA, NA, NA, NA), Fin5 = c(NA,
> NA, NA, NA), Cat6 = c(6L, NA, 6L, 6L), D?but6 = structure(c(3L,
> 1L, 2L, 4L), .Label = c("", "01/02/1986",
"01/07/1986", "01/07/1993"
> ), class = "factor"), Fin6 = structure(c(2L, 1L, 4L, 3L),
.Label > c("",
> "30/06/1994", "30/09/2003",
"31/01/1990"), class = "factor"),
> Cat7 = c(7L, 7L, 7L, 7L), D?but7 = structure(c(2L, 4L, 1L,
> 3L), .Label = c("01/02/1990", "01/07/1994",
"01/10/2003",
> "13/01/1986"), class = "factor"), Fin7 =
structure(c(3L,
> 1L, 2L, 4L), .Label = c("30/06/1996",
"31/10/2002", "31/12/2010",
> "31/12/4712"), class = "factor"), Cat8 = c(8L, 8L,
8L, NA
> ), D?but8 = structure(c(2L, 3L, 4L, 1L), .Label = c("",
"01/01/2011",
> "01/07/1996", "01/11/2002"), class =
"factor"), Fin8 = structure(c(3L,
> 3L, 2L, 1L), .Label = c("", "31/10/2011",
"31/12/4712"), class > "factor"),
> Cat9 = c(NA, NA, 9L, NA), D?but9 = structure(c(1L, 1L, 2L,
> 1L), .Label = c("", "01/11/2011"), class =
"factor"), Fin9 > structure(c(1L,
> 1L, 2L, 1L), .Label = c("", "31/12/4712"), class =
"factor")),
> .Names = c("Mat",
> "Nom", "Sexe", "Cat4", "D?but4",
"Fin4", "Cat5", "D?but5", "Fin5",
> "Cat6", "D?but6", "Fin6", "Cat7",
"D?but7", "Fin7", "Cat8", "D?but8",
> "Fin8", "Cat9", "D?but9", "Fin9"),
class = "data.frame", row.names = c(NA,
> -4L))
>
>
> Any idea ?
> Thank you
>
HI,
Not sure why df2 didn't include Cat3, Debut3, Fin3, instead included columns
with all missing values (Cat5, Debut5, Fin5).
library(plyr)
library(reshape2)
df1New<-ddply(df1,.(Mat),transform,castCat=paste0("Cat",Cat),castD?but=paste0("D?but",Cat),castFin=paste0("Fin",Cat))
res<-
join_all(list(dcast(df1New,Mat+Nom+Sexe~castCat,value.var="Cat"),dcast(df1New,Mat+Nom+Sexe~castD?but,value.var="D?but"),
dcast(df1New,Mat+Nom+Sexe~castFin,value.var="Fin")),by=c("Mat","Nom","Sexe"))
?indx<-grepl("Mat|Nom|Sexe",colnames(res))
?res1<-cbind(res[indx],res[!indx][order(as.numeric(gsub("[[:alpha:]]","",colnames(res)[!indx])))])
res1
#? Mat? Nom???? Sexe Cat3???? D?but3?????? Fin3 Cat4???? D?but4?????? Fin4 Cat6
#1 934 XXXX Masculin?? NA?????? <NA>?????? <NA>?? NA??????
<NA>?????? <NA>??? 6
#2 935 YYYY Masculin?? NA?????? <NA>?????? <NA>?? NA??????
<NA>?????? <NA>?? NA
#3 936 ZZZZ Masculin?? NA?????? <NA>?????? <NA>?? NA??????
<NA>?????? <NA>??? 6
#4 937 TTTT Masculin??? 3 23/01/1986 31/12/1989??? 4 01/01/1990 30/06/1993??? 6
?# ??? D?but6?????? Fin6 Cat7???? D?but7?????? Fin7 Cat8???? D?but8?????? Fin8
#1 01/07/1986 30/06/1994??? 7 01/07/1994 31/12/2010??? 8 01/01/2011 31/12/4712
#2?????? <NA>?????? <NA>??? 7 13/01/1986 30/06/1996??? 8 01/07/1996
31/12/4712
#3 01/02/1986 31/01/1990??? 7 01/02/1990 31/10/2002??? 8 01/11/2002 31/10/2011
#4 01/07/1993 30/09/2003??? 7 01/10/2003 31/12/4712?? NA?????? <NA>??????
<NA>
?# Cat9???? D?but9?????? Fin9
#1?? NA?????? <NA>?????? <NA>
#2?? NA?????? <NA>?????? <NA>
#3??? 9 01/11/2011 31/12/4712
#4?? NA?????? <NA>?????? <NA>
A.K.
----- Original Message -----
From: Arnaud Michel <michel.arnaud at cirad.fr>
To: r-help at r-project.org
Cc:
Sent: Saturday, June 29, 2013 5:26 AM
Subject: [R] To transform a dataframe
Hello
I would like to transform the dataframe df1 into df2 (ie copy the data
from several lines for a men/women to only one line by individu men/women)
dput(df1)
structure(list(Mat = c(934L, 934L, 934L, 935L, 935L, 936L, 936L,
936L, 936L, 937L, 937L, 937L, 937L), Nom = structure(c(2L, 2L,
2L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L), .Label = c("TTTT",
"XXXX", "YYYY", "ZZZZ"), class =
"factor"), Sexe = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Masculin",
class = "factor"),
? ? Cat = c(6L, 7L, 8L, 7L, 8L, 6L, 7L, 8L, 9L, 3L, 4L, 6L, 7L
? ? ), D?but = structure(c(5L, 7L, 2L, 12L, 8L, 3L, 4L, 10L,
? ? 11L, 13L, 1L, 6L, 9L), .Label = c("01/01/1990",
"01/01/2011",
? ? "01/02/1986", "01/02/1990", "01/07/1986",
"01/07/1993", "01/07/1994",
? ? "01/07/1996", "01/10/2003", "01/11/2002",
"01/11/2011", "13/01/1986",
? ? "23/01/1986"), class = "factor"), Fin = structure(c(2L,
9L,
? ? 10L, 3L, 10L, 5L, 6L, 7L, 10L, 8L, 1L, 4L, 10L), .Label =
c("30/06/1993",
? ? "30/06/1994", "30/06/1996", "30/09/2003",
"31/01/1990", "31/10/2002",
? ? "31/10/2011", "31/12/1989", "31/12/2010",
"31/12/4712"), class =
"factor")), .Names = c("Mat",
"Nom", "Sexe", "Cat", "D?but",
"Fin"), class = "data.frame", row.names =
c(NA,
-13L))
dput(df2)
structure(list(Mat = 934:937, Nom = structure(c(2L, 3L, 4L, 1L
), .Label = c("TTTT", "XXXX", "YYYY",
"ZZZZ"), class = "factor"),
? ? Sexe = structure(c(1L, 1L, 1L, 1L), .Label = "Masculin", class =
"factor"),
? ? Cat4 = c(NA, NA, NA, 4L), D?but4 = structure(c(1L, 1L, 1L,
? ? 2L), .Label = c("", "01/01/1990"), class =
"factor"), Fin4 =
structure(c(1L,
? ? 1L, 1L, 2L), .Label = c("", "30/06/1993"), class =
"factor"),
? ? Cat5 = c(NA, NA, NA, NA), D?but5 = c(NA, NA, NA, NA), Fin5 = c(NA,
? ? NA, NA, NA), Cat6 = c(6L, NA, 6L, 6L), D?but6 = structure(c(3L,
? ? 1L, 2L, 4L), .Label = c("", "01/02/1986",
"01/07/1986", "01/07/1993"
? ? ), class = "factor"), Fin6 = structure(c(2L, 1L, 4L, 3L), .Label
=
c("",
? ? "30/06/1994", "30/09/2003", "31/01/1990"),
class = "factor"),
? ? Cat7 = c(7L, 7L, 7L, 7L), D?but7 = structure(c(2L, 4L, 1L,
? ? 3L), .Label = c("01/02/1990", "01/07/1994",
"01/10/2003",
? ? "13/01/1986"), class = "factor"), Fin7 =
structure(c(3L,
? ? 1L, 2L, 4L), .Label = c("30/06/1996", "31/10/2002",
"31/12/2010",
? ? "31/12/4712"), class = "factor"), Cat8 = c(8L, 8L, 8L,
NA
? ? ), D?but8 = structure(c(2L, 3L, 4L, 1L), .Label = c("",
"01/01/2011",
? ? "01/07/1996", "01/11/2002"), class =
"factor"), Fin8 = structure(c(3L,
? ? 3L, 2L, 1L), .Label = c("", "31/10/2011",
"31/12/4712"), class =
"factor"),
? ? Cat9 = c(NA, NA, 9L, NA), D?but9 = structure(c(1L, 1L, 2L,
? ? 1L), .Label = c("", "01/11/2011"), class =
"factor"), Fin9 =
structure(c(1L,
? ? 1L, 2L, 1L), .Label = c("", "31/12/4712"), class =
"factor")),
.Names = c("Mat",
"Nom", "Sexe", "Cat4", "D?but4",
"Fin4", "Cat5", "D?but5", "Fin5",
"Cat6", "D?but6", "Fin6", "Cat7",
"D?but7", "Fin7", "Cat8", "D?but8",
"Fin8", "Cat9", "D?but9", "Fin9"), class
= "data.frame", row.names = c(NA,
-4L))
Any idea ?
Thank you
--
Michel ARNAUD
Charg? de mission aupr?s du DRH
DGDRD-Drh - TA 174/04
Av Agropolis 34398 Montpellier cedex 5
tel : 04.67.61.75.38
fax : 04.67.61.57.87
port: 06.47.43.55.31
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.