Hello I would like to transform the dataframe df1 into df2 (ie copy the data from several lines for a men/women to only one line by individu men/women) dput(df1) structure(list(Mat = c(934L, 934L, 934L, 935L, 935L, 936L, 936L, 936L, 936L, 937L, 937L, 937L, 937L), Nom = structure(c(2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L), .Label = c("TTTT", "XXXX", "YYYY", "ZZZZ"), class = "factor"), Sexe = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Masculin", class = "factor"), Cat = c(6L, 7L, 8L, 7L, 8L, 6L, 7L, 8L, 9L, 3L, 4L, 6L, 7L ), D?but = structure(c(5L, 7L, 2L, 12L, 8L, 3L, 4L, 10L, 11L, 13L, 1L, 6L, 9L), .Label = c("01/01/1990", "01/01/2011", "01/02/1986", "01/02/1990", "01/07/1986", "01/07/1993", "01/07/1994", "01/07/1996", "01/10/2003", "01/11/2002", "01/11/2011", "13/01/1986", "23/01/1986"), class = "factor"), Fin = structure(c(2L, 9L, 10L, 3L, 10L, 5L, 6L, 7L, 10L, 8L, 1L, 4L, 10L), .Label = c("30/06/1993", "30/06/1994", "30/06/1996", "30/09/2003", "31/01/1990", "31/10/2002", "31/10/2011", "31/12/1989", "31/12/2010", "31/12/4712"), class = "factor")), .Names = c("Mat", "Nom", "Sexe", "Cat", "D?but", "Fin"), class = "data.frame", row.names = c(NA, -13L)) dput(df2) structure(list(Mat = 934:937, Nom = structure(c(2L, 3L, 4L, 1L ), .Label = c("TTTT", "XXXX", "YYYY", "ZZZZ"), class = "factor"), Sexe = structure(c(1L, 1L, 1L, 1L), .Label = "Masculin", class = "factor"), Cat4 = c(NA, NA, NA, 4L), D?but4 = structure(c(1L, 1L, 1L, 2L), .Label = c("", "01/01/1990"), class = "factor"), Fin4 = structure(c(1L, 1L, 1L, 2L), .Label = c("", "30/06/1993"), class = "factor"), Cat5 = c(NA, NA, NA, NA), D?but5 = c(NA, NA, NA, NA), Fin5 = c(NA, NA, NA, NA), Cat6 = c(6L, NA, 6L, 6L), D?but6 = structure(c(3L, 1L, 2L, 4L), .Label = c("", "01/02/1986", "01/07/1986", "01/07/1993" ), class = "factor"), Fin6 = structure(c(2L, 1L, 4L, 3L), .Label = c("", "30/06/1994", "30/09/2003", "31/01/1990"), class = "factor"), Cat7 = c(7L, 7L, 7L, 7L), D?but7 = structure(c(2L, 4L, 1L, 3L), .Label = c("01/02/1990", "01/07/1994", "01/10/2003", "13/01/1986"), class = "factor"), Fin7 = structure(c(3L, 1L, 2L, 4L), .Label = c("30/06/1996", "31/10/2002", "31/12/2010", "31/12/4712"), class = "factor"), Cat8 = c(8L, 8L, 8L, NA ), D?but8 = structure(c(2L, 3L, 4L, 1L), .Label = c("", "01/01/2011", "01/07/1996", "01/11/2002"), class = "factor"), Fin8 = structure(c(3L, 3L, 2L, 1L), .Label = c("", "31/10/2011", "31/12/4712"), class = "factor"), Cat9 = c(NA, NA, 9L, NA), D?but9 = structure(c(1L, 1L, 2L, 1L), .Label = c("", "01/11/2011"), class = "factor"), Fin9 = structure(c(1L, 1L, 2L, 1L), .Label = c("", "31/12/4712"), class = "factor")), .Names = c("Mat", "Nom", "Sexe", "Cat4", "D?but4", "Fin4", "Cat5", "D?but5", "Fin5", "Cat6", "D?but6", "Fin6", "Cat7", "D?but7", "Fin7", "Cat8", "D?but8", "Fin8", "Cat9", "D?but9", "Fin9"), class = "data.frame", row.names = c(NA, -4L)) Any idea ? Thank you -- Michel ARNAUD Charg? de mission aupr?s du DRH DGDRD-Drh - TA 174/04 Av Agropolis 34398 Montpellier cedex 5 tel : 04.67.61.75.38 fax : 04.67.61.57.87 port: 06.47.43.55.31
Hello, The following does what you want but the order of columns is different. reshape(df1, v.names = c("Cat", "D?but", "Fin"), idvar = "Mat", timevar = "Cat", direction = "wide") Hope this helps, Rui Barradas Em 29-06-2013 10:26, Arnaud Michel escreveu:> Hello > > I would like to transform the dataframe df1 into df2 (ie copy the data > from several lines for a men/women to only one line by individu men/women) > > dput(df1) > structure(list(Mat = c(934L, 934L, 934L, 935L, 935L, 936L, 936L, > 936L, 936L, 937L, 937L, 937L, 937L), Nom = structure(c(2L, 2L, > 2L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L), .Label = c("TTTT", > "XXXX", "YYYY", "ZZZZ"), class = "factor"), Sexe = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Masculin", > class = "factor"), > Cat = c(6L, 7L, 8L, 7L, 8L, 6L, 7L, 8L, 9L, 3L, 4L, 6L, 7L > ), D?but = structure(c(5L, 7L, 2L, 12L, 8L, 3L, 4L, 10L, > 11L, 13L, 1L, 6L, 9L), .Label = c("01/01/1990", "01/01/2011", > "01/02/1986", "01/02/1990", "01/07/1986", "01/07/1993", "01/07/1994", > "01/07/1996", "01/10/2003", "01/11/2002", "01/11/2011", "13/01/1986", > "23/01/1986"), class = "factor"), Fin = structure(c(2L, 9L, > 10L, 3L, 10L, 5L, 6L, 7L, 10L, 8L, 1L, 4L, 10L), .Label > c("30/06/1993", > "30/06/1994", "30/06/1996", "30/09/2003", "31/01/1990", "31/10/2002", > "31/10/2011", "31/12/1989", "31/12/2010", "31/12/4712"), class > "factor")), .Names = c("Mat", > "Nom", "Sexe", "Cat", "D?but", "Fin"), class = "data.frame", row.names > c(NA, > -13L)) > > > dput(df2) > structure(list(Mat = 934:937, Nom = structure(c(2L, 3L, 4L, 1L > ), .Label = c("TTTT", "XXXX", "YYYY", "ZZZZ"), class = "factor"), > Sexe = structure(c(1L, 1L, 1L, 1L), .Label = "Masculin", class > "factor"), > Cat4 = c(NA, NA, NA, 4L), D?but4 = structure(c(1L, 1L, 1L, > 2L), .Label = c("", "01/01/1990"), class = "factor"), Fin4 > structure(c(1L, > 1L, 1L, 2L), .Label = c("", "30/06/1993"), class = "factor"), > Cat5 = c(NA, NA, NA, NA), D?but5 = c(NA, NA, NA, NA), Fin5 = c(NA, > NA, NA, NA), Cat6 = c(6L, NA, 6L, 6L), D?but6 = structure(c(3L, > 1L, 2L, 4L), .Label = c("", "01/02/1986", "01/07/1986", "01/07/1993" > ), class = "factor"), Fin6 = structure(c(2L, 1L, 4L, 3L), .Label > c("", > "30/06/1994", "30/09/2003", "31/01/1990"), class = "factor"), > Cat7 = c(7L, 7L, 7L, 7L), D?but7 = structure(c(2L, 4L, 1L, > 3L), .Label = c("01/02/1990", "01/07/1994", "01/10/2003", > "13/01/1986"), class = "factor"), Fin7 = structure(c(3L, > 1L, 2L, 4L), .Label = c("30/06/1996", "31/10/2002", "31/12/2010", > "31/12/4712"), class = "factor"), Cat8 = c(8L, 8L, 8L, NA > ), D?but8 = structure(c(2L, 3L, 4L, 1L), .Label = c("", "01/01/2011", > "01/07/1996", "01/11/2002"), class = "factor"), Fin8 = structure(c(3L, > 3L, 2L, 1L), .Label = c("", "31/10/2011", "31/12/4712"), class > "factor"), > Cat9 = c(NA, NA, 9L, NA), D?but9 = structure(c(1L, 1L, 2L, > 1L), .Label = c("", "01/11/2011"), class = "factor"), Fin9 > structure(c(1L, > 1L, 2L, 1L), .Label = c("", "31/12/4712"), class = "factor")), > .Names = c("Mat", > "Nom", "Sexe", "Cat4", "D?but4", "Fin4", "Cat5", "D?but5", "Fin5", > "Cat6", "D?but6", "Fin6", "Cat7", "D?but7", "Fin7", "Cat8", "D?but8", > "Fin8", "Cat9", "D?but9", "Fin9"), class = "data.frame", row.names = c(NA, > -4L)) > > > Any idea ? > Thank you >
HI, Not sure why df2 didn't include Cat3, Debut3, Fin3, instead included columns with all missing values (Cat5, Debut5, Fin5). library(plyr) library(reshape2) df1New<-ddply(df1,.(Mat),transform,castCat=paste0("Cat",Cat),castD?but=paste0("D?but",Cat),castFin=paste0("Fin",Cat)) res<- join_all(list(dcast(df1New,Mat+Nom+Sexe~castCat,value.var="Cat"),dcast(df1New,Mat+Nom+Sexe~castD?but,value.var="D?but"), dcast(df1New,Mat+Nom+Sexe~castFin,value.var="Fin")),by=c("Mat","Nom","Sexe")) ?indx<-grepl("Mat|Nom|Sexe",colnames(res)) ?res1<-cbind(res[indx],res[!indx][order(as.numeric(gsub("[[:alpha:]]","",colnames(res)[!indx])))]) res1 #? Mat? Nom???? Sexe Cat3???? D?but3?????? Fin3 Cat4???? D?but4?????? Fin4 Cat6 #1 934 XXXX Masculin?? NA?????? <NA>?????? <NA>?? NA?????? <NA>?????? <NA>??? 6 #2 935 YYYY Masculin?? NA?????? <NA>?????? <NA>?? NA?????? <NA>?????? <NA>?? NA #3 936 ZZZZ Masculin?? NA?????? <NA>?????? <NA>?? NA?????? <NA>?????? <NA>??? 6 #4 937 TTTT Masculin??? 3 23/01/1986 31/12/1989??? 4 01/01/1990 30/06/1993??? 6 ?# ??? D?but6?????? Fin6 Cat7???? D?but7?????? Fin7 Cat8???? D?but8?????? Fin8 #1 01/07/1986 30/06/1994??? 7 01/07/1994 31/12/2010??? 8 01/01/2011 31/12/4712 #2?????? <NA>?????? <NA>??? 7 13/01/1986 30/06/1996??? 8 01/07/1996 31/12/4712 #3 01/02/1986 31/01/1990??? 7 01/02/1990 31/10/2002??? 8 01/11/2002 31/10/2011 #4 01/07/1993 30/09/2003??? 7 01/10/2003 31/12/4712?? NA?????? <NA>?????? <NA> ?# Cat9???? D?but9?????? Fin9 #1?? NA?????? <NA>?????? <NA> #2?? NA?????? <NA>?????? <NA> #3??? 9 01/11/2011 31/12/4712 #4?? NA?????? <NA>?????? <NA> A.K. ----- Original Message ----- From: Arnaud Michel <michel.arnaud at cirad.fr> To: r-help at r-project.org Cc: Sent: Saturday, June 29, 2013 5:26 AM Subject: [R] To transform a dataframe Hello I would like to transform the dataframe df1 into df2 (ie copy the data from several lines for a men/women to only one line by individu men/women) dput(df1) structure(list(Mat = c(934L, 934L, 934L, 935L, 935L, 936L, 936L, 936L, 936L, 937L, 937L, 937L, 937L), Nom = structure(c(2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L), .Label = c("TTTT", "XXXX", "YYYY", "ZZZZ"), class = "factor"), Sexe = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Masculin", class = "factor"), ? ? Cat = c(6L, 7L, 8L, 7L, 8L, 6L, 7L, 8L, 9L, 3L, 4L, 6L, 7L ? ? ), D?but = structure(c(5L, 7L, 2L, 12L, 8L, 3L, 4L, 10L, ? ? 11L, 13L, 1L, 6L, 9L), .Label = c("01/01/1990", "01/01/2011", ? ? "01/02/1986", "01/02/1990", "01/07/1986", "01/07/1993", "01/07/1994", ? ? "01/07/1996", "01/10/2003", "01/11/2002", "01/11/2011", "13/01/1986", ? ? "23/01/1986"), class = "factor"), Fin = structure(c(2L, 9L, ? ? 10L, 3L, 10L, 5L, 6L, 7L, 10L, 8L, 1L, 4L, 10L), .Label = c("30/06/1993", ? ? "30/06/1994", "30/06/1996", "30/09/2003", "31/01/1990", "31/10/2002", ? ? "31/10/2011", "31/12/1989", "31/12/2010", "31/12/4712"), class = "factor")), .Names = c("Mat", "Nom", "Sexe", "Cat", "D?but", "Fin"), class = "data.frame", row.names = c(NA, -13L)) dput(df2) structure(list(Mat = 934:937, Nom = structure(c(2L, 3L, 4L, 1L ), .Label = c("TTTT", "XXXX", "YYYY", "ZZZZ"), class = "factor"), ? ? Sexe = structure(c(1L, 1L, 1L, 1L), .Label = "Masculin", class = "factor"), ? ? Cat4 = c(NA, NA, NA, 4L), D?but4 = structure(c(1L, 1L, 1L, ? ? 2L), .Label = c("", "01/01/1990"), class = "factor"), Fin4 = structure(c(1L, ? ? 1L, 1L, 2L), .Label = c("", "30/06/1993"), class = "factor"), ? ? Cat5 = c(NA, NA, NA, NA), D?but5 = c(NA, NA, NA, NA), Fin5 = c(NA, ? ? NA, NA, NA), Cat6 = c(6L, NA, 6L, 6L), D?but6 = structure(c(3L, ? ? 1L, 2L, 4L), .Label = c("", "01/02/1986", "01/07/1986", "01/07/1993" ? ? ), class = "factor"), Fin6 = structure(c(2L, 1L, 4L, 3L), .Label = c("", ? ? "30/06/1994", "30/09/2003", "31/01/1990"), class = "factor"), ? ? Cat7 = c(7L, 7L, 7L, 7L), D?but7 = structure(c(2L, 4L, 1L, ? ? 3L), .Label = c("01/02/1990", "01/07/1994", "01/10/2003", ? ? "13/01/1986"), class = "factor"), Fin7 = structure(c(3L, ? ? 1L, 2L, 4L), .Label = c("30/06/1996", "31/10/2002", "31/12/2010", ? ? "31/12/4712"), class = "factor"), Cat8 = c(8L, 8L, 8L, NA ? ? ), D?but8 = structure(c(2L, 3L, 4L, 1L), .Label = c("", "01/01/2011", ? ? "01/07/1996", "01/11/2002"), class = "factor"), Fin8 = structure(c(3L, ? ? 3L, 2L, 1L), .Label = c("", "31/10/2011", "31/12/4712"), class = "factor"), ? ? Cat9 = c(NA, NA, 9L, NA), D?but9 = structure(c(1L, 1L, 2L, ? ? 1L), .Label = c("", "01/11/2011"), class = "factor"), Fin9 = structure(c(1L, ? ? 1L, 2L, 1L), .Label = c("", "31/12/4712"), class = "factor")), .Names = c("Mat", "Nom", "Sexe", "Cat4", "D?but4", "Fin4", "Cat5", "D?but5", "Fin5", "Cat6", "D?but6", "Fin6", "Cat7", "D?but7", "Fin7", "Cat8", "D?but8", "Fin8", "Cat9", "D?but9", "Fin9"), class = "data.frame", row.names = c(NA, -4L)) Any idea ? Thank you -- Michel ARNAUD Charg? de mission aupr?s du DRH DGDRD-Drh - TA 174/04 Av Agropolis 34398 Montpellier cedex 5 tel : 04.67.61.75.38 fax : 04.67.61.57.87 port: 06.47.43.55.31 ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.