Hello
I have a large dataframe (nrow=55000).
Below, df1 is an extract of the original dataframe:
dput(df1)
structure(list(Cat = c(6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
4, 8, 8, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 6, 7, 7,
7, 7, 7), Ech = c(8, 9, 10, 11, 12, 12, 13, 13, 11, 12, 13, 14,
14, 14, 9, 10, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11, 11,
4, 5, 6, 7, 8, 8, 8, 9, 9, 8, 9, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5,
5, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 7, 5, 6, 7, 8, 9, 9, 10, 10,
11, 11, 11, 11, 11, 11, 15, 5, 6, 7, 7, 8, 8, 8, 8, 9, 9, 13,
13, 14, 15, 15, 15, 10, 1, 2, 3, 4, 5)), .Names = c("Cat",
"Ech"
), row.names = c("1", "2", "3", "4",
"5", "6", "7", "8", "10",
"11", "12", "13", "14", "15",
"17", "18", "19", "20", "21",
"22",
"23", "24", "25", "27", "28",
"29", "30", "31", "32", "33",
"35",
"36", "37", "38", "39", "40",
"41", "42", "43", "45", "46",
"47",
"48", "49", "50", "51", "52",
"53", "54", "55", "56", "57",
"59",
"60", "61", "62", "63", "64",
"65", "66", "67", "68", "69",
"71",
"72", "73", "74", "75", "76",
"77", "78", "79", "80", "81",
"82",
"83", "84", "86", "87", "88",
"89", "90", "91", "92", "93",
"94",
"95", "96", "98", "99", "100",
"101", "102", "103", "105",
"106",
"107", "108", "109", "110"), class =
"data.frame")
I have not managed to avoid a loop, and the loop is very slow.
I want to obtain a new dataframe df2 with a new variable CatEch.
CatEch is Cat and Ech pasted together, separated by "." and with Ech zero-padded to two digits (e.g. Cat = 6 and Ech = 8 give "6.08"):
dput(df2)
structure(list(Cat = c(6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
4, 8, 8, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 6, 7, 7,
7, 7, 7), Ech = c(8, 9, 10, 11, 12, 12, 13, 13, 11, 12, 13, 14,
14, 14, 9, 10, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11, 11,
4, 5, 6, 7, 8, 8, 8, 9, 9, 8, 9, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5,
5, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 7, 5, 6, 7, 8, 9, 9, 10, 10,
11, 11, 11, 11, 11, 11, 15, 5, 6, 7, 7, 8, 8, 8, 8, 9, 9, 13,
13, 14, 15, 15, 15, 10, 1, 2, 3, 4, 5), CatEch = c("6.08",
"6.09",
"6.10", "6.11", "6.12", "6.12",
"6.13", "6.13", "4.11", "4.12",
"4.13", "4.14", "4.14", "4.14",
"8.09", "8.10", "9.05", "9.06",
"9.07", "9.07", "9.07", "9.07",
"9.07", "6.07", "6.08", "6.09",
"6.10", "6.11", "6.11", "6.11",
"7.04", "7.05", "7.06", "7.07",
"7.08", "7.08", "7.08", "7.09",
"7.09", "7.08", "7.09", "8.02",
"8.03", "8.04", "8.05", "8.05",
"8.05", "8.05", "8.05", "8.05",
"8.05", "8.05", "8.01", "8.02",
"8.03", "8.04", "8.05", "8.06",
"8.06", "8.06", "8.07", "8.07",
"8.07", "6.05", "6.06", "6.07",
"6.08", "6.09", "6.09", "6.10",
"6.10", "6.11", "6.11", "6.11",
"6.11", "6.11", "6.11", "7.15",
"8.05", "8.06", "8.07", "8.07",
"8.08", "8.08", "8.08", "8.08",
"8.09", "8.09", "2.13", "2.13",
"2.14", "2.15", "2.15", "2.15",
"6.10", "7.01", "7.02", "7.03",
"7.04", "7.05")), .Names = c("Cat",
"Ech", "CatEch"), row.names = c("1",
"2", "3", "4", "5", "6",
"7", "8", "10", "11", "12",
"13", "14",
"15", "17", "18", "19", "20",
"21", "22", "23", "24", "25",
"27",
"28", "29", "30", "31", "32",
"33", "35", "36", "37", "38",
"39",
"40", "41", "42", "43", "45",
"46", "47", "48", "49", "50",
"51",
"52", "53", "54", "55", "56",
"57", "59", "60", "61", "62",
"63",
"64", "65", "66", "67", "68",
"69", "71", "72", "73", "74",
"75",
"76", "77", "78", "79", "80",
"81", "82", "83", "84", "86",
"87",
"88", "89", "90", "91", "92",
"93", "94", "95", "96", "98",
"99",
"100", "101", "102", "103",
"105", "106", "107", "108",
"109",
"110"), class = "data.frame")
Any idea ?
--
Michel ARNAUD
Chargé de mission auprès du DRH
DGDRD-Drh - TA 174/04
Av Agropolis 34398 Montpellier cedex 5
tel : 04.67.61.75.38
fax : 04.67.61.57.87
port: 06.47.43.55.31
Hi,
Try:
df1$CatEch<-paste0(df1[,1],".",sprintf("%02d",df1[,2]))
identical(df1,df2)
#[1] TRUE
A.K.
----- Original Message -----
From: Arnaud Michel <michel.arnaud at cirad.fr>
To: R help <r-help at r-project.org>
Cc:
Sent: Sunday, September 8, 2013 12:22 PM
Subject: [R] to avoid a do loop
Hello
I have a large dataframe (nrow=55000).
This below df1 an extract of the original dataframe
dput(df1)
structure(list(Cat = c(6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
4, 8, 8, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 6, 7, 7,
7, 7, 7), Ech = c(8, 9, 10, 11, 12, 12, 13, 13, 11, 12, 13, 14,
14, 14, 9, 10, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11, 11,
4, 5, 6, 7, 8, 8, 8, 9, 9, 8, 9, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5,
5, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 7, 5, 6, 7, 8, 9, 9, 10, 10,
11, 11, 11, 11, 11, 11, 15, 5, 6, 7, 7, 8, 8, 8, 8, 9, 9, 13,
13, 14, 15, 15, 15, 10, 1, 2, 3, 4, 5)), .Names = c("Cat",
"Ech"
), row.names = c("1", "2", "3", "4",
"5", "6", "7", "8", "10",
"11", "12", "13", "14", "15",
"17", "18", "19", "20", "21",
"22",
"23", "24", "25", "27", "28",
"29", "30", "31", "32", "33",
"35",
"36", "37", "38", "39", "40",
"41", "42", "43", "45", "46",
"47",
"48", "49", "50", "51", "52",
"53", "54", "55", "56", "57",
"59",
"60", "61", "62", "63", "64",
"65", "66", "67", "68", "69",
"71",
"72", "73", "74", "75", "76",
"77", "78", "79", "80", "81",
"82",
"83", "84", "86", "87", "88",
"89", "90", "91", "92", "93",
"94",
"95", "96", "98", "99", "100",
"101", "102", "103", "105",
"106",
"107", "108", "109", "110"), class =
"data.frame")
I do not manage to avoid a do loop because very slow
I want to obtain a new dataframe df2 with a new variable CatEch.
CatEch is the paste of the 2 variables Cat and Ech
dput(df2)
structure(list(Cat = c(6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
4, 8, 8, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 6, 7, 7,
7, 7, 7), Ech = c(8, 9, 10, 11, 12, 12, 13, 13, 11, 12, 13, 14,
14, 14, 9, 10, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11, 11,
4, 5, 6, 7, 8, 8, 8, 9, 9, 8, 9, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5,
5, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 7, 5, 6, 7, 8, 9, 9, 10, 10,
11, 11, 11, 11, 11, 11, 15, 5, 6, 7, 7, 8, 8, 8, 8, 9, 9, 13,
13, 14, 15, 15, 15, 10, 1, 2, 3, 4, 5), CatEch = c("6.08",
"6.09",
"6.10", "6.11", "6.12", "6.12",
"6.13", "6.13", "4.11", "4.12",
"4.13", "4.14", "4.14", "4.14",
"8.09", "8.10", "9.05", "9.06",
"9.07", "9.07", "9.07", "9.07",
"9.07", "6.07", "6.08", "6.09",
"6.10", "6.11", "6.11", "6.11",
"7.04", "7.05", "7.06", "7.07",
"7.08", "7.08", "7.08", "7.09",
"7.09", "7.08", "7.09", "8.02",
"8.03", "8.04", "8.05", "8.05",
"8.05", "8.05", "8.05", "8.05",
"8.05", "8.05", "8.01", "8.02",
"8.03", "8.04", "8.05", "8.06",
"8.06", "8.06", "8.07", "8.07",
"8.07", "6.05", "6.06", "6.07",
"6.08", "6.09", "6.09", "6.10",
"6.10", "6.11", "6.11", "6.11",
"6.11", "6.11", "6.11", "7.15",
"8.05", "8.06", "8.07", "8.07",
"8.08", "8.08", "8.08", "8.08",
"8.09", "8.09", "2.13", "2.13",
"2.14", "2.15", "2.15", "2.15",
"6.10", "7.01", "7.02", "7.03",
"7.04", "7.05")), .Names = c("Cat",
"Ech", "CatEch"), row.names = c("1",
"2", "3", "4", "5", "6",
"7", "8", "10", "11", "12",
"13", "14",
"15", "17", "18", "19", "20",
"21", "22", "23", "24", "25",
"27",
"28", "29", "30", "31", "32",
"33", "35", "36", "37", "38",
"39",
"40", "41", "42", "43", "45",
"46", "47", "48", "49", "50",
"51",
"52", "53", "54", "55", "56",
"57", "59", "60", "61", "62",
"63",
"64", "65", "66", "67", "68",
"69", "71", "72", "73", "74",
"75",
"76", "77", "78", "79", "80",
"81", "82", "83", "84", "86",
"87",
"88", "89", "90", "91", "92",
"93", "94", "95", "96", "98",
"99",
"100", "101", "102", "103",
"105", "106", "107", "108",
"109",
"110"), class = "data.frame")
Any idea ?
--
Michel ARNAUD
Chargé de mission auprès du DRH
DGDRD-Drh - TA 174/04
Av Agropolis 34398 Montpellier cedex 5
tel : 04.67.61.75.38
fax : 04.67.61.57.87
port: 06.47.43.55.31
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Hello,
Try the following.
df3 <- df1
df3$CatEch <- paste(df1$Cat, sprintf("%02d", df1$Ech), sep =
".")
identical(df2, df3) # TRUE
Hope this helps,
Rui Barradas
Em 08-09-2013 17:22, Arnaud Michel escreveu:
> Hello
> I have a large dataframe (nrow=55000).
> This below df1 an extract of the original dataframe
>
> dput(df1)
> structure(list(Cat = c(6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
> 4, 8, 8, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
> 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
> 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
> 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 6, 7, 7,
> 7, 7, 7), Ech = c(8, 9, 10, 11, 12, 12, 13, 13, 11, 12, 13, 14,
> 14, 14, 9, 10, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11, 11,
> 4, 5, 6, 7, 8, 8, 8, 9, 9, 8, 9, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5,
> 5, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 7, 5, 6, 7, 8, 9, 9, 10, 10,
> 11, 11, 11, 11, 11, 11, 15, 5, 6, 7, 7, 8, 8, 8, 8, 9, 9, 13,
> 13, 14, 15, 15, 15, 10, 1, 2, 3, 4, 5)), .Names = c("Cat",
"Ech"
> ), row.names = c("1", "2", "3",
"4", "5", "6", "7", "8",
"10",
> "11", "12", "13", "14",
"15", "17", "18", "19", "20",
"21", "22",
> "23", "24", "25", "27",
"28", "29", "30", "31", "32",
"33", "35",
> "36", "37", "38", "39",
"40", "41", "42", "43", "45",
"46", "47",
> "48", "49", "50", "51",
"52", "53", "54", "55", "56",
"57", "59",
> "60", "61", "62", "63",
"64", "65", "66", "67", "68",
"69", "71",
> "72", "73", "74", "75",
"76", "77", "78", "79", "80",
"81", "82",
> "83", "84", "86", "87",
"88", "89", "90", "91", "92",
"93", "94",
> "95", "96", "98", "99",
"100", "101", "102", "103",
"105", "106",
> "107", "108", "109", "110"), class
= "data.frame")
>
> I do not manage to avoid a do loop because very slow
> I want to obtain a new dataframe df2 with a new variable CatEch.
> CatEch is the paste of the 2 variables Cat and Ech
> dput(df2)
> structure(list(Cat = c(6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
> 4, 8, 8, 9, 9, 9, 9, 9, 9, 9, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
> 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
> 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
> 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, 6, 7, 7,
> 7, 7, 7), Ech = c(8, 9, 10, 11, 12, 12, 13, 13, 11, 12, 13, 14,
> 14, 14, 9, 10, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 11, 11,
> 4, 5, 6, 7, 8, 8, 8, 9, 9, 8, 9, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5,
> 5, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 7, 5, 6, 7, 8, 9, 9, 10, 10,
> 11, 11, 11, 11, 11, 11, 15, 5, 6, 7, 7, 8, 8, 8, 8, 9, 9, 13,
> 13, 14, 15, 15, 15, 10, 1, 2, 3, 4, 5), CatEch = c("6.08",
"6.09",
> "6.10", "6.11", "6.12", "6.12",
"6.13", "6.13", "4.11", "4.12",
> "4.13", "4.14", "4.14", "4.14",
"8.09", "8.10", "9.05", "9.06",
> "9.07", "9.07", "9.07", "9.07",
"9.07", "6.07", "6.08", "6.09",
> "6.10", "6.11", "6.11", "6.11",
"7.04", "7.05", "7.06", "7.07",
> "7.08", "7.08", "7.08", "7.09",
"7.09", "7.08", "7.09", "8.02",
> "8.03", "8.04", "8.05", "8.05",
"8.05", "8.05", "8.05", "8.05",
> "8.05", "8.05", "8.01", "8.02",
"8.03", "8.04", "8.05", "8.06",
> "8.06", "8.06", "8.07", "8.07",
"8.07", "6.05", "6.06", "6.07",
> "6.08", "6.09", "6.09", "6.10",
"6.10", "6.11", "6.11", "6.11",
> "6.11", "6.11", "6.11", "7.15",
"8.05", "8.06", "8.07", "8.07",
> "8.08", "8.08", "8.08", "8.08",
"8.09", "8.09", "2.13", "2.13",
> "2.14", "2.15", "2.15", "2.15",
"6.10", "7.01", "7.02", "7.03",
> "7.04", "7.05")), .Names = c("Cat",
"Ech", "CatEch"), row.names = c("1",
> "2", "3", "4", "5", "6",
"7", "8", "10", "11", "12",
"13", "14",
> "15", "17", "18", "19",
"20", "21", "22", "23", "24",
"25", "27",
> "28", "29", "30", "31",
"32", "33", "35", "36", "37",
"38", "39",
> "40", "41", "42", "43",
"45", "46", "47", "48", "49",
"50", "51",
> "52", "53", "54", "55",
"56", "57", "59", "60", "61",
"62", "63",
> "64", "65", "66", "67",
"68", "69", "71", "72", "73",
"74", "75",
> "76", "77", "78", "79",
"80", "81", "82", "83", "84",
"86", "87",
> "88", "89", "90", "91",
"92", "93", "94", "95", "96",
"98", "99",
> "100", "101", "102", "103",
"105", "106", "107", "108",
"109",
> "110"), class = "data.frame")
> Any idea ?
>