Hi,
Please use ?dput() to show the datasets as one of the rows (Id "four")
in first dataset didn't show 11 elements.
df1 <- structure(list(Id = c("one", "one",
"two", "two", "three", "three",
"three", "four", "five", "five"), col1 =
c("a1", NA, "b1", "b1",
NA, NA, "c1", "d1", "e1", NA), col2 =
c("a2", NA, "b2", "b2",
"c2", NA, "c2", "D2", "e2",
"e2"), col3 = c("a3", "a3", "b3",
"b3", "c3", "c3", "c3", "d3",
"E3", "e3"), col4 = c("a4", "a4",
"B4", "b4", "c4", "c4", "c4",
"d4", "e4", "E4"), col5 = c("a5",
"a5", "b5", "b5", "C5", "c5",
"c5", "d5", "e5", "e5"), col6 =
c("A6",
"a6", "b6", "B6", "c6", "c6",
"C6", "d6", "e6", "e6"), col7 =
c("a7",
"A7", "b7", "b7", "c7", "c7",
"c7", NA, "e7", "e7"), col8 = c("a8",
"a8", "b8", "b8", "c8", "c8",
"c8", NA, "e8", "e8"), col9 = c("a9",
"a9", "b9", "b9", "c9", "C9",
NA, "", "e9", "e9"), col10 = c(NA,
"a10", "b10", "b10", NA, "c10", NA,
"", NA, "e10")), .Names = c("Id",
"col1", "col2", "col3", "col4",
"col5", "col6", "col7", "col8",
"col9", "col10"), class = "data.frame", row.names
= c(NA, -10L
))
df2 <- structure(list(Id = c("one", "one",
"two", "two", "three", "three",
"three", "four", "five", "five"), colnew
= c("A6", "A7", "B4",
"B6", "C5", "C9", "C6", "D2",
"E3", "E4")), .Names = c("Id",
"colnew"), class = "data.frame", row.names = c(NA, -10L))
#expected result
res <- structure(list(Id = c("one", "two",
"three", "four", "five"),
??? col1 = c("a1", "b1", "c1", "d1",
"e1"), col2 = c("a2", "b2",
??? "c2", "D2", "e2"), col3 = c("a3",
"b3", "c3", "d3", "E3"),
??? col4 = c("a4", "B4", "c4", "d4",
"E4"), col5 = c("a5", "b5",
??? "C5", "d5", "e5"), col6 = c("A6",
"B6", "C6", "d6", "e6"),
??? col7 = c("A7", "b7", "c7", NA,
"e7"), col8 = c("a8", "b8",
??? "c8", NA, "e8"), col9 = c("a9",
"b9", "C9", "", "e9"), col10 =
c("a10",
??? "b10", "c10", "", "e10")), .Names =
c("Id", "col1", "col2",
"col3", "col4", "col5", "col6",
"col7", "col8", "col9", "col10"
), class = "data.frame", row.names = c(NA, -5L))
##there would be simple ways to perform this operation.
res1 <- as.data.frame(t(sapply(split(df1, df1$Id), function(x) {
??? x1 <- x[, -1]
??? c(Id = unique(x[, 1]), apply(x1, 2, function(y) {
??????? y1 <- unique(y[!is.na(y)])
??????? y2 <- if (length(y1) == 0) NA else y1
??????? if (any(y2 %in% df2$colnew)) unique(toupper(y2)) else y2
??? }))
})), stringsAsFactors = FALSE)
res1 <- res1[order(gsub("\\d+", "", res1$col1)), ]
row.names(res1) <- 1:nrow(res1)
identical(res, res1)
# [1] TRUE
A.K.
I am stuck in a situation and seek urgent help!.
I have a DF something like this;
Id??????? col1? col2? col3? col4? col5? col6?? col7?? col8? col9? col10
one????? a1??? a2??? a3???? a4??? a5??? A6???? a7???? a8??? a9??? NA
one????? NA??? NA??? a3???? a4??? a5??? a6???? A7???? a8??? a9??? a10
two????? b1??? b2???? b3???? B4??? b5??? b6???? b7??? b8???? b9?? b10
two????? b1??? b2???? b3???? b4??? b5??? B6???? b7??? b8???? b9?? b10
three??? NA?? c2????? c3??? c4??? C5??? c6???? c7??? c8???? c9?? NA
three??? NA??? NA??? c3??? c4??? c5??? c6???? c7??? c8???? C9?? c10
three??? c1??? c2???? c3??? c4??? c5??? C6???? c7??? c8??? NA??? NA
four????? d1??? D2???? d3???? d4??? d5??? d6?? NA?? NA?
five????? e1??? e2??? E3???? e4???? e5???? e6??? e7?? e8???? e9?? NA
five????? NA??? e2??? e3???? E4???? e5???? e6??? e7?? e8???? e9?? e10
* each row is different and some has NA.
* the capital letters in some cells are key values which will be useful for
further analysis
I have another DF which has only the key values
Id???? colnew???
one???? A6???
one???? A7
two???? B4
two???? B6
three?? C5
three?? C9
three?? C6
four???? D2?
five???? E3
five???? E4
Now,
I need to aggregate the first DF? based on "ID" values to get
"unique" entries for each "ID" so that the output should
look like the below
Id??????? col1? col2? col3? col4? col5? col6?? col7?? col8? col9? col10
one????? a1??? a2??? a3???? a4??? a5??? A6???? A7???? a8??? a9??? a10
two????? b1??? b2???? b3???? B4??? b5??? B6???? b7??? b8???? b9?? b10
three??? c1?? c2????? c3??? c4??? C5??? C6???? c7??? c8???? C9?? c10
four????? d1??? D2???? d3???? d4??? d5??? d6?? NA?? NA?
five????? e1??? e2??? E3???? E4???? e5???? e6??? e7?? e8???? e9?? e10
Thanks for the help
Regards,
karthick