HI,
Your desired output is not clear.? May be this helps:
#dat1 is the dataset
dat1$ID<- 1:nrow(dat1)
library(reshape2)
res1<-melt(dat1,id.vars=c("ID","DSYSRTKY"))
res1$value<-res1$value!=""
res1[,2]<- as.integer(as.character(res1[,2]))
?res1[,3]<-as.character(res1[,3])
?colnames(res1)[3:4]<-c("CODE","PRIMARY")
head(res1)
#? ID? DSYSRTKY CODE PRIMARY
#1? 1 100000005?? C1??? TRUE
#2? 2 100000203?? C1??? TRUE
#3? 3 100000315?? C1??? TRUE
#4? 4 100000315?? C1??? TRUE
#5? 5 100000327?? C1??? TRUE
#6? 6 100000327?? C1??? TRUE
A.K.
----- Original Message -----
From: Dark <info at software-solutions.nl>
To: r-help at r-project.org
Cc:
Sent: Tuesday, August 13, 2013 5:46 AM
Subject: [R] Create rows for columns in dataframe
Hi experts,
I have a dataframe with 100k+ records. it has a key/id column and 25 code
columns. I would like to restructure it having a row for each code column.
I have a structure like this (used dput):
structure(list(DSYSRTKY = structure(c(1L, 2L, 3L, 3L, 4L, 4L), .Names
c("1",
"2", "3", "4", "5", "6"),
.Label = c("100000005", "100000203",
"100000315", "100000327"), class = "factor"), C1 =
structure(c(6L,
3L, 2L, 5L, 1L, 4L), .Names = c("1", "2", "3",
"4", "5", "6"), .Label c("41401",
"42831", "45341", "486", "5990",
"71535"), class = "factor"),
? ? C2 = structure(c(5L, 1L, 3L, 6L, 4L, 2L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("4019", "51881", "5990",
? ? "6826", "78900", "V4986"), class =
"factor"), C3 = structure(c(6L,
? ? 3L, 5L, 2L, 4L, 1L), .Names = c("1", "2", "3",
"4", "5",
? ? "6"), .Label = c("5119", "5939",
"72400", "7850", "8052",
? ? "V1251"), class = "factor"), C4 = structure(c(6L, 5L,
3L,
? ? 1L, 2L, 4L), .Names = c("1", "2", "3",
"4", "5", "6"), .Label c("3109",
? ? "4019", "4241", "42789", "V1011",
"V454"), class = "factor"),
? ? C5 = structure(c(1L, 1L, 3L, 1L, 2L, 4L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "2720", "4019",
? ? "7823"), class = "factor"), C6 = structure(c(1L, 1L, 2L,
? ? 1L, 4L, 3L), .Names = c("1", "2", "3",
"4", "5", "6"), .Label = c("",
? ? "311", "41400", "49390"), class =
"factor"), C7 = structure(c(1L,
? ? 1L, 2L, 1L, 3L, 4L), .Names = c("1", "2", "3",
"4", "5",
? ? "6"), .Label = c("", "2724", "2859",
"V4581"), class = "factor"),
? ? C8 = structure(c(1L, 1L, 3L, 1L, 4L, 2L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "40390", "71680",
? ? "79029"), class = "factor"), C9 = structure(c(1L, 1L,
2L,
? ? 1L, 4L, 3L), .Names = c("1", "2", "3",
"4", "5", "6"), .Label = c("",
? ? "4168", "5859", "V1582"), class =
"factor"), C10 = structure(c(1L,
? ? 1L, 3L, 1L, 1L, 2L), .Names = c("1", "2", "3",
"4", "5",
? ? "6"), .Label = c("", "49390",
"7804"), class = "factor"),
? ? C11 = structure(c(1L, 1L, 3L, 1L, 1L, 2L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "2724", "V066"), class
"factor"),
? ? C12 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "6930"), class = "factor"),
? ? C13 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "41400"), class = "factor"),
? ? C14 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "V4581"), class = "factor"),
? ? C15 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "40291"), class = "factor"),
? ? C16 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = c("", "4280"), class = "factor"),
? ? C17 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C18 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C19 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C20 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C21 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C22 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C23 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C24 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor"),
? ? C25 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1",
? ? "2", "3", "4", "5", "6"),
.Label = "", class = "factor")), .Names
c("DSYSRTKY",
"C1", "C2", "C3", "C4", "C5",
"C6", "C7", "C8", "C9", "C10",
"C11", "C12", "C13", "C14",
"C15", "C16", "C17", "C18",
"C19",
"C20", "C21", "C22", "C23",
"C24", "C25"), row.names = c("1",
"2", "3", "4", "5", "6"),
class = "data.frame")
Now I want to restructure this dataframe not having 25 code fields but a row
for each code but only if the code has a value!
The new structure should look something like:
NewDataFrame <- data.frame(ID=integer(), DSYSRTKY=integer(),
CODE=character(),? PRIMAIRY=logical())
The ID column should just be an increment. PRIMAIRY is a boolean which
should be true if orriginally was the first code (C1).
It has to be efficient since my real data has many more rows than my example
structure of only 6 rows.
I tried some looping mechanism and it was working but it was not performing
at all.
Hopefully I provided enough information using dput.
Regards Derk
--
View this message in context:
http://r.789695.n4.nabble.com/Create-rows-for-columns-in-dataframe-tp4673607.html
Sent from the R help mailing list archive at Nabble.com.
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.