Hi R-experts, I have a data.frame that I want to reshape to a certain format so I can use it in a tool for further analysis. Basicly I have a very long list with IDs of persons and their codes. I create a row for every person with 25 of their codes. I a person has more then 25 codes, I want to add another row for that person. If a row contains less then 25 codes I want to fill with empty string values. I have manually created a sample rawData and resultData and used dput so you can see my starting DF and the wanted result DF. The sample is of very limited size, the real data would contain a few million(!) records. rawData <- structure(list(PersonID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Person1", "Person2", "Person3", "Person4", "Person5"), class = "factor"), codes = c(34396L, 81878L, 67829L, 13428L, 12992L, 63724L, 85930L, 78497L, 59578L, 50733L, 26154L, 47205L, 74578L, 12204L, 42435L, 96643L, 35242L, 29836L, 73031L, 11326L, 96686L, 55849L, 56415L, 11064L, 78509L, 55715L, 75851L, 60682L, 16277L, 52763L, 23429L, 39723L, 95809L, 60081L, 19618L, 46012L, 79188L, 54664L, 64420L, 72875L, 97428L, 74897L, 75615L, 12023L, 21572L, 56177L, 61704L, 70879L, 69033L, 87224L, 68670L, 65602L, 25476L, 81209L, 62086L, 35492L, 39771L, 14380L, 43858L, 53679L, 78023L, 43785L, 69884L, 12840L, 54021L, 68002L, 79249L, 61784L, 22227L, 28935L, 91406L, 42045L, 97716L, 65690L, 57310L, 57627L, 32227L, 43121L, 22251L, 31255L, 90660L, 89118L, 14558L, 99824L, 25005L, 62186L, 10527L, 99438L, 85656L, 79465L, 35357L, 41697L, 83084L, 83590L, 16234L, 32480L, 50991L, 79524L, 93888L, 32637L, 13253L, 76576L, 48632L, 68014L, 24281L, 74320L, 44601L, 36251L, 27825L, 85569L, 21634L, 50364L, 74436L, 73216L, 89342L, 63562L, 88485L, 40552L, 49359L, 29636L, 26285L, 13263L, 18106L, 78589L, 43479L, 12491L, 50840L, 77453L, 80578L, 43693L, 89857L, 12837L, 55950L, 63049L, 84508L, 29736L, 88194L, 86849L, 54274L, 38713L)), .Names = c("PersonID", "codes"), row.names = c(NA, -140L), class = "data.frame") resultData = structure(list(PersonId = c("Person1", "Person1", "Person2", "Person3", "Person4", "Person5", "Person5", "Person5"), Code1 = c(34396, 55715, 97428, 56177, 68002, 90660, 74320, 89857), Code2 = c(81878, 75851, 74897, 61704, 79249, 89118, 44601, 12837), Code3 = c(67829, 60682, 75615, 70879, 61784, 14558, 36251, 55950), Code4 = c(13428, 16277, 12023, 69033, 22227, 99824, 27825, 63049), Code5 = c(12992, 52763, 21572, 87224, 28935, 25005, 85569, 84508), Code6 = c("63724", "23429", "", "68670", "91406", "62186", "21634", "29736"), Code7 c("85930", "39723", "", "65602", "42045", "10527", "50364", "88194"), Code8 c("78497", "95809", "", "25476", "97716", "99438", "74436", "86849"), Code9 c("59578", "60081", "", "81209", "65690", "85656", "73216", "54274"), Code10 c("50733", "19618", "", "62086", "57310", "79465", "89342", "38713"), Code11 c("26154", "46012", "", "35492", "57627", "35357", "63562", ""), Code12 = c("47205", "79188", "", "39771", "32227", "41697", "88485", ""), Code13 = c("74578", "54664", "", "14380", "43121", "83084", "40552", ""), Code14 = c("12204", "64420", "", "43858", "22251", "83590", "49359", ""), Code15 = c("42435", "72875", "", "53679", "31255", "16234", "29636", ""), Code16 = c("96643", "", "", "", "78023", "32480", "26285", ""), Code17 = c("35242", "", "", "", "43785", "50991", "13263", ""), Code18 = c("29836", "", "", "", "69884", "79524", "18106", ""), Code19 = c("73031", "", "", "", "12840", "93888", "78589", ""), Code20 = c("11326", "", "", "", "54021", "32637", "43479", ""), Code21 = c("96686", "", "", "", "", "13253", "12491", ""), Code22 = c("55849", "", "", "", "", "76576", "50840", ""), Code23 = c("56415", "", "", "", "", "48632", "77453", ""), Code24 = c("11064", "", "", "", "", "68014", "80578", ""), Code25 = c("78509", "", "", "", "", "24281", "43693", "")), .Names = c("PersonId", "Code1", "Code2", "Code3", "Code4", "Code5", "Code6", "Code7", "Code8", "Code9", "Code10", "Code11", "Code12", "Code13", "Code14", "Code15", "Code16", "Code17", "Code18", "Code19", "Code20", "Code21", "Code22", "Code23", "Code24", "Code25"), row.names = c(NA, -8L), class = "data.frame") This sample data explains very well what I'm trying to achieve. As you can see there are 2 rows for Person1 and 3 rows for Person 5 because they have respectively 40 and 60 codes. I'm a big fan of the data.table package so maybe someone has an solution using that package? But of course any solution is welcome:-) Thanks for any help in advance, Regards Dark -- View this message in context: http://r.789695.n4.nabble.com/Reshape-large-Data-Frame-to-new-format-tp4687431.html Sent from the R help mailing list archive at Nabble.com.