Lauri Nikkinen
2010-Mar-25 18:04 UTC
[R] Problem with reshaping data from wide to long format
Hi, I have a data.frame in wide format which I would like to reshape into a long format: example (nonsense) data:> dput(perus2)structure(list(id = c(30L, 38L, 21L, 12L, 22L, 28L, 31L, 44L, 8L, 47L, 23L, 20L, 41L, 42L, 29L, 50L, 5L, 33L, 4L, 17L, 11L, 1L, 18L, 6L, 9L, 32L, 16L, 14L, 39L, 48L, 37L, 43L, 25L, 27L, 35L, 40L, 45L, 49L, 2L, 34L, 10L, 15L, 24L, 26L, 46L, 7L, 13L, 3L, 19L, 36L), height1 = c(157.10664436, 162.77883643, 162.57095167, 155.82784611, 158.17430233, 156.21247668, 159.33063095, 160.47626229, 157.54657592, 155.01414518, 158.5172723, 162.89026674, 156.08622305, 159.32250375, 162.23610226, 160.21190346, NA, 155.54527115, 155.94912976, 160.49239576, 159.29210577, 158.93378272, 159.56724207, 162.36425594, 161.60135907, 161.75075865, 162.17477449, 157.24285415, NA, NA, 160.51351242, 159.91685827, NA, 161.20032847, 163.04696627, 161.12942938, NA, 158.80416016, 155.23140708, 156.31264334, 162.37744195, 160.90847312, NA, NA, 161.25101061, 158.11268844, 157.49338051, 156.92212718, 155.53316098, 157.68964234), height2 = c(168.21926725, 161.56637515, 167.67702789, NA, 162.30948163, 167.82148015, NA, 167.22602729, 163.12053538, NA, 165.14942295, 167.87354665, 160.38044351, 164.10900509, 168.07467582, NA, NA, 163.03725294, 167.76514949, 161.69014727, 161.5809546, 161.14341254, 164.80047183, 162.05997825, 164.11475164, 166.40195203, NA, 166.0826708, 165.7232283, 164.05199508, 167.3839719, 166.66838331, 162.50198958, 167.25560972, 167.67882525, 160.43653059, 163.93406069, 160.84337152, 163.74551267, 163.51428188, 163.29711762, NA, 168.345659, NA, 168.00421653, NA, 167.63059459, NA, NA, 168.30745505 ), height3 = c(170.90466092, NA, 167.38512765, 172.00014248, 171.84958505, NA, 169.97286094, NA, NA, 169.53023204, 167.64452841, 170.94253421, NA, NA, 169.11010524, 172.06679578, NA, 168.09847664, 169.42974919, 171.17927523, 166.7736197, 169.12926314, 166.34368104, 173.18168378, NA, 170.345414, NA, 172.09205926, NA, NA, NA, NA, 170.59135788, 167.48001013, 170.83121727, 167.90871568, 169.7154018, NA, 171.11203348, 165.25337916, NA, NA, NA, 167.2210484, 166.26445432, 167.3911325, NA, NA, 169.20495164, 171.19481305), weight1 = c(NA, 42.12117886, 35.006739718, 30.104617789, 37.596524918, 32.681661769, 43.273125397, 40.919884581, 41.417712325, 32.363735308, 33.492860521, 40.679869782, 41.051721029, 39.156918258, 35.594038489, 44.788378759, 31.706213547, 37.600275868, 30.562918803, NA, 37.842347921, 34.049919571, 35.390284683, 41.169443504, 36.314510306, NA, 32.315887067, 34.815182022, 43.753041021, 42.631667022, 43.964726468, 38.665918845, 35.758478283, 31.642950399, 33.31580307, 41.171896621, 34.11696779, 33.872158362, 42.70435576, 41.153390371, 43.830697643, 35.223807744, NA, 37.210599185, 44.775833501, 30.659131352, 40.399049686, 44.368921711, 41.850601771, 37.850971621), weight2 = c(46.614122964, 49.810266131, NA, 51.600870342, 49.009289571, 44.041339142, 52.877022993, NA, 49.99494035, 40.401355811, 43.25422231, 53.470358871, 52.762958304, 52.586173237, 50.286215216, NA, 44.273412353, 40.473174437, 51.956636192, 46.237083243, 40.679582358, NA, NA, 53.008553857, 47.919812716, 48.191047925, 54.024111477, NA, 45.548940909, NA, 53.838601463, NA, 50.50111976, 53.918903127, 45.481404898, 54.465265395, 50.656535116, 53.037833591, 53.346378286, NA, 40.582047152, 44.724081659, 50.858338403, NA, NA, 40.679801628, 44.8645364, 44.011082672, 51.955119361, NA), weight3 = c(55.198326807, 54.174241705, NA, 54.069526393, 52.746444632, NA, NA, 61.469501304, 51.508762087, 58.266821929, 58.647671581, 59.09318286, 52.884083243, 63.804598792, 58.559146181, NA, 56.556534826, 51.071810157, NA, 62.371304581, NA, 55.025181044, 55.16444531, 59.571659821, 52.516719702, NA, 57.547876457, 61.692374636, 57.952322011, 64.959944764, 61.631626094, 50.46350485, 51.336554259, 60.036043059, NA, NA, 52.306778299, 50.312191646, 57.281096013, 52.174733394, 51.751740252, NA, 61.142851937, 64.93667359, 57.041472991, 56.285936174, 63.046773681, NA, NA, 55.571892474), sp = c(0L, 0L, 0L, 0L, 1L, 0L, NA, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, NA, 1L, 0L, 0L, 0L, 1L, 1L), bmi1 = c(NA, 15.8966143320452, 13.245421082447, 12.3977530712043, 15.027123283702, 13.3928404493200, 17.0458911682568, 15.8895939465852, 16.6866120773622, 13.4683992248259, 13.3290457842967, 15.3316637234161, 16.8500900221585, 15.4260331046336, 13.5232867850418, 17.4492104953802, NA, 15.540944398053, 12.5669219098308, NA, 14.9138430142300, 13.4798062642435, 13.8994168072589, 15.6168753286232, 13.9056141468016, NA, 12.2871023625512, 14.0807839118743, NA, NA, 17.0640132231600, 15.1195838206639, NA, 12.1771351358901, 12.5321285552922, 15.8580993895571, NA, 13.4313331175885, 17.7220103647077, 16.8429207495369, 16.6236731966879, 13.6043712362307, NA, NA, 17.2202236903033, 12.2638378494873, 16.2872041214028, 18.0181625678781, 17.3003826973727, 15.2219641887793), bmi2 = c(16.4727475841863, 19.081692780389, NA, NA, 18.6033272435455, 15.6374315460333, NA, NA, 18.7892211535392, NA, 15.8589484308730, 18.9735487397628, 20.5128647833983, 19.5257058569701, 17.8009993860965, NA, NA, 15.2262715351502, 18.4602479341394, 17.6857432790636, 15.5810293774179, NA, NA, 20.1834035524235, 17.7917993591549, 17.4040185299102, NA, NA, 16.5848486941011, NA, 19.2161345801121, NA, 19.1242161059593, 19.2743465419037, 16.1762342002915, 21.1598751272938, 18.8493807769291, 20.5012074556641, 19.8960161913625, NA, 15.2186774698114, NA, 17.9456140418110, NA, NA, NA, 15.9660179598009, NA, NA, NA), bmi3 = c(18.8980981166013, NA, NA, 18.2765808591596, 17.8606070520157, NA, NA, NA, NA, 20.2734200134073, 20.8675761771662, 20.2226053401634, NA, NA, 20.4764948398200, NA, NA, 18.0739757601718, NA, 21.2854316398646, NA, 19.2364074182253, 19.9363954704867, 19.8625857537345, NA, NA, NA, 20.8309858751195, NA, NA, NA, NA, 17.6405719924585, 21.4035643852817, NA, NA, 18.1599838638817, NA, 19.563664852469, 19.1055498419989, NA, NA, NA, 23.2224562874904, 20.6344030442757, 20.087921316982, NA, NA, NA, 18.9615564192124), spf = structure(c(2L, 2L, 2L, 2L, 1L, 2L, NA, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, 2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor")), .Names = c("id", "height1", "height2", "height3", "weight1", "weight2", "weight3", "sp", "bmi1", "bmi2", "bmi3", "spf"), row.names = c(NA, -50L), class "data.frame")>I'm doing this but I get an error:> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"),+ c("weight1", "weight2", "weight3"), + c("bmi1", "bmi2", "bmi3")), + v.names=c("heigth","weigth", "bmi"), + timevar = "time", + times = factor(c(1, 2, 3)), + idvar = c("id", "sp"), + direction = "long") Error in `row.names<-.data.frame`(`*tmp*`, value = c("30.0.1", "38.0.1", : duplicate 'row.names' are not allowed In addition: Warning message: non-unique value when setting 'row.names': ?NA.1?>This will fix this error, but I would like to avoid this kind of tweak for NA values:> perus2$sp[is.na(perus2$sp)] <- 999 > perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"),+ c("weight1", "weight2", "weight3"), + c("bmi1", "bmi2", "bmi3")), + v.names=c("heigth","weigth", "bmi"), + timevar = "time", + times = factor(c(1, 2, 3)), + idvar = c("id", "sp"), + direction = "long")> perus2_trans <- perus2_trans[order(perus2_trans$id, perus2_trans$time),] > perus2_trans$sp[perus2_trans$sp == 999] <- NAHow to avoid this <- 999 substitute? Thanks in advance. -Lauri
Gabor Grothendieck
2010-Mar-25 18:31 UTC
[R] Problem with reshaping data from wide to long format
Try new.row.names = 1:150 as an arg to reshape. On Thu, Mar 25, 2010 at 2:04 PM, Lauri Nikkinen <lauri.nikkinen at iki.fi> wrote:> Hi, > > I have a data.frame in wide format which I would like to reshape into > a long format: > > example (nonsense) data: > >> dput(perus2) > structure(list(id = c(30L, 38L, 21L, 12L, 22L, 28L, 31L, 44L, > 8L, 47L, 23L, 20L, 41L, 42L, 29L, 50L, 5L, 33L, 4L, 17L, 11L, > 1L, 18L, 6L, 9L, 32L, 16L, 14L, 39L, 48L, 37L, 43L, 25L, 27L, > 35L, 40L, 45L, 49L, 2L, 34L, 10L, 15L, 24L, 26L, 46L, 7L, 13L, > 3L, 19L, 36L), height1 = c(157.10664436, 162.77883643, 162.57095167, > 155.82784611, 158.17430233, 156.21247668, 159.33063095, 160.47626229, > 157.54657592, 155.01414518, 158.5172723, 162.89026674, 156.08622305, > 159.32250375, 162.23610226, 160.21190346, NA, 155.54527115, 155.94912976, > 160.49239576, 159.29210577, 158.93378272, 159.56724207, 162.36425594, > 161.60135907, 161.75075865, 162.17477449, 157.24285415, NA, NA, > 160.51351242, 159.91685827, NA, 161.20032847, 163.04696627, 161.12942938, > NA, 158.80416016, 155.23140708, 156.31264334, 162.37744195, 160.90847312, > NA, NA, 161.25101061, 158.11268844, 157.49338051, 156.92212718, > 155.53316098, 157.68964234), height2 = c(168.21926725, 161.56637515, > 167.67702789, NA, 162.30948163, 167.82148015, NA, 167.22602729, > 163.12053538, NA, 165.14942295, 167.87354665, 160.38044351, 164.10900509, > 168.07467582, NA, NA, 163.03725294, 167.76514949, 161.69014727, > 161.5809546, 161.14341254, 164.80047183, 162.05997825, 164.11475164, > 166.40195203, NA, 166.0826708, 165.7232283, 164.05199508, 167.3839719, > 166.66838331, 162.50198958, 167.25560972, 167.67882525, 160.43653059, > 163.93406069, 160.84337152, 163.74551267, 163.51428188, 163.29711762, > NA, 168.345659, NA, 168.00421653, NA, 167.63059459, NA, NA, 168.30745505 > ), height3 = c(170.90466092, NA, 167.38512765, 172.00014248, > 171.84958505, NA, 169.97286094, NA, NA, 169.53023204, 167.64452841, > 170.94253421, NA, NA, 169.11010524, 172.06679578, NA, 168.09847664, > 169.42974919, 171.17927523, 166.7736197, 169.12926314, 166.34368104, > 173.18168378, NA, 170.345414, NA, 172.09205926, NA, NA, NA, NA, > 170.59135788, 167.48001013, 170.83121727, 167.90871568, 169.7154018, > NA, 171.11203348, 165.25337916, NA, NA, NA, 167.2210484, 166.26445432, > 167.3911325, NA, NA, 169.20495164, 171.19481305), weight1 = c(NA, > 42.12117886, 35.006739718, 30.104617789, 37.596524918, 32.681661769, > 43.273125397, 40.919884581, 41.417712325, 32.363735308, 33.492860521, > 40.679869782, 41.051721029, 39.156918258, 35.594038489, 44.788378759, > 31.706213547, 37.600275868, 30.562918803, NA, 37.842347921, 34.049919571, > 35.390284683, 41.169443504, 36.314510306, NA, 32.315887067, 34.815182022, > 43.753041021, 42.631667022, 43.964726468, 38.665918845, 35.758478283, > 31.642950399, 33.31580307, 41.171896621, 34.11696779, 33.872158362, > 42.70435576, 41.153390371, 43.830697643, 35.223807744, NA, 37.210599185, > 44.775833501, 30.659131352, 40.399049686, 44.368921711, 41.850601771, > 37.850971621), weight2 = c(46.614122964, 49.810266131, NA, 51.600870342, > 49.009289571, 44.041339142, 52.877022993, NA, 49.99494035, 40.401355811, > 43.25422231, 53.470358871, 52.762958304, 52.586173237, 50.286215216, > NA, 44.273412353, 40.473174437, 51.956636192, 46.237083243, 40.679582358, > NA, NA, 53.008553857, 47.919812716, 48.191047925, 54.024111477, > NA, 45.548940909, NA, 53.838601463, NA, 50.50111976, 53.918903127, > 45.481404898, 54.465265395, 50.656535116, 53.037833591, 53.346378286, > NA, 40.582047152, 44.724081659, 50.858338403, NA, NA, 40.679801628, > 44.8645364, 44.011082672, 51.955119361, NA), weight3 = c(55.198326807, > 54.174241705, NA, 54.069526393, 52.746444632, NA, NA, 61.469501304, > 51.508762087, 58.266821929, 58.647671581, 59.09318286, 52.884083243, > 63.804598792, 58.559146181, NA, 56.556534826, 51.071810157, NA, > 62.371304581, NA, 55.025181044, 55.16444531, 59.571659821, 52.516719702, > NA, 57.547876457, 61.692374636, 57.952322011, 64.959944764, 61.631626094, > 50.46350485, 51.336554259, 60.036043059, NA, NA, 52.306778299, > 50.312191646, 57.281096013, 52.174733394, 51.751740252, NA, 61.142851937, > 64.93667359, 57.041472991, 56.285936174, 63.046773681, NA, NA, > 55.571892474), sp = c(0L, 0L, 0L, 0L, 1L, 0L, NA, 0L, 0L, 0L, > 1L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L, > 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, > 0L, NA, 1L, 0L, 0L, 0L, 1L, 1L), bmi1 = c(NA, 15.8966143320452, > 13.245421082447, 12.3977530712043, 15.027123283702, 13.3928404493200, > 17.0458911682568, 15.8895939465852, 16.6866120773622, 13.4683992248259, > 13.3290457842967, 15.3316637234161, 16.8500900221585, 15.4260331046336, > 13.5232867850418, 17.4492104953802, NA, 15.540944398053, 12.5669219098308, > NA, 14.9138430142300, 13.4798062642435, 13.8994168072589, 15.6168753286232, > 13.9056141468016, NA, 12.2871023625512, 14.0807839118743, NA, > NA, 17.0640132231600, 15.1195838206639, NA, 12.1771351358901, > 12.5321285552922, 15.8580993895571, NA, 13.4313331175885, 17.7220103647077, > 16.8429207495369, 16.6236731966879, 13.6043712362307, NA, NA, > 17.2202236903033, 12.2638378494873, 16.2872041214028, 18.0181625678781, > 17.3003826973727, 15.2219641887793), bmi2 = c(16.4727475841863, > 19.081692780389, NA, NA, 18.6033272435455, 15.6374315460333, > NA, NA, 18.7892211535392, NA, 15.8589484308730, 18.9735487397628, > 20.5128647833983, 19.5257058569701, 17.8009993860965, NA, NA, > 15.2262715351502, 18.4602479341394, 17.6857432790636, 15.5810293774179, > NA, NA, 20.1834035524235, 17.7917993591549, 17.4040185299102, > NA, NA, 16.5848486941011, NA, 19.2161345801121, NA, 19.1242161059593, > 19.2743465419037, 16.1762342002915, 21.1598751272938, 18.8493807769291, > 20.5012074556641, 19.8960161913625, NA, 15.2186774698114, NA, > 17.9456140418110, NA, NA, NA, 15.9660179598009, NA, NA, NA), > ? ?bmi3 = c(18.8980981166013, NA, NA, 18.2765808591596, 17.8606070520157, > ? ?NA, NA, NA, NA, 20.2734200134073, 20.8675761771662, 20.2226053401634, > ? ?NA, NA, 20.4764948398200, NA, NA, 18.0739757601718, NA, 21.2854316398646, > ? ?NA, 19.2364074182253, 19.9363954704867, 19.8625857537345, > ? ?NA, NA, NA, 20.8309858751195, NA, NA, NA, NA, 17.6405719924585, > ? ?21.4035643852817, NA, NA, 18.1599838638817, NA, 19.563664852469, > ? ?19.1055498419989, NA, NA, NA, 23.2224562874904, 20.6344030442757, > ? ?20.087921316982, NA, NA, NA, 18.9615564192124), spf = structure(c(2L, > ? ?2L, 2L, 2L, 1L, 2L, NA, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, > ? ?2L, NA, 2L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, > ? ?1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L, > ? ?2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor")), > .Names = c("id", > "height1", "height2", "height3", "weight1", "weight2", "weight3", > "sp", "bmi1", "bmi2", "bmi3", "spf"), row.names = c(NA, -50L), class > "data.frame") >> > > I'm doing this but I get an error: > >> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?c("weight1", "weight2", "weight3"), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?c("bmi1", "bmi2", "bmi3")), > + ? ? ? ? ? ? ? ? v.names=c("heigth","weigth", "bmi"), > + ? ? ? ? ? ? ? ? timevar = "time", > + ? ? ? ? ? ? ? ? times = factor(c(1, 2, 3)), > + ? ? ? ? ? ? ? ? idvar = c("id", "sp"), > + ? ? ? ? ? ? ? ? direction = "long") > Error in `row.names<-.data.frame`(`*tmp*`, value = c("30.0.1", "38.0.1", ?: > ?duplicate 'row.names' are not allowed > In addition: Warning message: > non-unique value when setting 'row.names': ?NA.1? >> > > This will fix this error, but I would like to avoid this kind of tweak > for NA values: > >> perus2$sp[is.na(perus2$sp)] <- 999 >> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?c("weight1", "weight2", "weight3"), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?c("bmi1", "bmi2", "bmi3")), > + ? ? ? ? ? ? ? ? v.names=c("heigth","weigth", "bmi"), > + ? ? ? ? ? ? ? ? timevar = "time", > + ? ? ? ? ? ? ? ? times = factor(c(1, 2, 3)), > + ? ? ? ? ? ? ? ? idvar = c("id", "sp"), > + ? ? ? ? ? ? ? ? direction = "long") >> perus2_trans <- perus2_trans[order(perus2_trans$id, perus2_trans$time),] >> perus2_trans$sp[perus2_trans$sp == 999] <- NA > > How to avoid this <- 999 substitute? Thanks in advance. > > -Lauri > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >