Dear all, I have a matrix (dimension, 16 x 12) where 2nd column represents class (1,1,1,1,1,2,2,2, etc) information. I want to estimate average and median values for each of the class and add this information as a row at end of the each classes. for example: dput(dat) structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729, 1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563, 2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278, 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118, 0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856, 1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088 ), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976, 0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102, 0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426, 2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324, 1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493, 2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295, 1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389, 1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025, 1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452 ), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008, 0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016, 0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255, 0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587, 1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854, 1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997, 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636, 0.348599173, 0.273806933, 
0.32067937, 0.278581115, 0.293726291, 0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471 ), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252, 2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155, 1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052, 1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422, 0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542, 0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891, 0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619, 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073, 0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411, 0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632 )), .Names = c("class", "name1", "name2", "name3", "name4", "name5", "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame", row.names = c("ara1", "ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9", "ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16" )) I wrote this: avg<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) mean(x,na.rm=T)) ) med<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) median(x,na.rm=T)) ) # avg # class name1 name2 name3 name4 name5 name6 name7 name#8 name9 name10 name11 #1 1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922 #0.2741547 2.376609 0.7154955 0.3654845 #2 2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665 #0.2983373 1.908645 0.5731394 0.3566621 #3 3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971 #0.2935527 2.118543 0.6916275 0.3076734 #4 4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031 #0.2524853 1.941667 0.6283592 0.3592155 #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 #0.1856815 1.807698 0.3916619 0.7229726 #> med # class name1 name2 name3 name4 name5 name6 name7 name#8 name9 name10 name11 #1 1 2.497293 
1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096 #0.2754850 2.289048 0.7230254 0.3637169 #2 2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908 #0.2972432 1.852571 0.5252870 0.3958789 #3 3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798 #0.2937263 2.002215 0.6442313 0.2976664 #4 4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735 #0.2042306 1.932144 0.6002406 0.3081887 #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 #0.1856815 1.807698 0.3916619 0.7229726 But I do not know how can I add this information in the original data? For example, for class 1, the output will look like this: dput(res1) structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 c(2.554923977, 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378, 2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525, 0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548, 2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295, 1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049, 2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133, 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583, 1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733, 2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925, 0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619, 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455, 0.363716904)), .Names = c("class", "name1", "name2", "name3", "name4", "name5", "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame", 
row.names = c("ara1", "ara2", "ara3", "ara4", "ara5", "Avg", "Med")) The same should be done for the other classes. Thanks a lot! Nico [[alternative HTML version deleted]]
Hi, Your dput() suggests dat as data.frame. ##Using the results you got, res2 <- do.call(rbind,lapply(unique(dat$class),function(i) {x1 <-rbind(dat[dat$class==i,], avg[avg$class==i,], med[med$class==i,]); rownames(x1)[!grepl("ara",rownames(x1))] <- paste0(c("Avg", "Med"), i); x1})) A.K. On Saturday, April 26, 2014 8:39 PM, Nico Met <nicomet80 at gmail.com> wrote: Dear all, I have a matrix (dimension, 16 x 12) where? 2nd column represents class (1,1,1,1,1,2,2,2, etc) information. I want to estimate average? and median values for each of the class and add this information as a row at end of the each classes. for example: dput(dat) structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729, 1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563, 2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278, 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118, 0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856, 1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088 ), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976, 0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102, 0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426, 2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324, 1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493, 2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295, 1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389, 1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025, 1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452 ), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008, 0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016, 0.65635473, 0.615835066, 0.64310098, 0.562974891, 
0.900622255, 0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587, 1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854, 1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997, 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636, 0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291, 0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471 ), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252, 2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155, 1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052, 1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422, 0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542, 0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891, 0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619, 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073, 0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411, 0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632 )), .Names = c("class", "name1", "name2", "name3", "name4", "name5", "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame", row.names = c("ara1", "ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9", "ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16" )) I wrote this: avg<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) mean(x,na.rm=T)) ) med<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) median(x,na.rm=T)) ) # avg #? class? ? name1? ? name2? ? name3? ? name4? ? name5? ? name6? ? name7 ? ? name#8? ? name9? ? name10? ? name11 #1? ? 1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922 #0.2741547 2.376609 0.7154955 0.3654845 #2? ? 2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665 #0.2983373 1.908645 0.5731394 0.3566621 #3? ? 
3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971 #0.2935527 2.118543 0.6916275 0.3076734 #4 4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031 #0.2524853 1.941667 0.6283592 0.3592155 #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 #0.1856815 1.807698 0.3916619 0.7229726 #> med # class name1 name2 name3 name4 name5 name6 name7 name#8 name9 name10 name11 #1 1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096 #0.2754850 2.289048 0.7230254 0.3637169 #2 2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908 #0.2972432 1.852571 0.5252870 0.3958789 #3 3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798 #0.2937263 2.002215 0.6442313 0.2976664 #4 4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735 #0.2042306 1.932144 0.6002406 0.3081887 #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 #0.1856815 1.807698 0.3916619 0.7229726 But I do not know how I can add this information to the original data. 
For example, for class 1, the output will look like this: dput(res1) structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 c(2.554923977, 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378, 2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525, 0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548, 2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295, 1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049, 2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133, 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583, 1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733, 2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925, 0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619, 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455, 0.363716904)), .Names = c("class", "name1", "name2", "name3", "name4", "name5", "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame", row.names = c("ara1", "ara2", "ara3", "ara4", "ara5", "Avg", "Med")) And same will be for other classes. Thanks a lot !!!! Nico ??? [[alternative HTML version deleted]] ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
On Apr 26, 2014, at 5:37 PM, Nico Met wrote:> Dear all, > > > > I have a matrix (dimension, 16 x 12) where 2nd column represents class > (1,1,1,1,1,2,2,2, etc) information. I want to estimate average and median > values for each of the class and add this information as a row at end of > the each classes. >Well it does have a dimension attribute but it is a data.frame, NOT a matrix. The term "class" is a reserved word in R. What is it that you mean by that word? if it is for each column then: sapply( dat, function(x) c( mean(x), median(x)) )> sapply( dat, function(x) c( mean_x = mean(x), median_x = median(x)) )class name1 name2 name3 name4 name5 name6 mean_x 2.4375 2.350258 1.102291 0.5358036 2.343448 1.895963 0.6242466 median_x 2.0000 2.436813 1.094910 0.5478146 2.421528 1.942289 0.6497279 name7 name8 name9 name10 name11 mean_x 1.67054 0.2742449 2.094122 0.6388536 0.3736069 median_x 1.72933 0.2770331 2.106486 0.6322816 0.3623650 -- David.> > for example: > > dput(dat) > > structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, > > 3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762, > > 2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729, > > 1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563, > > 2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278, > > 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118, > > 0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856, > > 1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088 > > ), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135, > > 0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976, > > 0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102, > > 0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426, > > 2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324, > > 1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493, > > 2.229911767, 3.191646399, 1.269919241, 
1.387479858), name5 = c(2.172365295, > > 1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389, > > 1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025, > > 1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452 > > ), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008, > > 0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016, > > 0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255, > > 0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899, > > 1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587, > > 1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854, > > 1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997, > > 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636, > > 0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291, > > 0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471 > > ), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252, > > 2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155, > > 1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052, > > 1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422, > > 0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542, > > 0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891, > > 0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619, > > 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073, > > 0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411, > > 0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632 > > )), .Names = c("class", "name1", "name2", "name3", "name4", "name5", > > "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame", > row.names = c("ara1", > > "ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9", > > "ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16" > > )) > > > I wrote this: > > > > 
avg<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], > function(x) mean(x,na.rm=T)) ) > > > med<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) > median(x,na.rm=T)) ) > > > # avg > > # class name1 name2 name3 name4 name5 name6 name7 > name#8 name9 name10 name11 > > #1 1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922 > #0.2741547 2.376609 0.7154955 0.3654845 > > #2 2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665 > #0.2983373 1.908645 0.5731394 0.3566621 > > #3 3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971 > #0.2935527 2.118543 0.6916275 0.3076734 > > #4 4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031 > #0.2524853 1.941667 0.6283592 0.3592155 > > #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 > #0.1856815 1.807698 0.3916619 0.7229726 > > #> med > > # class name1 name2 name3 name4 name5 name6 name7 > name#8 name9 name10 name11 > > #1 1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096 > #0.2754850 2.289048 0.7230254 0.3637169 > > #2 2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908 > #0.2972432 1.852571 0.5252870 0.3958789 > > #3 3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798 > #0.2937263 2.002215 0.6442313 0.2976664 > > #4 4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735 > #0.2042306 1.932144 0.6002406 0.3081887 > > #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 > #0.1856815 1.807698 0.3916619 0.7229726 > > > > > But I do not know how can I add this information in the original data? 
> > > For example, for class 1, the output will look like this: > > dput(res1) > > structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 > c(2.554923977, > > 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378, > > 2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114, > > 1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798, > > 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525, > > 0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548, > > 2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295, > > 1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049, > > 2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748, > > 0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133, > > 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583, > > 1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245, > > 0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627, > > 2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733, > > 2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925, > > 0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619, > > 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455, > > 0.363716904)), .Names = c("class", "name1", "name2", "name3", > > "name4", "name5", "name6", "name7", "name8", "name9", "name10", > > "name11"), class = "data.frame", row.names = c("ara1", "ara2", > > "ara3", "ara4", "ara5", "Avg", "Med")) > > > > And same will be for other classes. > > > Thanks a lot !!!! > > > Nico > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.David Winsemius Alameda, CA, USA
Berend Hasselman
2014-Apr-27 09:01 UTC
[R] average and median values for each of the class
On 27-04-2014, at 02:37, Nico Met <nicomet80 at gmail.com> wrote:> Dear all, > > > > I have a matrix (dimension, 16 x 12) where 2nd column represents class > (1,1,1,1,1,2,2,2, etc) information. I want to estimate average and median > values for each of the class and add this information as a row at end of > the each classes. > > > for example: > > dput(dat) > > structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, > > 3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762, > > 2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729, > > 1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563, > > 2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278, > > 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118, > > 0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856, > > 1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088 > > ), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135, > > 0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976, > > 0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102, > > 0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426, > > 2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324, > > 1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493, > > 2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295, > > 1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389, > > 1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025, > > 1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452 > > ), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008, > > 0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016, > > 0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255, > > 0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899, > > 1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587, > > 1.234544153, 1.96629927, 1.74879757, 
1.93994009, 1.660173854, > > 1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997, > > 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636, > > 0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291, > > 0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471 > > ), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252, > > 2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155, > > 1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052, > > 1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422, > > 0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542, > > 0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891, > > 0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619, > > 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073, > > 0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411, > > 0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632 > > )), .Names = c("class", "name1", "name2", "name3", "name4", "name5", > > "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame", > row.names = c("ara1", > > "ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9", > > "ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16" > > )) > > > I wrote this: > > > > avg<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], > function(x) mean(x,na.rm=T)) ) > > > med<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) > median(x,na.rm=T)) ) > > > # avg > > # class name1 name2 name3 name4 name5 name6 name7 > name#8 name9 name10 name11 > > #1 1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922 > #0.2741547 2.376609 0.7154955 0.3654845 > > #2 2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665 > #0.2983373 1.908645 0.5731394 0.3566621 > > #3 3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971 > #0.2935527 2.118543 0.6916275 
0.3076734 > > #4 4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031 > #0.2524853 1.941667 0.6283592 0.3592155 > > #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 > #0.1856815 1.807698 0.3916619 0.7229726 > > #> med > > # class name1 name2 name3 name4 name5 name6 name7 > name#8 name9 name10 name11 > > #1 1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096 > #0.2754850 2.289048 0.7230254 0.3637169 > > #2 2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908 > #0.2972432 1.852571 0.5252870 0.3958789 > > #3 3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798 > #0.2937263 2.002215 0.6442313 0.2976664 > > #4 4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735 > #0.2042306 1.932144 0.6002406 0.3081887 > > #5 5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368 > #0.1856815 1.807698 0.3916619 0.7229726 > > > > > But I do not know how can I add this information in the original data? 
> > > For example, for class 1, the output will look like this: > > dput(res1) > > structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 > c(2.554923977, > > 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378, > > 2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114, > > 1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798, > > 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525, > > 0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548, > > 2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295, > > 1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049, > > 2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748, > > 0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133, > > 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583, > > 1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245, > > 0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627, > > 2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733, > > 2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925, > > 0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619, > > 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455, > > 0.363716904)), .Names = c("class", "name1", "name2", "name3", > > "name4", "name5", "name6", "name7", "name8", "name9", "name10", > > "name11"), class = "data.frame", row.names = c("ara1", "ara2", > > "ara3", "ara4", "ara5", "Avg", "Med")) > > > > And same will be for other classes. Please do not post in HTML, as requested by the Posting Guide. It tends to mess things up and makes your code and results unreadable. You cannot use "Avg" and "Med" unmodified as rownames. For each "class" (group would be a better name) you must append something different, e.g. the "class"-number. 
Try this: library(plyr) g <- function(dat) { avg <- as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) mean(x,na.rm=T)) ) med <- as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x) median(x,na.rm=T)) ) z <- rbind(dat,avg,med) z } DAT1 <- ddply(dat,.(class),.fun=g) rownames(DAT1) <- do.call(c,lapply(split(dat,dat["class"]), FUN=function(x) c(rownames(x),paste0("Avg",x[,"class"][1]),paste0("Med",x[,"class"][1])))) DAT1 Convoluted but it works. Maybe someone else can come up with something shorter and more elegant. Berend