Dear all, I would like to do t-test across two columns "name" with different "cat" with overall mean ("val"). (Removing if there is a single observation) And finally, make a matrix with t-value and p-value associated with a name (in rows) and cat (in columns) dput(x) structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("12.2", "15.9" ), class = "factor"), cat = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("p178266580", "p178269196", "p178316310", "p191287337", "p195158904", "p196921846", "p197427158", "p238921966"), class = "factor"), val = c(148.90772, 184.253375, 183.974866666667, 191.868125, 173.30515, 187.876975, 177.453775, 184.799525, 212.39065, 205.504525, 186.152025, 194.337075, 193.2703, 204.71665, 211.4452, 202.609175, 203.72918, 193.7261, 196.1186, 202.79556, 203.48818, 191.13744, 205.23315, 198.66842, 196.81032, 200.90512, 206.13564, 205.372225, 196.22835, 211.04686, 219.977133333333, 224.7602, 231.6596, 211.105816666667, 215.44474, 210.83514, 228.173125, 224.09034, 212.96026, 239.0085, 213.5407, 227.12115, 209.24888, 232.8964, 232.22146, 228.1643, 236.43082, 232.20792, 238.49192, 224.64014, 233.75898, 207.06138, 215.3649, 211.14802, 201.86854, 200.52278, 199.05752, 194.90904, 214.44334, 249.357266666667, 239.98525, 
234.508483333333, 243.865083333333, 233.595816666667, 248.1219, 225.289416666667, 248.220883333333, 193.69566, 198.43578, 205.06055, 208.525975, 198.28692, 206.88496, 201.60162, 205.7943, 210.5117, 196.69886, 193.58288, 198.86094, 201.81676, 225.8266, 205.879725, 218.370475, 214.006125, 198.74038, 206.00314, 198.37446, 225.5357, 216.721025, 226.543925, 158.1011, 158.15674, 166.07518, 179.942225, 158.16046, 165.0685, 159.56146 )), .Names = c("name", "cat", "val"), class = "data.frame", row.names = c( NA, 97L)) Thanks Nico [[alternative HTML version deleted]]
Hi,

Not sure about the format for the 2nd part.

df1<- ##data
library(plyr)
df2<-ddply(df1,.(name,cat),summarize, cbind(t.test(val,df1$val)$statistic,t.test(val,df1$val)$p.value))
df3<-cbind(df2[,1:2],data.frame(df2[,3]))
colnames(df3)[3:4]<- c("t-val","p.val")
library(reshape2)
df3m<- melt(df3,id.var=c("name","cat"))
xtabs(value~name+cat+variable,data=df3m)
, , variable = t-val

      cat
name      p178266580    p178269196    p178316310    p191287337    p195158904
  12.2 -1.1697701975 -5.2812696387 -1.2740973341  2.1926665883  0.1529759080
  15.9 -2.5063901671  0.0000000000 -0.2169806106  1.5455008954 -1.6574358795
      cat
name      p196921846    p197427158    p238921966
  12.2  0.2260409495 -0.3320635130  3.3659689025
  15.9  6.6278680348  0.0000000000  0.0000000000

, , variable = p.val

      cat
name      p178266580    p178269196    p178316310    p191287337    p195158904
  12.2  0.3092408498  0.0003382099  0.3762474897  0.0419925673  0.8812900356
  15.9  0.0147796276  0.0000000000  0.8365830321  0.1822041450  0.1096087365
      cat
name      p196921846    p197427158    p238921966
  12.2  0.8226135494  0.7435688987  0.0071990164
  15.9  0.0005489640  0.0000000000  0.0000000000

#or
res<-dcast(df3m,name~cat+variable,value.var="value")
row.names(res)<- res[,1]
res1<- res[,-1]
res1
     p178266580_t-val p178266580_p.val p178269196_t-val p178269196_p.val
12.2         -1.16977       0.30924085         -5.28127     0.0003382099
15.9         -2.50639       0.01477963               NA               NA
     p178316310_t-val p178316310_p.val p191287337_t-val p191287337_p.val
12.2       -1.2740973        0.3762475         2.192667       0.04199257
15.9       -0.2169806        0.8365830         1.545501       0.18220414
     p195158904_t-val p195158904_p.val p196921846_t-val p196921846_p.val
12.2        0.1529759        0.8812900        0.2260409      0.822613549
15.9       -1.6574359        0.1096087        6.6278680      0.000548964
p197427158_t-val p197427158_p.val p238921966_t-val p238921966_p.val 12.2?????? -0.3320635??????? 0.7435689???????? 3.365969????? 0.007199016 15.9?????????????? NA?????????????? NA?????????????? NA?????????????? NA A.K. ----- Original Message ----- From: Nico Met <nicomet80 at gmail.com> To: R help <r-help at r-project.org> Cc: Sent: Monday, July 15, 2013 11:50 AM Subject: [R] t-test across columns Dear all, I would like to do t-test across two columns "name" with different "cat" with overall mean ("val"). (Removing if there is a single observation) And finally, make a matrix with t-value and p-value associated with a name (in rows) and cat (in columns) dput(x) structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("12.2", "15.9" ), class = "factor"), cat = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("p178266580", "p178269196", "p178316310", "p191287337", "p195158904", "p196921846", "p197427158", "p238921966"), class = "factor"), val = c(148.90772, 184.253375, 183.974866666667, 191.868125, 173.30515, 187.876975, 177.453775, 184.799525, 212.39065, 205.504525, 186.152025, 194.337075, 193.2703, 204.71665, 211.4452, 202.609175, 203.72918, 193.7261, 196.1186, 202.79556, 203.48818, 191.13744, 205.23315, 198.66842, 196.81032, 200.90512, 
206.13564, 205.372225, 196.22835, 211.04686, 219.977133333333, 224.7602, 231.6596, 211.105816666667, 215.44474, 210.83514, 228.173125, 224.09034, 212.96026, 239.0085, 213.5407, 227.12115, 209.24888, 232.8964, 232.22146, 228.1643, 236.43082, 232.20792, 238.49192, 224.64014, 233.75898, 207.06138, 215.3649, 211.14802, 201.86854, 200.52278, 199.05752, 194.90904, 214.44334, 249.357266666667, 239.98525, 234.508483333333, 243.865083333333, 233.595816666667, 248.1219, 225.289416666667, 248.220883333333, 193.69566, 198.43578, 205.06055, 208.525975, 198.28692, 206.88496, 201.60162, 205.7943, 210.5117, 196.69886, 193.58288, 198.86094, 201.81676, 225.8266, 205.879725, 218.370475, 214.006125, 198.74038, 206.00314, 198.37446, 225.5357, 216.721025, 226.543925, 158.1011, 158.15674, 166.07518, 179.942225, 158.16046, 165.0685, 159.56146 )), .Names = c("name", "cat", "val"), class = "data.frame", row.names = c( NA, 97L)) Thanks Nico ??? [[alternative HTML version deleted]] ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
This may be close to what you want:

> t.val <- by(x, x$cat, function(y) if (min(table(y$name)>1)) {
+ t.test(val~name, y)})
> t.out <- do.call(rbind, sapply(t.val, function(y) c(y$statistic,
+ p.value=y$p.value)))
> t.out
                    t      p.value
p178266580 -0.1156475 0.9144054453
p178316310 -1.0874356 0.4143944591
p191287337 -0.6776053 0.5315717871
p195158904  1.1522850 0.2769290728
p196921846 -4.2342996 0.0003925339

But I'm not sure what you mean about columns for each cat unless you want the frequencies:

> freq.out <- xtabs(~cat+name, x)
> freq.out <- freq.out[apply(freq.out, 1, function(y) min(y) > 1),]
> freq.out
            name
cat          12.2 15.9
  p178266580    4   11
  p178316310    2    3
  p191287337    3    5
  p195158904    8    7
  p196921846   26    5
> results <- cbind(freq.out, t.out)
> results
           12.2 15.9          t      p.value
p178266580    4   11 -0.1156475 0.9144054453
p178316310    2    3 -1.0874356 0.4143944591
p191287337    3    5 -0.6776053 0.5315717871
p195158904    8    7  1.1522850 0.2769290728
p196921846   26    5 -4.2342996 0.0003925339

-------------------------------------
David L Carlson
Associate Professor of Anthropology
Texas A&M University
College Station, TX 77840-4352

-----Original Message-----
From: r-help-bounces at r-project.org [mailto:r-help-bounces at r-project.org] On Behalf Of Nico Met
Sent: Monday, July 15, 2013 10:50 AM
To: R help
Subject: [R] t-test across columns

Dear all, I would like to do t-test across two columns "name" with different "cat" with overall mean ("val"). 
(Removing if there is a single observation) And finally, make a matrix with t-value and p-value associated with a name (in rows) and cat (in columns) dput(x) structure(list(name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("12.2", "15.9" ), class = "factor"), cat = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("p178266580", "p178269196", "p178316310", "p191287337", "p195158904", "p196921846", "p197427158", "p238921966"), class = "factor"), val = c(148.90772, 184.253375, 183.974866666667, 191.868125, 173.30515, 187.876975, 177.453775, 184.799525, 212.39065, 205.504525, 186.152025, 194.337075, 193.2703, 204.71665, 211.4452, 202.609175, 203.72918, 193.7261, 196.1186, 202.79556, 203.48818, 191.13744, 205.23315, 198.66842, 196.81032, 200.90512, 206.13564, 205.372225, 196.22835, 211.04686, 219.977133333333, 224.7602, 231.6596, 211.105816666667, 215.44474, 210.83514, 228.173125, 224.09034, 212.96026, 239.0085, 213.5407, 227.12115, 209.24888, 232.8964, 232.22146, 228.1643, 236.43082, 232.20792, 238.49192, 224.64014, 233.75898, 207.06138, 215.3649, 211.14802, 201.86854, 200.52278, 199.05752, 194.90904, 214.44334, 249.357266666667, 239.98525, 234.508483333333, 243.865083333333, 233.595816666667, 248.1219, 225.289416666667, 248.220883333333, 193.69566, 
198.43578, 205.06055, 208.525975, 198.28692, 206.88496, 201.60162, 205.7943, 210.5117, 196.69886, 193.58288, 198.86094, 201.81676, 225.8266, 205.879725, 218.370475, 214.006125, 198.74038, 206.00314, 198.37446, 225.5357, 216.721025, 226.543925, 158.1011, 158.15674, 166.07518, 179.942225, 158.16046, 165.0685, 159.56146 )), .Names = c("name", "cat", "val"), class = "data.frame", row.names = c( NA, 97L)) Thanks Nico [[alternative HTML version deleted]] ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.