Felipe Carrillo
2014-Aug-12 17:16 UTC
[R] t.test of matching columns from two datasets using plyr
Hi, I have two datasets, df1 and df2, with 3 matching columns. I need to do a t.test of sp1, sp2 and sp3 and var1, var2 and var3 where the year, month and location match. I can do it with sapply or mapply, but I want the end result to be a data.frame. I prefer to do it with plyr or dplyr, as I have been using these packages throughout this project. My final dataframe should have the t.test statistic and the p.value. Sample datasets follow. First dataframe: df1 <- structure(list(Year = c(1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L), month = c("Feb", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Mar", "Apr", "Apr", "Apr", "Apr", "Apr", "Apr"), location = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 2L, 4L, 4L, 1L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 2L), .Label = c("Far West", "North", "Other", "South", "West"), class = "factor"), var1 = c(111.6, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 31.4, 245.9, 46.3, 59.8, 206.1, 200.3, 88, 73.4, 33.9, 7.1), var2 = c(0, 4.7, 4.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 159.8, 0, 0, 142.2, 94.3, 0, 0, 0, 0), var3 = c(180.2, 14.1, 123.7, 17.4, 5.5, 12.9, 39.3, 21, 66.6, 12.2, 13.6, 15.7, 36.9, 0, 143.5, 35.5, 235.6, 51.3, 230.6, 81.3, 190.9)), .Names = c("Year", "month", "location", "var1", "var2", "var3"), row.names = 17093:17113, class = "data.frame") Second dataframe: df2 <- structure(list(Year = c(1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L, 1995L), month = c("Apr", "Apr", "Apr", "Apr", "Apr", "Apr", "Apr", "Apr", "May", "May", "May", "May", "May", "May", "May", "May", "May", "May", "May", "May", "May"), location = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Far West", "North", "South", "West"), class = "factor"), sp1 = c(853.0055629, 147.7158909, 160.1536518, 
65.01652491, 2332.609706, 701.4706852, 11.36420842, 0, 2645.671425, 2769.409257, 523.4284249, 135.1274855, 72.22498557, 35.07497333, 572.087043, 150.4768424, 111.5881472, 61.21848041, 392.0651906, 0, 771.0337355), sp2 = c(10.27717546, 0, 0, 0, 0, 10.16624181, 0, 0, 0, 307.7121397, 52.34284249, 19.30392649, 24.07499519, 0, 35.75544018, 42.99338354, 0, 40.81232027, 0, 90.9210806, 622.7580172), sp3 = c(92.49457911, 128.0204387, 203.8319205, 175.5446173, 120.6522262, 71.1636927, 107.95998, 57.14456898, 43.37166271, 153.8560698, 104.685685, 77.21570598, 96.29998075, 187.0665244, 0, 0, 111.5881472, 163.2492811, 26.13767938, 45.4605403, 207.5860057)), .Names = c("Year", "month", "location", "sp1", "sp2", "sp3"), row.names = 30:50, class = "data.frame") Thank you very much. [[alternative HTML version deleted]]