I have written a few different summary functions. I want to calculate the statistics by groups and I am having trouble getting the output as a dataframe. I have attached one example with a small dataset that calculates summary stats and percentiles, I have others that calculate upper confidence limits etc. I would like the output to be converted to a dataframe with one of the columns as the grouping variable. This seems simple but my attempts with do.call("cbind") and rbind have not worked so I have concluded I am missing something obvious. Any help is appreciated. Thanks, Mike areas <- structure (list(N_Type = structure(c(4, 1, 4, 1, 1, 4, 1, 4, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 1, 4, 1, 4, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 2, 1, 2, 1, 1, 4, 1, 4, 1, 4, 4, 1, 4, 1), .Label = c("All", "Inside 370", "Not Applicable", "Outside 370" ), class = "factor"), AdRes = c(23.7, 23.7, 42.4, 42.4, 630, 630, 990, 990, 72.85, 72.85, 70.6, 70.6, 10, 10, 21.7, 21.7, 171.66, 171.66, 306, 306, 62.1, 62.1, 53.25, 53.25, 208, 208, 64.8, 64.8, 87.3, 87.3, 356, 356, 25.8, 25.8, 156, 156, 166, 166, 135.5, 135.5, 170.5, 170.5, 203, 203, 227.5, 227.5, 224, 224, 123, 123, 140.66, 140.66, 142.5, 142.5, 44.65, 44.65, 50.3, 50.3, 1320, 1320, 577, 577, 71.1, 71.1, 411, 411, 104, 104, 122, 122, 201, 201, 230, 230, 192, 192, 304, 304, 184.5, 184.5, 350, 350, 536, 536, 470.5, 470.5, 172, 172, 166, 166, 205, 205, 595, 595, 227.5, 227.5, 9.1, 9.1, 14.6, 14.6, 10.9, 10.9, 11.1, 11.1, 313.5, 313.5, 53.8, 53.8, 29.8, 29.8, 29.5, 29.5, 34.05, 34.05, 21.8, 21.8, 385.5, 385.5, 541, 541, 168, 168, 119, 119, 376, 376, 91.9, 91.9, 97.76, 97.76, 164, 164, 244, 244, 303.5, 303.5, 388, 388, 
59.8, 59.8, 227.5, 227.5, 165, 165, 19.15, 19.15, 651, 651, 195, 195, 190, 190, 164, 164, 190, 190, 334, 334)), .Names = c("N_Type", "AdRes"), row.names = c("8956", "8957", "8972", "8973", "8974", "8975", "8976", "8977", "8978", "8979", "8980", "8981", "8982", "8983", "8984", "8985", "9159", "9160", "9175", "9176", "9177", "9178", "9185", "9186", "9201", "9202", "9203", "9204", "9205", "9206", "9207", "9208", "9209", "9210", "9217", "9218", "9233", "9234", "9241", "9242", "9261", "9262", "9277", "9278", "9285", "9286", "9301", "9302", "9309", "9310", "9329", "9330", "9345", "9346", "9353", "9354", "9369", "9370", "9371", "9372", "9373", "9374", "9410", "9411", "9412", "9413", "9414", "9415", "9422", "9423", "9424", "9425", "9426", "9427", "9428", "9429", "9430", "9431", "9432", "9433", "9434", "9435", "9436", "9437", "9444", "9445", "9452", "9453", "9454", "9455", "9456", "9457", "9458", "9459", "9460", "9461", "9468", "9469", "9470", "9471", "9472", "9473", "9474", "9475", "9476", "9477", "9478", "9479", "9480", "9481", "9488", "9489", "9496", "9497", "9498", "9499", "9720", "9721", "9722", "9723", "9724", "9725", "9726", "9727", "9728", "9729", "9730", "9731", "9732", "9733", "9734", "9735", "9736", "9737", "9738", "9739", "9740", "9741", "9742", "9743", "9744", "9745", "9746", "9747", "9748", "9749", "9750", "9751", "9752", "9753", "9754", "9755", "9756", "9757", "9758", "9759", "9760", "9761"), class = "data.frame") Pstats <- function(x) { Max = max(x) Min = min(x) AMean = mean(x) AStdev = sd(x) Samples <- length(x) p10 <- quantile(x,0.1,na.rm = TRUE, names = FALSE) p20 <- quantile(x,0.2,na.rm = TRUE, names = FALSE) p30 <- quantile(x,0.3,na.rm = TRUE, names = FALSE) p40 <- quantile(x,0.4,na.rm = TRUE, names = FALSE) p50 <- quantile(x,0.5,na.rm = TRUE, names = FALSE) p60 <- quantile(x,0.6,na.rm = TRUE, names = FALSE) p70 <- quantile(x,0.7,na.rm = TRUE, names = FALSE) p80 <- quantile(x,0.8,na.rm = TRUE, names = FALSE) p90 <- quantile(x,0.9,na.rm = TRUE, names = 
FALSE) Result <- data.frame(Samples,AMean,AStdev, Min,Max,p10,p20,p30,p40,p50,p60,p70,p80,p90) return(Result) #write.table(Result, file = "Results.csv", sep = ",",row.names = FALSE) } attach(areas) res <- by(areas, N_Type, function (x) (Pstats(AdRes))) #need to convert res to a dataframe Michael Bock, PhD ENVIRON International Corporation 136 Commercial Street, Suite 402 Portland, ME 04101 phone: 207.347.4413 fax: 207.347.4384 This message contains information that may be confidential, ...{{dropped}}
Try this: Pstats <- function(x) c(Max = max(x), Min = min(x), AMean = mean(x), AStdev = sd(x), Samples = length(x), quantile(x, 1:9/10, na.rm = TRUE)) res <- with(areas, by(AdRes, N_Type, Pstats)) do.call("rbind", res) Also, check out summaryBy in the doBy package at genetics.agrsci.dk/~sorenh/misc/index.html On 1/3/06, Mike Bock <mbock at environcorp.com> wrote:> I have written a few different summary functions. I want to calculate > the statistics by groups and I am having trouble getting the output as a > dataframe. I have attached one example with a small dataset that > calculates summary stats and percentiles, I have others that calculate > upper confidence limits etc. I would like the output to be converted to > a dataframe with one of the columns as the grouping variable. This seems > simple but my attempts with do.call("cbind") and rbind have not worked > so I have concluded I a missing something obvious. Any help is > appreciated. > > Thanks, > Mike > > > > areas <- structure (list(N_Type = structure(c(4, 1, 4, 1, 1, 4, 1, 4, 4, > 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1, > 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, > 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 4, 1, 4, 1, 4, 1, 4, 1, 1, 4, > 1, 4, 1, 4, 1, 4, 4, 1, 4, 1, 2, 1, 2, 1, 1, 4, 1, 4, 4, 1, 4, > 1, 4, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1, > 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, > 1, 4, 1, 4, 1, 1, 4, 1, 4, 2, 1, 2, 1, 1, 4, 1, 4, 1, 4, 4, 1, > 4, 1), .Label = c("All", "Inside 370", "Not Applicable", "Outside 370" > ), class = "factor"), AdRes = c(23.7, 23.7, 42.4, 42.4, 630, > 630, 990, 990, 72.85, 72.85, 70.6, 70.6, 10, 10, 21.7, 21.7, > 171.66, 171.66, 306, 306, 62.1, 62.1, 53.25, 53.25, 208, 208, > 64.8, 64.8, 87.3, 87.3, 356, 356, 25.8, 25.8, 156, 156, 166, > 166, 135.5, 135.5, 170.5, 170.5, 203, 203, 227.5, 227.5, 224, > 224, 123, 123, 140.66, 140.66, 142.5, 142.5, 44.65, 44.65, 50.3, > 50.3, 1320, 1320, 577, 577, 
71.1, 71.1, 411, 411, 104, 104, 122, > 122, 201, 201, 230, 230, 192, 192, 304, 304, 184.5, 184.5, 350, > 350, 536, 536, 470.5, 470.5, 172, 172, 166, 166, 205, 205, 595, > 595, 227.5, 227.5, 9.1, 9.1, 14.6, 14.6, 10.9, 10.9, 11.1, 11.1, > 313.5, 313.5, 53.8, 53.8, 29.8, 29.8, 29.5, 29.5, 34.05, 34.05, > 21.8, 21.8, 385.5, 385.5, 541, 541, 168, 168, 119, 119, 376, > 376, 91.9, 91.9, 97.76, 97.76, 164, 164, 244, 244, 303.5, 303.5, > 388, 388, 59.8, 59.8, 227.5, 227.5, 165, 165, 19.15, 19.15, 651, > 651, 195, 195, 190, 190, 164, 164, 190, 190, 334, 334)), .Names > c("N_Type", > "AdRes"), row.names = c("8956", "8957", "8972", "8973", "8974", > "8975", "8976", "8977", "8978", "8979", "8980", "8981", "8982", > "8983", "8984", "8985", "9159", "9160", "9175", "9176", "9177", > "9178", "9185", "9186", "9201", "9202", "9203", "9204", "9205", > "9206", "9207", "9208", "9209", "9210", "9217", "9218", "9233", > "9234", "9241", "9242", "9261", "9262", "9277", "9278", "9285", > "9286", "9301", "9302", "9309", "9310", "9329", "9330", "9345", > "9346", "9353", "9354", "9369", "9370", "9371", "9372", "9373", > "9374", "9410", "9411", "9412", "9413", "9414", "9415", "9422", > "9423", "9424", "9425", "9426", "9427", "9428", "9429", "9430", > "9431", "9432", "9433", "9434", "9435", "9436", "9437", "9444", > "9445", "9452", "9453", "9454", "9455", "9456", "9457", "9458", > "9459", "9460", "9461", "9468", "9469", "9470", "9471", "9472", > "9473", "9474", "9475", "9476", "9477", "9478", "9479", "9480", > "9481", "9488", "9489", "9496", "9497", "9498", "9499", "9720", > "9721", "9722", "9723", "9724", "9725", "9726", "9727", "9728", > "9729", "9730", "9731", "9732", "9733", "9734", "9735", "9736", > "9737", "9738", "9739", "9740", "9741", "9742", "9743", "9744", > "9745", "9746", "9747", "9748", "9749", "9750", "9751", "9752", > "9753", "9754", "9755", "9756", "9757", "9758", "9759", "9760", > "9761"), class = "data.frame") > > > Pstats <- function(x) > { > Max = max(x) > Min = min(x) > 
AMean = mean(x) > AStdev = sd(x) > Samples <- length(x) > p10 <- quantile(x,0.1,na.rm = TRUE, names = FALSE) > p20 <- quantile(x,0.2,na.rm = TRUE, names = FALSE) > p30 <- quantile(x,0.3,na.rm = TRUE, names = FALSE) > p40 <- quantile(x,0.4,na.rm = TRUE, names = FALSE) > p50 <- quantile(x,0.5,na.rm = TRUE, names = FALSE) > p60 <- quantile(x,0.6,na.rm = TRUE, names = FALSE) > p70 <- quantile(x,0.7,na.rm = TRUE, names = FALSE) > p80 <- quantile(x,0.8,na.rm = TRUE, names = FALSE) > p90 <- quantile(x,0.9,na.rm = TRUE, names = FALSE) > Result <- data.frame(Samples,AMean,AStdev, > Min,Max,p10,p20,p30,p40,p50,p60,p70,p80,p90) > return(Result) > #write.table(Result, file = "Results.csv", sep = ",",row.names > FALSE) > } > > attach(areas) > res <- by(areas, N_Type, function (x) > (Pstats(AdRes))) > > #need to convert res to a dataframe > > > > Michael Bock, PhD > ENVIRON International Corporation > 136 Commercial Street, Suite 402 > Portland, ME 04101 > phone: 207.347.4413 > fax: 207.347.4384 > > > > > This message contains information that may be confidential, ...{{dropped}} > > ______________________________________________ > R-help at stat.math.ethz.ch mailing list > stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide! R-project.org/posting-guide.html >