Marc Schwartz
2002-Jul-27 18:43 UTC
R Code for X-Tab with Row/Col Proportions, Expected Vals and Tests
Recently, I noted a post and replies on R-Help from Professor Marc Feldesman regarding a cross tabulation function that generates row and column proportions, marginal values, expected cell values and tests for independence presumably similar in a fashion to the output of the S-Plus crosstabs() function or the SAS Proc Freq. Martin Maechler had posted some code in reply for folks to update and translate. In parallel, out of my own needs for something functionally similar, I have been working for a while on a function that takes either a two-dimensional matrix or two vectors and generates just such a table, with output similar to the aforementioned S-Plus/SAS functions. The code for the function is attached. My goal would be to make this available to the R community as open source, either independently or perhaps if deemed appropriate, as a base function. I would appreciate any constructive criticism of the code itself or the output. My guess is that there may be more efficient means of performing some of the manipulations and/or perhaps being consistent with R coding and output standards. If there are any suggestions for improvement, I would be more than happy to incorporate these and make them available to the community. Thank you for your consideration and I look forward to any feedback. Best regards, Marc Schwartz
Marc Schwartz
2002-Jul-28 22:25 UTC
R Code for X-Tab with Row/Col Proportions, Expected Vals and Tests
Recently, I noted a post and replies on R-Help from Professor Marc Feldesman regarding a cross tabulation function that generates row and column proportions, marginal values, expected cell values and tests for independence presumably similar in a fashion to the output of the S-Plus crosstabs() function or SAS Proc Freq. Martin Maechler had posted some code in reply for folks to update and translate. In parallel, out of my own needs for something functionally similar, I have been working for a while on a function that takes either a two-dimensional matrix or two vectors and generates just such a table, with output similar to the aforementioned S-Plus/SAS functions. Column widths are adjusted based upon the dimnames and the digits argument and the printed table is labeled with vector names if used. The code for the function is below. If anyone wishes me to e-mail a text file containing the code, let me know. My goal would be to make this available to the R community as open source, either independently or perhaps if deemed appropriate, as a base function. I would appreciate any constructive criticism of the code itself or the output. My guess is that there may be more efficient means of performing some of the manipulations and/or perhaps being consistent with core R coding and output standards. If there are any suggestions for improvement, I would be more than happy to incorporate these and make them available to the community. Thank you for your consideration and I look forward to any feedback. Best regards, Marc Schwartz ------------------------------------------------ CrossTable <- function (x, y, digits = 3, expected = FALSE, correct = TRUE) { Syntax <- paste("\nSyntax:", "CrossTable(x, y, digits = 3, drop = TRUE,", " expected = FALSE, correct = TRUE)\n", "x: A vector in a matrix or dataframe OR", " if y not present, a two-dimension matrix", "y: A vector in a matrix or dataframe.", "digits: Number of digits after the decimal", " point for cell proportions", "expected: If TRUE, expected cell counts from the", " Chi^2 will be included.", "correct: If TRUE, the Yates correction will be", " applied in the Chi^2 test.", sep = "\n") # Do error checking if (missing(x)) stop(Syntax) if (missing(y)) { # if only x is specified, it must be a 2 dimensional matrix if (length(dim(x)) != 2) stop("x must be a 2 dimensional matrix if y is not given") if(any(dim(x) < 2)) stop("x must have at least 2 rows and columns") if(any(x < 0) || any(is.na(x))) stop("all entries of x must be nonnegative and finite") else t <- x } else { if(length(x) != length(y)) stop("x and y must have the same length") # Create Titles for Table From Vector Names RowData <- deparse(substitute(x)) ColData <- deparse(substitute(y)) # Remove unused factor levels from vectors x <- factor(x) y <- factor(y) if((nlevels(x) < 2) || (nlevels(y) < 2)) stop("x and y must have at least 2 levels") # Generate table t <- table(x, y) } # Generate cell proportion of row CPR <- prop.table(t, 1) # Generate cell proportion of col CPC <- prop.table(t, 2) # Generate cell proportion of total CPT <- prop.table(t) # Generate summary counts GT <- sum(t) RS <- rowSums(t) CS <- colSums(t) # Column and Row Total Headings ColTotal <- "Column Total" RowTotal <- "Row Total" # Set consistent col widths based upon dimnames and table vals CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]), nchar(RS), nchar(CS), nchar(RowTotal))) RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal))) # Adjust first column width if Data Titles present if (exists("RowData")) RWidth <- max(RWidth, nchar(RowData)) # Create row separators RowSep <- paste(rep("-", CWidth + 2), collapse = "") RowSep1 <- paste(rep("-", RWidth + 1), collapse = "") SpaceSep1 <- paste(rep(" ", RWidth), collapse = "") SpaceSep2 <- paste(rep(" ", CWidth), collapse = "") # Create formatted Names FirstCol <- formatC(dimnames(t)[[1]], width = RWidth, format = "s") ColTotal <- formatC(ColTotal, width = RWidth, format = "s") RowTotal <- formatC(RowTotal, width = CWidth, format = "s") # Perform Chi-Square Test CST <- chisq.test(t, correct = correct) # Perform Fisher Tests FTt <- fisher.test(t, alternative = "two.sided") # If 2 x 2, include one tailed values if (all(dim(t) == 2)) { FTl <- fisher.test(t, alternative = "less") FTg <- fisher.test(t, alternative = "greater") } # Print Cell Layout cat(rep("\n", 2)) cat("|-----------------|\n") cat("| N |\n") if (expected) cat("| Expected N |\n") cat("| N / Row Total |\n") cat("| N / Col Total |\n") cat("| N / Table Total |\n") cat("|-----------------|\n") cat(rep("\n", 2)) cat("Total Observations in Table: ", GT, "\n") cat(rep("\n", 2)) # Print Column headings # print vector names if present if (exists("RowData")) { cat(SpaceSep1, "|", ColData, "\n") cat(formatC(RowData, width = RWidth, format = "s"), formatC(dimnames(t)[[2]], width = CWidth, format = "s"), RowTotal, sep = " | ", collapse = "\n") } else cat(SpaceSep1, formatC(dimnames(t)[[2]], width = CWidth, format = "s"), RowTotal, sep = " | ", collapse = "\n") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") # Print table cells for (i in 1:nrow(t)) { # print N cat(FirstCol[i], formatC(c(t[i, ], RS[i]), width = CWidth), sep = " | ", collapse = "\n") # print Expected N? if (expected) cat(SpaceSep1, formatC(CST$expected[i, ], digits = digits, format = "f", width = CWidth), SpaceSep2, sep = " | ", collapse = "\n") # print cell row proportions cat(SpaceSep1, formatC(c(CPR[i, ], RS[i] / GT), width = CWidth, digits = digits, format = "f"), sep = " | ", collapse = "\n") # print cell col proportions cat(SpaceSep1, formatC(CPC[i, ], width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") # print cell Table proportions cat(SpaceSep1, formatC(CPT[i, ], width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") # print row separator cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") } # Print Column Totals cat(ColTotal, formatC(c(CS, GT), width = CWidth), sep = " | ", collapse = "\n") # Print col proportions cat(SpaceSep1, formatC(CS / GT, width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") # Print Statistics cat(rep("\n", 2)) cat("Tests for Independence of All Table Factors\n\n\n") cat(CST$method,"\n\n") cat("Chi^2 = ", CST$statistic, " d.f. = ", CST$parameter, " p = ", CST$p.value, "\n") cat(rep("\n", 2)) cat("Fisher's Exact Test for Count Data\n\n") # if 2 x 2 table print one and two-tailed values if (all(dim(t) == 2)) { cat("Sample estimate odds ratio: ", FTt$estimate, "\n\n") cat("Alternative hypothesis: true odds ratio is not equal to 1\n") cat("p = ", FTt$p.value, "\n") cat("95% confidence interval: ", FTt$conf.int, "\n\n") cat("Alternative hypothesis: true odds ratio is less than 1\n") cat("p = ", FTl$p.value, "\n") cat("95% confidence interval: ", FTl$conf.int, "\n\n") cat("Alternative hypothesis: true odds ratio is greater than 1\n") cat("p = ", FTg$p.value, "\n") cat("95% confidence interval: ", FTg$conf.int, "\n\n") } else { cat("Alternative hypothesis: two.sided\n") cat("p = ", FTt$p.value, "\n") } cat(rep("\n", 2)) } -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.- r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch _._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._