Full_Name: Chien-yu Peng Version: 2.0.1 OS: Windows XP Professional Submission from: (NULL) (140.109.72.181) Dear all: Although I don't know you, I am thankful for your help. When I use the function mantelhaen.test for R x C x K (R, C > 2) table, the output is not the same as SAS's. I don't know that the result consist with one of SAS's. But it works correctly for 2 x 2 x K table. In addition, the function mantelhaen.test does not contain such as test for general association, test for row means scores differ, and test for nonzero correlation in SAS. Could you tell me "Is it something wrong in the function (mantelhaen.test) for R x C x K (R, C > 2) table"? Therefore, I modify part of the function such that output looks as the same as SASs. mh.test <- function(x) { if (any(apply(x, 3, sum) < 2)) stop("sample size in each stratum must be > 1") I <- dim(x)[1]; J <- dim(x)[2]; K <- dim(x)[3] df_GMH <- (I - 1) * (J - 1); df_SMH <- I - 1; df_CSMH <- 1 A_GMH <- cbind(diag(I-1), 0) %x% cbind(diag(J-1), 0) A_SMH <- cbind(diag(I-1), 0) %x% t(1:J) A_CSMH <- t(1:I) %x% t(1:J) Y_GMH <- matrix(0, nc = 1, nr = df_GMH) Y_SMH <- matrix(0, nc = 1, nr = df_SMH) Y_CSMH <- matrix(0, nc = 1, nr = df_CSMH) S_GMH <- matrix(0, nc = df_GMH, nr = df_GMH) S_SMH <- matrix(0, nc = df_SMH, nr = df_SMH) S_CSMH <- matrix(0, nc = df_CSMH, nr = df_CSMH) for(k in 1:K) { V <- NULL f <- x[, , k] ntot <- sum(f) p_ip <- apply(f, 1, sum) / ntot p_pj <- apply(f, 2, sum) / ntot m <- p_ip %x% p_pj * ntot V <- ntot^2 * ((diag(p_ip) - p_ip %*% t(p_ip)) %x% (diag(p_pj) - p_pj %*% t(p_pj))) / (ntot-1) Y_GMH <- Y_GMH + A_GMH %*% (c(t(f)) - m) Y_SMH <- Y_SMH + A_SMH %*% (c(t(f)) - m) Y_CSMH <- Y_CSMH + A_CSMH %*% (c(t(f)) - m) S_GMH <- S_GMH + A_GMH %*% V %*% t(A_GMH) S_SMH <- S_SMH + A_SMH %*% V %*% t(A_SMH) S_CSMH <- S_CSMH + A_CSMH %*% V %*% t(A_CSMH) } Q_GMH <- t(Y_GMH) %*% solve(S_GMH) %*% Y_GMH Q_SMH <- t(Y_SMH) %*% solve(S_SMH) %*% Y_SMH Q_CSMH <- t(Y_CSMH) %*% solve(S_CSMH) %*% Y_CSMH STATISTIC <- c(Q_CSMH, Q_SMH, Q_GMH) PARAMETER <- c(df_CSMH, df_SMH, df_GMH) PVAL <- pchisq(STATISTIC, PARAMETER, lower = FALSE) result <- cbind(STATISTIC, PARAMETER, PVAL) rownames(result) <- list("Nonzero Correlation", "Row Mean Scores Differ", "General Association") colnames(result) <- list("Statistic", "df", "p-value") return (result) } x <- array(c(23,23,20,24,7,10,13,10,2,5,5,6,18,18,13,9,6,6,13,15,1,2,2,2,8,12,11,7,6,4,6,7,3,4,2,4,12,15,14,13,9,3,8,6,1,2,3,4), dim = c(4,3,4)) mh.test(x) Statistic df p-value Nonzero Correlation 6.34043 1 0.01180163 Row Mean Scores Differ 6.59013 3 0.08617498 General Association 10.59828 6 0.10161441
>>>>> chienyu writes:> Full_Name: Chien-yu Peng > Version: 2.0.1 > OS: Windows XP Professional > Submission from: (NULL) (140.109.72.181)> Dear all: > Although I don't know you, I am thankful for your help. > When I use the function mantelhaen.test for R x C x K (R, C > 2) table, > the output is not the same as SAS's. I don't know that the result consist with > one of SAS's. But it works correctly for 2 x 2 x K table. > In addition, the function mantelhaen.test does not contain such as test for > general association, test for row means scores differ, and test for nonzero > correlation in SAS.> Could you tell me "Is it something wrong in the function (mantelhaen.test) for R > x C x K (R, C > 2) table"?> Therefore, I modify part of the function such that output looks as the same as > SASs.> mh.test <- function(x) { > if (any(apply(x, 3, sum) < 2)) > stop("sample size in each stratum must be > 1") > I <- dim(x)[1]; J <- dim(x)[2]; K <- dim(x)[3] > df_GMH <- (I - 1) * (J - 1); df_SMH <- I - 1; df_CSMH <- 1 > A_GMH <- cbind(diag(I-1), 0) %x% cbind(diag(J-1), 0) > A_SMH <- cbind(diag(I-1), 0) %x% t(1:J) > A_CSMH <- t(1:I) %x% t(1:J) > Y_GMH <- matrix(0, nc = 1, nr = df_GMH) > Y_SMH <- matrix(0, nc = 1, nr = df_SMH) > Y_CSMH <- matrix(0, nc = 1, nr = df_CSMH) > S_GMH <- matrix(0, nc = df_GMH, nr = df_GMH) > S_SMH <- matrix(0, nc = df_SMH, nr = df_SMH) > S_CSMH <- matrix(0, nc = df_CSMH, nr = df_CSMH) > for(k in 1:K) { > V <- NULL > f <- x[, , k] > ntot <- sum(f) > p_ip <- apply(f, 1, sum) / ntot > p_pj <- apply(f, 2, sum) / ntot > m <- p_ip %x% p_pj * ntot > V <- ntot^2 * ((diag(p_ip) - p_ip %*% t(p_ip)) %x% (diag(p_pj) - p_pj > %*% t(p_pj))) / (ntot-1) > Y_GMH <- Y_GMH + A_GMH %*% (c(t(f)) - m) > Y_SMH <- Y_SMH + A_SMH %*% (c(t(f)) - m) > Y_CSMH <- Y_CSMH + A_CSMH %*% (c(t(f)) - m) > S_GMH <- S_GMH + A_GMH %*% V %*% t(A_GMH) > S_SMH <- S_SMH + A_SMH %*% V %*% t(A_SMH) > S_CSMH <- S_CSMH + A_CSMH %*% V %*% t(A_CSMH) > } > Q_GMH <- t(Y_GMH) %*% solve(S_GMH) %*% Y_GMH > Q_SMH <- t(Y_SMH) %*% solve(S_SMH) %*% Y_SMH > Q_CSMH <- t(Y_CSMH) %*% solve(S_CSMH) %*% Y_CSMH > STATISTIC <- c(Q_CSMH, Q_SMH, Q_GMH) > PARAMETER <- c(df_CSMH, df_SMH, df_GMH) > PVAL <- pchisq(STATISTIC, PARAMETER, lower = FALSE) > result <- cbind(STATISTIC, PARAMETER, PVAL) > rownames(result) <- list("Nonzero Correlation", "Row Mean Scores Differ", > "General Association") > colnames(result) <- list("Statistic", "df", "p-value") > return (result) > }> x <- array(c(23,23,20,24,7,10,13,10,2,5,5,6,18,18,13,9,6,6,13,15,1,2,2,2,8,12,11,7,6,4,6,7,3,4,2,4,12,15,14,13,9,3,8,6,1,2,3,4), > dim = c(4,3,4))> mh.test(x) > Statistic df p-value > Nonzero Correlation 6.34043 1 0.01180163 > Row Mean Scores Differ 6.59013 3 0.08617498 > General Association 10.59828 6 0.10161441This is already fixed in R-devel, to be released as R 2.1.0 on Apr 18: R> mantelhaen.test(x) Cochran-Mantel-Haenszel test data: x Cochran-Mantel-Haenszel M^2 = 10.5983, df = 6, p-value = 0.1016 as you get, whereas in 2.0.1, quite incorrectly Cochran-Mantel-Haenszel M^2 = 35.9735, df = 6, p-value = 2.790e-06 Best -k
Thanks for the report and code. There's two parts to this: a suggested enhancement to show components of the (R-1)(C-1) df "general association" test that are useful for ordinal variables, and a bug report on the test itself: R gives 35.9 and the suggested code (and apparently SAS) give 10.6. I'll look into the bug. To be a proper replacement for mantelhaen.test your code would need to return an object of class "htest", the way the other test functions do. If the function is going to compute the test statistics for ordinal data It would also be better to have the option to test for differences in column means rather than row means, and to specify scores. -thomas On Thu, 7 Apr 2005 chienyu@stat.sinica.edu.tw wrote:> Full_Name: Chien-yu Peng > Version: 2.0.1 > OS: Windows XP Professional > Submission from: (NULL) (140.109.72.181) > > > Dear all: > Although I don't know you, I am thankful for your help. > When I use the function mantelhaen.test for R x C x K (R, C > 2) table, > the output is not the same as SAS's. I don't know that the result consist with > one of SAS's. But it works correctly for 2 x 2 x K table. > In addition, the function mantelhaen.test does not contain such as test for > general association, test for row means scores differ, and test for nonzero > correlation in SAS. > > Could you tell me "Is it something wrong in the function (mantelhaen.test) for R > x C x K (R, C > 2) table"? > > Therefore, I modify part of the function such that output looks as the same as > SASs. > > mh.test <- function(x) { > if (any(apply(x, 3, sum) < 2)) > stop("sample size in each stratum must be > 1") > I <- dim(x)[1]; J <- dim(x)[2]; K <- dim(x)[3] > df_GMH <- (I - 1) * (J - 1); df_SMH <- I - 1; df_CSMH <- 1 > A_GMH <- cbind(diag(I-1), 0) %x% cbind(diag(J-1), 0) > A_SMH <- cbind(diag(I-1), 0) %x% t(1:J) > A_CSMH <- t(1:I) %x% t(1:J) > Y_GMH <- matrix(0, nc = 1, nr = df_GMH) > Y_SMH <- matrix(0, nc = 1, nr = df_SMH) > Y_CSMH <- matrix(0, nc = 1, nr = df_CSMH) > S_GMH <- matrix(0, nc = df_GMH, nr = df_GMH) > S_SMH <- matrix(0, nc = df_SMH, nr = df_SMH) > S_CSMH <- matrix(0, nc = df_CSMH, nr = df_CSMH) > for(k in 1:K) { > V <- NULL > f <- x[, , k] > ntot <- sum(f) > p_ip <- apply(f, 1, sum) / ntot > p_pj <- apply(f, 2, sum) / ntot > m <- p_ip %x% p_pj * ntot > V <- ntot^2 * ((diag(p_ip) - p_ip %*% t(p_ip)) %x% (diag(p_pj) - p_pj > %*% t(p_pj))) / (ntot-1) > Y_GMH <- Y_GMH + A_GMH %*% (c(t(f)) - m) > Y_SMH <- Y_SMH + A_SMH %*% (c(t(f)) - m) > Y_CSMH <- Y_CSMH + A_CSMH %*% (c(t(f)) - m) > S_GMH <- S_GMH + A_GMH %*% V %*% t(A_GMH) > S_SMH <- S_SMH + A_SMH %*% V %*% t(A_SMH) > S_CSMH <- S_CSMH + A_CSMH %*% V %*% t(A_CSMH) > } > Q_GMH <- t(Y_GMH) %*% solve(S_GMH) %*% Y_GMH > Q_SMH <- t(Y_SMH) %*% solve(S_SMH) %*% Y_SMH > Q_CSMH <- t(Y_CSMH) %*% solve(S_CSMH) %*% Y_CSMH > STATISTIC <- c(Q_CSMH, Q_SMH, Q_GMH) > PARAMETER <- c(df_CSMH, df_SMH, df_GMH) > PVAL <- pchisq(STATISTIC, PARAMETER, lower = FALSE) > result <- cbind(STATISTIC, PARAMETER, PVAL) > rownames(result) <- list("Nonzero Correlation", "Row Mean Scores Differ", > "General Association") > colnames(result) <- list("Statistic", "df", "p-value") > return (result) > } > > x <- array(c(23,23,20,24,7,10,13,10,2,5,5,6,18,18,13,9,6,6,13,15,1,2,2,2,8,12,11,7,6,4,6,7,3,4,2,4,12,15,14,13,9,3,8,6,1,2,3,4), > dim = c(4,3,4)) > > mh.test(x) > Statistic df p-value > Nonzero Correlation 6.34043 1 0.01180163 > Row Mean Scores Differ 6.59013 3 0.08617498 > General Association 10.59828 6 0.10161441 > > ______________________________________________ > R-devel@stat.math.ethz.ch mailing list > https://stat.ethz.ch/mailman/listinfo/r-devel >Thomas Lumley Assoc. Professor, Biostatistics tlumley@u.washington.edu University of Washington, Seattle