This is an OpenPGP/MIME signed message (RFC 2440 and 3156)
--------------enig5A700CC15DEABD01445A1B70
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Hi,
I have modified the code originally posted to include capability for
arbitrary weightings for the rows and columns (which apply in the
nominal-ordinal and the ordinal-ordinal cases). The code is pasted below
(as well as an example based on job satisfaction data).
However, I cannot figure out how to make the output of class "htest"
make nicely-formatted output. When I use the class htest, with a list of
df's, test statistics, etc, the printed output looks ugly. (See the last
few commented lines at the end of the function for my aborted attempt to
do this. To see what happens when that code is used, comment out the
last four uncommented lines (the ones that mention variable "result"),
and uncomment all the commented lines at the end.)
I am an R newbie by any standard, so if anyone could clue me in on how
to use htest in the case of multiple statistics and dfs, that would be
great.
Once we have that, does that mean that this can serve as a good
replacement for mantelhaen.test() in R?
Any comments appreciated!
code follows:
------
# takes a 3-d array x, scores for rows, and scores for columns
# runs nominal-nominal, ordinal-nominal, and ordinal-ordinal CMH tests
# this function is based on one posted at
# http://bugs.r-project.org/cgi-bin/R/wishlist?id=3D7779;user=3Dguest
mh.test <- function(x, row_scores=3DNULL, col_scores=3DNULL) {
if (length(dim(x)) !=3D 3){
stop("x must be a 3 dimensional array")
}
if (any(apply(x, 3, sum) < 2)){
stop("sample size in each stratum must be > 1")
}
I <- dim(x)[1]; J <- dim(x)[2]; K <- dim(x)[3]
if ( (length(row_scores) !=3D I & !is.null(row_scores) ) |
(length(col_scores) !=3D J & !is.null(col_scores)) ){
stop("scores dimensions must equal to data dimensions")
}
if (is.null(row_scores)){
row_scores =3D 1:I
}
if (is.null(col_scores)) {
col_scores =3D 1:J
}
df_GMH <- (I - 1) * (J - 1); df_SMH <- I - 1; df_CSMH <- 1
A_GMH <- cbind(diag(I-1), 0) %x% cbind(diag(J-1), 0)
#A_SMH <- cbind(diag(I-1), 0) %x% t(1:J)
A_SMH <- cbind(diag(I-1), 0) %x% t(col_scores)
#A_CSMH <- t(1:I) %x% t(1:J)
A_CSMH <- t(row_scores) %x% t(col_scores)
Y_GMH <- matrix(0, nc =3D 1, nr =3D df_GMH)
Y_SMH <- matrix(0, nc =3D 1, nr =3D df_SMH)
Y_CSMH <- matrix(0, nc =3D 1, nr =3D df_CSMH)
S_GMH <- matrix(0, nc =3D df_GMH, nr =3D df_GMH)
S_SMH <- matrix(0, nc =3D df_SMH, nr =3D df_SMH)
S_CSMH <- matrix(0, nc =3D df_CSMH, nr =3D df_CSMH)
for(k in 1:K) {
V <- NULL
f <- x[, , k]
ntot <- sum(f)
p_ip <- apply(f, 1, sum) / ntot
p_pj <- apply(f, 2, sum) / ntot
m <- p_ip %x% p_pj * ntot
V <- ntot^2 * ((diag(p_ip) - p_ip %*% t(p_ip)) %x% (diag(p_pj) -
p_pj %*% t(p_pj))) / (ntot-1)
Y_GMH <- Y_GMH + A_GMH %*% (c(t(f)) - m)
Y_SMH <- Y_SMH + A_SMH %*% (c(t(f)) - m)
Y_CSMH <- Y_CSMH + A_CSMH %*% (c(t(f)) - m)
S_GMH <- S_GMH + A_GMH %*% V %*% t(A_GMH)
S_SMH <- S_SMH + A_SMH %*% V %*% t(A_SMH)
S_CSMH <- S_CSMH + A_CSMH %*% V %*% t(A_CSMH)
}
Q_GMH <- t(Y_GMH) %*% solve(S_GMH) %*% Y_GMH
Q_SMH <- t(Y_SMH) %*% solve(S_SMH) %*% Y_SMH
Q_CSMH <- t(Y_CSMH) %*% solve(S_CSMH) %*% Y_CSMH
STATISTIC <- c(Q_CSMH, Q_SMH, Q_GMH)
PARAMETER <- c(df_CSMH, df_SMH, df_GMH)
PVAL <- pchisq(STATISTIC, PARAMETER, lower =3D FALSE)
result <- cbind(STATISTIC, PARAMETER, PVAL)
rownames(result) <- list("Nonzero Correlation", "Row Mean
Scores
Differ", "General Association")
colnames(result) <- list("Statistic", "df",
"p-value")
return (result)
#DNAME <- deparse(substitute(x))
#METHOD =3D "Cochran-Mantel-Haenszel Statistics (Based on Table
Scores)"
#names(STATISTIC) =3D list("Nonzero Correlation", "Row Mean
Scores
Differ", "General Association")
#names(PARAMETER) =3D list("df", "df", "df")
#structure(list(statistic =3D STATISTIC, parameter =3D PARAMETER,
p.value =3D PVAL, method =3D METHOD, data.name=3DDNAME), class =3D
"power=2Ehtest")
#data.name =3D DNAME, observed =3D x, expected =3D E, residuals =3D (x -
E)/sqrt(E)), class =3D "htest")
}
--------test results:--------> Satisfaction <-
+ as.table(array(c(1, 2, 0, 0, 3, 3, 1, 2,
+ 11, 17, 8, 4, 2, 3, 5, 2,
+ 1, 0, 0, 0, 1, 3, 0, 1,
+ 2, 5, 7, 9, 1, 1, 3, 6),
+ dim =3D c(4, 4, 2),
+ dimnames =3D
+ list(Income =3D
+ c("<5000", "5000-15000",
+ "15000-25000", ">25000"),
+ "JobSatisfaction" =3D
+ c("V_D", "L_S", "M_S",
"V_S"),
+ Gender =3D c("Female",
"Male"))))> Satisfaction
, , Gender =3D Female
JobSatisfaction
Income V_D L_S M_S V_S
<5000 1 3 11 2
5000-15000 2 3 17 3
15000-25000 0 1 8 5
>25000 0 2 4 2
, , Gender =3D Male
JobSatisfaction
Income V_D L_S M_S V_S
<5000 1 1 2 1
5000-15000 0 3 5 1
15000-25000 0 0 7 3
>25000 0 1 9 6> mh.test(Satisfaction, c(3,10,20,35), c(1,3,4,5))
Statistic df p-value
Nonzero Correlation 6.156301 1 0.01309447
Row Mean Scores Differ 9.034222 3 0.02883933
General Association 10.200089 9 0.33453118
--------------enig5A700CC15DEABD01445A1B70
Content-Type: application/pgp-signature; name="signature.asc"
Content-Description: OpenPGP digital signature
Content-Disposition: attachment; filename="signature.asc"
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2.2 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iD8DBQFFRqs35/k4vslVlLIRAg/5AJ93AGEdIakk7T27coDij6jAdo7n6gCeNP/0
YoIcLAawlVvg4yILUXHJZM0=+GEH
-----END PGP SIGNATURE-----
--------------enig5A700CC15DEABD01445A1B70--