On Thu, 2005-06-23 at 23:12 +0800, ronggui wrote:> i have a data frame(dat) which has many variables.and i use the
> following script to get the crosstable.
>
>
>danx2<-c("x1.1","x1.2","x1.3","x1.4","x1.5","x2","x4","x5","x6","x7","x8.1","x8.2","x8.3","x8.4","x11",
>
"x13","x17","x19","x20","x21")
>
>indep<-c("x23","x24","x25","x26","x27","x28.1","x28.2","x29")
> >for (k in indep){
> for (i in danx2){
> a<-chisq.test(dat[,i],dat[,k])$p.v<=0.05
> if (a)
>
{CrossTable(dat[,i],dat[,k],chisq=T,format="SPSS");cat(rep("=",50),"\n","\n")}
> }
>
> it has a little pitfall:the dimnames of table is dat[,i] and
> dat[,k],but i want it to be like x2,x23...
> is there any good way to do this?
> and in the command
CrossTable(dat[,i],dat[,k],chisq=T,format="SPSS")
> in the loop,is there any other way to get the variable other than
> dat[,i] and dat[,k]?
> thank you !
Hi,
I am in between meetings here. Sorry for the delay in my reply to your
query.
The best solution is for me to add two new args to CrossTable() to allow
you to specify these names explicitly, rather than having them as the
way they are now, which simply takes the x and y args and does:
RowData <- deparse(substitute(x))
ColData <- deparse(substitute(y))
The result is that whatever is passed as the x and y arguments, will be
used as the titles for the row and column labels as you have noted.
In the mean time, I am attaching an update to CrossTable (which I have
not extensively tested yet), that you can source() into R via the
console. The update has two new args called "RowData" and
"ColData"
which will default to NULL, so as to not impact current default
behavior. You can then set these as part of your loop by passing the
index values.
Using one of the examples in ?CrossTable:
> CrossTable(infert$education, infert$induced, RowData =
"Education",
ColData = "Induced")
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 248
| Induced
Education | 0 | 1 | 2 | Row Total |
-------------|-----------|-----------|-----------|-----------|
0-5yrs | 4 | 2 | 6 | 12 |
| 1.232 | 0.506 | 9.898 | |
| 0.333 | 0.167 | 0.500 | 0.048 |
| 0.028 | 0.029 | 0.162 | |
| 0.016 | 0.008 | 0.024 | |
-------------|-----------|-----------|-----------|-----------|
6-11yrs | 78 | 27 | 15 | 120 |
| 1.121 | 1.059 | 0.471 | |
| 0.650 | 0.225 | 0.125 | 0.484 |
| 0.545 | 0.397 | 0.405 | |
| 0.315 | 0.109 | 0.060 | |
-------------|-----------|-----------|-----------|-----------|
12+ yrs | 61 | 39 | 16 | 116 |
| 0.518 | 1.627 | 0.099 | |
| 0.526 | 0.336 | 0.138 | 0.468 |
| 0.427 | 0.574 | 0.432 | |
| 0.246 | 0.157 | 0.065 | |
-------------|-----------|-----------|-----------|-----------|
Column Total | 143 | 68 | 37 | 248 |
| 0.577 | 0.274 | 0.149 | |
-------------|-----------|-----------|-----------|-----------|
Let me know if this works or you find a problem. I will do further
testing here as soon as time permits and get an update to Greg and Nitin
to include into gregmisc.
HTH,
Marc Schwartz
-------------- next part --------------
CrossTable <- function (x, y,
digits = 3,
max.width = 5,
expected = FALSE,
prop.r = TRUE,
prop.c = TRUE,
prop.t = TRUE,
prop.chisq=TRUE,
chisq = FALSE,
fisher = FALSE,
mcnemar = FALSE,
resid = FALSE,
sresid = FALSE,
asresid = FALSE,
missing.include = FALSE,
format=c("SAS","SPSS"),
RowData=NULL,
ColData=NULL,
...
)
{
format=match.arg(format)
## Ensure that max.width >= 1
if (max.width < 1)
stop("max.width must be >= 1")
## Set 'x' vector flag
vector.x <- FALSE
## Ensure that if (expected), a chisq is done
if (expected)
chisq <- TRUE
if (missing(y))
{
## is x a vector?
if (is.null(dim(x)))
{
#TotalN <- length(x)
if (missing.include)
x <- factor(x,exclude=NULL)
else
## Remove any unused factor levels
x <- factor(x)
t <- t(as.matrix(table(x)))
vector.x <- TRUE
}
## is x a matrix?
else if (length(dim(x) == 2))
{
if(any(x < 0) || any(is.na(x)))
stop("all entries of x must be nonnegative and finite")
## Add generic dimnames if required
## check each dimname separately, in case user has defined one or the
other
if (is.null(rownames(x)))
rownames(x) <- paste("[", 1:nrow(x), ",]",
sep = "")
if (is.null(colnames(x)))
colnames(x) <- paste("[,", 1:ncol(x), "]",
sep = "")
t <- x
}
else
stop("x must be either a vector or a 2 dimensional matrix, if y is
not given")
}
else
{
if(length(x) != length(y))
stop("x and y must have the same length")
## Create Titles for Table From Vector Names
if(is.null(RowData))
RowData <- deparse(substitute(x))
if (is.null(ColData))
ColData <- deparse(substitute(y))
if (missing.include)
{
x <- factor(x,exclude=c())
y <- factor(y,exclude=c())
}
else
{
## Remove unused factor levels from vectors
x <- factor(x)
y <- factor(y)
}
## Generate table
t <- table(x, y)
}
## if t is not at least a 2 x 2, do not do stats
## even if any set to TRUE. Do not do col/table props
if (any(dim(t) < 2))
{
prop.c <- prop.r <- prop.chisq <- chisq <- expected <-
fisher <- mcnemar <- FALSE
}
## Generate cell proportion of row
CPR <- prop.table(t, 1)
## Generate cell proportion of col
CPC <- prop.table(t, 2)
## Generate cell proportion of total
CPT <- prop.table(t)
## Generate summary counts
GT <- sum(t)
RS <- rowSums(t)
CS <- colSums(t)
if (length(dim(x) == 2))
TotalN <- GT
else
TotalN <- length(x)
## Column and Row Total Headings
ColTotal <- "Column Total"
RowTotal <- "Row Total"
## Set consistent column widths based upon dimnames and table values
CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]), nchar(RS),
nchar(CS), nchar(RowTotal)))
RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal)))
## Adjust first column width if Data Titles present
if (exists("RowData"))
RWidth <- max(RWidth, nchar(RowData))
## Create row separators
RowSep <- paste(rep("-", CWidth + 2), collapse = "")
RowSep1 <- paste(rep("-", RWidth + 1), collapse = "")
SpaceSep1 <- paste(rep(" ", RWidth), collapse = "")
SpaceSep2 <- paste(rep(" ", CWidth), collapse = "")
## Create formatted Names
FirstCol <- formatC(dimnames(t)[[1]], width = RWidth, format =
"s")
ColTotal <- formatC(ColTotal, width = RWidth, format = "s")
RowTotal <- formatC(RowTotal, width = CWidth, format = "s")
## Perform Chi-Square Tests
## Needs to be before the table output, in case (expected = TRUE)
if (chisq)
{
if (all(dim(t) == 2))
CSTc <- chisq.test(t, correct = TRUE, ...)
CST <- chisq.test(t, correct = FALSE, ...)
}
else
CST <- suppressWarnings(chisq.test(t, correct = FALSE))
if (asresid & !vector.x)
ASR <- (CST$observed-CST$expected)/sqrt(CST$expected*((1-RS/GT) %*%
t(1-CS/GT)))
print.CrossTable.SAS <- function()
{
if (exists("RowData"))
{
cat(SpaceSep1, "|", ColData, "\n")
cat(formatC(RowData, width = RWidth, format= "s"),
formatC(dimnames(t)[[2]], width = CWidth, format = "s"),
RowTotal, sep = " | ", collapse = "\n")
}
else
cat(SpaceSep1, formatC(dimnames(t)[[2]], width = CWidth,
format = "s"), RowTotal, sep = " |
",
collapse = "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse =
"\n")
## Print table cells
for (i in 1:nrow(t))
{
cat(FirstCol[i], formatC(c(t[i, ], RS[i]), width = CWidth, format =
"d"),
sep = " | ", collapse = "\n")
if (expected)
cat(SpaceSep1, formatC(CST$expected[i, ], digits = digits,
format = "f", width = CWidth),
SpaceSep2, sep = " | ",
collapse = "\n")
if (prop.chisq)
cat(SpaceSep1, formatC((((CST$expected[i, ]-t[i,
])^2)/CST$expected[i, ]),
width = CWidth, digits = digits, format = "f"),
SpaceSep2,
sep = " | ", collapse = "\n")
if (prop.r)
cat(SpaceSep1, formatC(c(CPR[i, ], RS[i]/GT),
width = CWidth, digits = digits, format =
"f"),
sep = " | ", collapse = "\n")
if (prop.c)
cat(SpaceSep1, formatC(CPC[i, ], width = CWidth,
digits = digits, format = "f"),
SpaceSep2,
sep = " | ", collapse = "\n")
if (prop.t)
cat(SpaceSep1, formatC(CPT[i, ], width = CWidth,
digits = digits, format = "f"),
SpaceSep2,
sep = " | ", collapse = "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|",
collapse = "\n")
}
## Print Column Totals
cat(ColTotal, formatC(c(CS, GT), width = CWidth, format = "d"),
sep = " | ",
collapse = "\n")
if (prop.c)
cat(SpaceSep1, formatC(CS/GT, width = CWidth, digits = digits,
format = "f"), SpaceSep2, sep = "
| ", collapse = "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse =
"\n")
} ## End Of print.Crosstable.SAS function
print.CrossTable.SPSS <- function()
{
## similar to SPSS behaviour
## Print Column headings
if (exists("RowData"))
{
cat(SpaceSep1, "|", ColData, "\n")
cat(cat(formatC(RowData, width = RWidth, format =
"s"),sep=" | ",
collapse=""),
cat(formatC(dimnames(t)[[2]], width = CWidth-1, format =
"s"),
sep=" | ", collapse=""),
cat(RowTotal, sep = " | ", collapse = "\n"),
sep="", collapse="")
}
else
cat(SpaceSep1, formatC(dimnames(t)[[2]], width = CWidth, format =
"s"), RowTotal,
sep = " | ", collapse = "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse =
"\n")
## Print table cells
for (i in 1:nrow(t))
{
cat(cat(FirstCol[i], sep=" | ", collapse=""),
cat(formatC(c(t[i, ], RS[i]), width = CWidth-1, format =
"d"),
sep = " | ", collapse = "\n"),
sep="", collapse="")
if (expected)
cat(cat(SpaceSep1, sep=" | ", collapse=""),
cat(formatC(CST$expected[i, ], digits = digits, format =
"f",
width = CWidth-1), sep=" | ",
collapse=""),
cat(SpaceSep2, sep = " | ", collapse =
"\n"), sep="", collapse="")
if (prop.chisq)
cat(cat(SpaceSep1, sep=" | ", collapse=""),
cat(formatC((((CST$expected[i, ]-t[i, ])^2)/CST$expected[i, ]),
digits = digits, format = "f",
width = CWidth-1), sep=" | ",
collapse=""),
cat(SpaceSep2, sep = " | ", collapse =
"\n"), sep="", collapse="")
if (prop.r)
cat(cat(SpaceSep1, sep=" | ", collapse=""),
cat(formatC(c(CPR[i, ]*100, 100*RS[i] / GT),
width = CWidth-1, digits = digits, format =
"f"),
sep = "% | ", collapse = "\n"),
sep="", collapse="")
if (prop.c)
cat(cat(SpaceSep1, sep=" | ", collapse=""),
cat(formatC(CPC[i, ]*100, width = CWidth-1,
digits = digits, format = "f"),
sep="% | ", collapse=""),
cat(SpaceSep2, sep = " | ", collapse =
"\n"), sep="", collapse="")
if (prop.t)
cat(cat(SpaceSep1, sep=" | ", collapse=""),
cat(formatC(CPT[i, ]*100, width = CWidth-1, digits = digits,
format = "f"), sep="% | ",
collapse=""),
cat(SpaceSep2, sep = " | ", collapse =
"\n"), sep="", collapse="")
if (resid)
cat(cat(SpaceSep1,sep=" | ",collapse = ""),
cat(formatC(CST$observed[i, ]-CST$expected[i, ], digits =
digits,
format = "f", width = CWidth-1), sep =
" | ",
collapse = ""),
cat(SpaceSep2,sep = " | ", collapse =
"\n"),sep="",collapse="")
if (sresid)
cat(cat(SpaceSep1,sep=" | ",collapse = ""),
cat(formatC(CST$residual[i, ], digits = digits,
format = "f", width = CWidth-1), sep =
" | ",
collapse = ""),
cat(SpaceSep2,sep = " | ", collapse =
"\n"),sep="",collapse="")
if (asresid)
cat(cat(SpaceSep1,sep=" | ",collapse = ""),
cat(formatC(ASR[i, ], digits = digits,
format = "f", width = CWidth-1), sep =
" | ",
collapse = ""),
cat(SpaceSep2,sep = " | ", collapse =
"\n"),sep="",collapse="")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse =
"\n")
}
## Print Column Totals
cat(cat(ColTotal,sep=" | ",collapse=""),
cat(formatC(c(CS, GT), width = CWidth-1, format = "d"), sep
= " | ",
collapse = "\n"),sep="",collapse="")
if (prop.c)
cat(cat(SpaceSep1,sep=" | ",collapse=""),
cat(formatC(100*CS/GT, width = CWidth-1, digits = digits,
format = "f"),sep = "% | ", collapse
= ""),
cat(SpaceSep2,sep = " | ", collapse =
"\n"),sep="",collapes="")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse =
"\n")
} ## End of print.CrossTable.SPSS function
## Print Function For 1 X N Vector In SAS Format
print.CrossTable.vector.SAS <- function()
{
if (length(t) > max.width)
{
## set breakpoints for output based upon max.width
final.row <- length(t) %% max.width
max <- length(t) - final.row
## Define breakpoint indices for each row
start <- seq(1, max, max.width)
end <- start + (max.width - 1)
## Add final.row if required
if (final.row > 0)
{
start <- c(start, end[length(end)] + 1)
end <- c(end, end[length(end)] + final.row)
}
}
else
{
## Each value printed horizontally in a single row
start <- 1
end <- length(t)
}
SpaceSep3 <- paste(SpaceSep2, " ", sep = "")
for (i in 1:length(start))
{
## print column labels
cat(SpaceSep2, formatC(dimnames(t)[[2]][start[i]:end[i]], width =
CWidth, format = "s"),
sep = " | ", collapse = "\n")
cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) + 1), sep =
"|", collapse = "\n")
cat(SpaceSep2, formatC(t[, start[i]:end[i]], width = CWidth, format =
"d"), sep = " | ", collapse = "\n")
cat(SpaceSep2, formatC(CPT[, start[i]:end[i]], width = CWidth, digits
= digits, format = "f"),
sep = " | ", collapse = "\n")
cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) + 1), sep =
"|", collapse = "\n")
cat("\n\n")
}
} ## End of print.Crosstable.vector.SAS function
## Print function for 1 X N vector in SPSS format
print.CrossTable.vector.SPSS <- function()
{
if (length(t) > max.width)
{
## set breakpoints for output based upon max.width
final.row <- length(t) %% max.width
max <- length(t) - final.row
## Define breakpoint indices for each row
start <- seq(1, max, max.width)
end <- start + (max.width - 1)
## Add final.row if required
if (final.row > 0)
{
start <- c(start, end[length(end)] + 1)
end <- c(end, end[length(end)] + final.row)
}
}
else
{
## Each value printed horizontally in a single row
start <- 1
end <- length(t)
}
SpaceSep3 <- paste(SpaceSep2, " ", sep = "")
for (i in 1:length(start))
{
cat(cat(SpaceSep2,sep=" | ",collapse=""),
cat(formatC(dimnames(t)[[2]][start[i]:end[i]],
width = CWidth-1, format = "s"), sep =
" | ", collapse = "\n"),
sep="",collapse="")
cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) +
1), sep = "|", collapse = "\n")
cat(cat(SpaceSep2,sep=" | ",collapse=""),
cat(formatC(t[, start[i]:end[i]], width = CWidth-1, format =
"d"),
sep = " | ", collapse = "\n"),
sep="",collapse="")
cat(cat(SpaceSep2, sep=" | ",collapse=""),
cat(formatC(CPT[, start[i]:end[i]], width = CWidth-1,
digits = digits, format = "f"), sep =
"% | ",
collapse =
""),sep="",collapse="\n")
cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) +
1), sep = "|", collapse = "\n")
} ## End of for (i in 1:length(start))
if (GT < TotalN)
cat("\nNumber of Missing Observations: ",TotalN-GT,"
(",100*(TotalN-GT)/TotalN,"%)\n",sep="")
} ## End of print.CrossTable.vector.SPSS Function
print.statistics <- function()
{
## Print Statistics
if (chisq)
{
cat(rep("\n", 2))
cat("Statistics for All Table Factors\n\n\n")
cat(CST$method,"\n")
cat("------------------------------------------------------------\n")
cat("Chi^2 = ", CST$statistic, " d.f. = ",
CST$parameter, " p = ", CST$p.value, "\n\n")
if (all(dim(t) == 2))
{
cat(CSTc$method,"\n")
cat("------------------------------------------------------------\n")
cat("Chi^2 = ", CSTc$statistic, " d.f. = ",
CSTc$parameter, " p = ", CSTc$p.value, "\n")
}
}
## Perform McNemar tests
if (mcnemar)
{
McN <- mcnemar.test(t, correct = FALSE)
cat(rep("\n", 2))
cat(McN$method,"\n")
cat("------------------------------------------------------------\n")
cat("Chi^2 = ", McN$statistic, " d.f. = ",
McN$parameter, " p = ", McN$p.value, "\n\n")
if (all(dim(t) == 2))
{
McNc <- mcnemar.test(t, correct = TRUE)
cat(McNc$method,"\n")
cat("------------------------------------------------------------\n")
cat("Chi^2 = ", McNc$statistic, " d.f. = ",
McNc$parameter, " p = ", McNc$p.value, "\n")
}
}
## Perform Fisher Tests
if (fisher)
{
cat(rep("\n", 2))
FTt <- fisher.test(t, alternative = "two.sided")
if (all(dim(t) == 2))
{
FTl <- fisher.test(t, alternative = "less")
FTg <- fisher.test(t, alternative = "greater")
}
cat("Fisher's Exact Test for Count Data\n")
cat("------------------------------------------------------------\n")
if (all(dim(t) == 2))
{
cat("Sample estimate odds ratio: ", FTt$estimate,
"\n\n")
cat("Alternative hypothesis: true odds ratio is not equal to
1\n")
cat("p = ", FTt$p.value, "\n")
cat("95% confidence interval: ", FTt$conf.int,
"\n\n")
cat("Alternative hypothesis: true odds ratio is less than
1\n")
cat("p = ", FTl$p.value, "\n")
cat("95% confidence interval: ", FTl$conf.int,
"\n\n")
cat("Alternative hypothesis: true odds ratio is greater than
1\n")
cat("p = ", FTg$p.value, "\n")
cat("95% confidence interval: ", FTg$conf.int,
"\n\n")
}
else
{
cat("Alternative hypothesis: two.sided\n")
cat("p = ", FTt$p.value, "\n")
}
} ## End Of If(Fisher) Loop
cat(rep("\n", 2))
## Create list of results for invisible()
CT <- list(t = t, prop.row = CPR, prop.col = CPC, prop.tbl = CPT)
if (any(chisq, fisher, mcnemar))
{
if (all(dim(t) == 2))
{
if (chisq)
CT <- c(CT, list(chisq = CST, chisq.corr = CSTc))
if (fisher)
CT <- c(CT, list(fisher.ts = FTt, fisher.tl = FTl, fisher.gt
= FTg))
if (mcnemar)
CT <- c(CT, list(mcnemar = McN, mcnemar.corr = McNc))
}
else
{
if (chisq)
CT <- c(CT, list(chisq = CST))
if (fisher)
CT <- c(CT, list(fisher.ts = FTt))
if (mcnemar)
CT <- c(CT, list(mcnemar = McN))
}
} ## End of if(any(chisq, fisher, mcnemar)) loop
## return list(CT)
invisible(CT)
} ## End of print.statistics function
## Printing the tables
if (format=="SAS")
{
## Print Cell Layout
cat(rep("\n", 2))
cat(" Cell Contents\n")
cat("|-------------------------|\n")
cat("| N |\n")
if (expected)
cat("| Expected N |\n")
if (prop.chisq)
cat("| Chi-square contribution |\n")
if (prop.r)
cat("| N / Row Total |\n")
if (prop.c)
cat("| N / Col Total |\n")
if (prop.t)
cat("| N / Table Total |\n")
cat("|-------------------------|\n")
cat(rep("\n", 2))
cat("Total Observations in Table: ", GT, "\n")
cat(rep("\n", 2))
if (!vector.x)
print.CrossTable.SAS()
else
print.CrossTable.vector.SAS()
print.statistics()
}
else if (format == "SPSS")
{
## Print Cell Layout
cat("\n")
cat(" Cell Contents\n")
cat("|-------------------------|\n")
cat("| Count |\n")
if (!vector.x)
{
if (expected)
cat("| Expected Values |\n")
if (prop.chisq)
cat("| Chi-square contribution |\n")
if (prop.r)
cat("| Row Percent |\n")
if (prop.c)
cat("| Column Percent |\n")
if (prop.t)
cat("| Total Percent |\n")
if (resid)
cat("| Residual |\n")
if (sresid)
cat("| Std Residual |\n")
if (asresid)
cat("| Adj Std Resid |\n")
}
else
cat("| Row Percent |\n")
cat("|-------------------------|\n")
cat("\n")
cat("Total Observations in Table: ", GT, "\n")
cat("\n")
if (!vector.x)
print.CrossTable.SPSS()
else print.CrossTable.vector.SPSS()
print.statistics()
if (any(dim(t) >= 2) & any(chisq,mcnemar,fisher))
{
MinExpF = min(CST$expected)
cat(' Minimum expected
frequency:',MinExpF,"\n")
NMinExpF = length(CST$expected[which(CST$expected<5)])
if (NMinExpF > 0)
{
NCells = length(CST$expected)
cat('Cells with Expected Frequency < 5:
',NMinExpF,' of ',NCells,"
(",100*NMinExpF/NCells,"%)\n",sep="")
}
cat("\n")
} ## End of if (any(dim(t)...
} ## End of if(format=="SPSS") loop
else
stop("unknown format")
} ## End of the main function Crosstable.R