An alternative solution that allows you to break it down by categories is in
the functions below. Comments/suggestions welcome and encouraged.
Many thanks are due to those who responded to an earlier post of
mine on a similar topic.
To use (assuming you have a data.frame called pathDist with columns
distances (your numeric data of interest), Capacity, Surface, and SITE
(vectors of categories)):
# Example usage. Run this only after bytable() and latex.table.by() (defined
# further down) have been sourced. latex.table.by() calls xtable(), so the
# xtable package must be loaded first; the print() arguments below are
# presumably those of xtable's print method -- confirm against its docs.

# Summary operations to apply, and the column heading to use for each one.
ops <- c(quote(mean), quote(median), quote(sd), quote(length))
ops.desc <- list(mean = "Mean", median = "Median", sd = "S.D.", length = "N")

# Named list of grouping vectors, one element per category.
pathDist.indices <- list(
  Surface = pathDist$Surface,
  Capacity = pathDist$Capacity,
  Site = pathDist$SITE
)

# One row per combination of grouping levels, one column per operation.
pathDist.bytable <- bytable(
  pathDist$distances, pathDist.indices,
  ops = ops, ops.desc = ops.desc, na.rm = TRUE
)

# Every grouping column becomes a multirow column in the LaTeX table.
pathDist.table <- latex.table.by(
  pathDist.bytable,
  num.by.vars = length(pathDist.indices),
  caption = "Path Characteristics"
)

# Write the table to a .tex file; `force` leaves the LaTeX markup unescaped.
print(
  pathDist.table,
  type = "latex",
  file = "E:/xdrive/projects/Ghana_air/backpack_data/plots/pathDist.table.tex",
  include.rownames = FALSE, include.colnames = TRUE,
  sanitize.text.function = force,
  tabular.environment = "longtable", floating = FALSE
)
# Make a table by group
#
# Usage:
#   print(latex.table.by(test.df), include.rownames = FALSE,
#         include.colnames = TRUE, sanitize.text.function = force)
# then add \usepackage{multirow} to the preamble of your LaTeX document.
# For longtable support, add tabular.environment = 'longtable' and
# floating = FALSE to the print command, then add \usepackage{longtable} to
# the LaTeX preamble.
#
# Arguments:
#   df           data frame whose first `num.by.vars` columns are the grouping
#                columns. Rows should be sorted by those columns, in
#                descending order of priority (column 1 is the outermost
#                group).
#   num.by.vars  number of leading grouping columns; a single whole number
#                between 1 and the number of columns of `df`.
#   ...          further arguments passed on to xtable().
#
# Returns the value of xtable(df, align = ..., ...), where the grouping
# columns of `df` have been rewritten as \multirow cells and the first column
# is prefixed with a \cline command for every row.
latex.table.by <- function(df, num.by.vars = 1, ...) {
  if (!is.numeric(num.by.vars) || length(num.by.vars) != 1) {
    stop("num.by.vars must be a number")
  }
  n_cols <- length(colnames(df))
  if (is.na(num.by.vars) || num.by.vars != floor(num.by.vars) ||
        num.by.vars < 1 || num.by.vars > n_cols) {
    stop(
      "num.by.vars must be a whole number between 1 and the number of ",
      "columns of df"
    )
  }

  by_cols <- seq_len(num.by.vars)
  n_rows <- length(df[[1]])
  # Untouched copy: group keys must be built from the original values, not
  # from cells that have already been rewritten as \multirow markup.
  original <- df

  # Start column of the \cline drawn above each row. Defaults to the first
  # non-grouping column; lowered below wherever a grouping column changes.
  cline_start <- rep(num.by.vars + 1, n_rows)

  # - Make grouping columns multirow - #
  # Work from the innermost grouping column outwards so that, for each row,
  # the outermost column that changes ends up in cline_start.
  for (b in rev(by_cols)) {
    # Key for the combination of grouping columns 1..b. A separator that is
    # unlikely to occur in data keeps ("a", "bc") distinct from ("ab", "c").
    key_parts <- unname(lapply(original[seq_len(b)], as.character))
    key <- do.call(paste, c(key_parts, list(sep = "\037")))

    # A multirow cell starts at the first row of every contiguous run of
    # equal keys and spans that run's length. Deriving both from the same
    # rle() keeps them aligned even if a key recurs non-contiguously.
    runs <- rle(key)
    run_starts <- cumsum(c(1L, runs$lengths))[seq_along(runs$lengths)]
    is_start <- seq_len(n_rows) %in% run_starts

    cell <- as.character(df[[b]])
    cell[is_start] <- paste(
      "\\multirow{", runs$lengths, "}{*}{", cell[is_start], "}"
    )
    cell[!is_start] <- ""
    df[[b]] <- cell

    # Store this grouping column's information in the cline start column
    cline_start[is_start] <- b
  }

  # Horizontal lines wherever a combination of grouping variables changes;
  # the line starts at the outermost grouping column that changed.
  df[[1]] <- paste0("\\cline{", cline_start, "-", n_cols, "}", df[[1]])

  # Column alignment: "|c" per grouping column plus one extra, presumably for
  # the row-name column xtable expects (TODO confirm), then "r|" for each
  # remaining column.
  align_by <- paste(rep("|c", length(by_cols) + 1), collapse = "")
  align_rest <- paste(rep("r|", n_cols - length(by_cols)), collapse = "")
  align_spec <- paste0("|", align_by, "|", align_rest)

  xtable(df, align = align_spec, ...)
}
# Summarise a vector by every combination of grouping levels.
#
# Arguments:
#   datavec   vector of values to summarise.
#   indices   list of grouping vectors, just like you would pass to by(),
#             but with sensible names for each vector (the names become the
#             column names of the result).
#   ops       list of quoted function names, e.g. c(quote(mean), quote(sd));
#             each is evaluated and applied to every group via by().
#   ops.desc  column headings for the operations, named by the deparsed
#             operation (e.g. list(mean = "Mean")). An operation with no
#             entry keeps its deparsed name as heading.
#   na.rm     if TRUE, drop rows whose LAST column is NA. This assumes the
#             NAs in the last column are the same as the NAs in all other
#             operation columns.
#   ...       accepted but not used by this function.
#
# Returns a data frame with one row per combination of grouping levels. The
# grouping columns come first, in reverse order of `indices` (the last index
# is the first column and varies slowest), followed by one numeric column per
# operation.
bytable <- function(datavec, indices, ops = c(quote(mean)),
                    ops.desc = list(mean = "Mean"), na.rm = TRUE, ...) {
  if (!is.list(indices)) {
    stop("indices needs to be a list")
  }
  if (length(indices) == 0) {
    stop("indices needs to contain at least one grouping vector")
  }

  # Levels of each grouping vector. by() (via tapply) coerces each index with
  # as.factor() and uses the levels as dimnames, so this matches its ordering
  # without running a dummy by() call.
  level_sets <- lapply(indices, function(index) levels(as.factor(index)))

  # expand.grid() varies the first index fastest, which is the order in which
  # as.numeric() flattens the array returned by by(). Reversing the columns
  # puts the slowest-varying (last) index first.
  grid <- expand.grid(
    level_sets,
    KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE
  )
  by.df <- grid[rev(seq_along(indices))]

  # Run -by- for each operation and append the result as a new column
  for (op in ops) {
    op_name <- paste(deparse(op), collapse = "")
    heading <- if (op_name %in% names(ops.desc)) {
      ops.desc[[op_name]]
    } else {
      op_name
    }
    # Append by position, then name the new column, so that an existing
    # column with the same name is never overwritten.
    by.df[[length(by.df) + 1L]] <- as.numeric(by(datavec, indices, eval(op)))
    names(by.df)[length(by.df)] <- heading
  }

  if (na.rm) {
    # Only the last column is inspected (see the na.rm note above).
    keep <- !is.na(by.df[[length(by.df)]])
    by.df <- by.df[keep, , drop = FALSE]
  }

  by.df
}
[[alternative HTML version deleted]]