Jose Claudio Faria
2005-Jul-07 11:57 UTC
[R] Tables: Invitation to make a collective package
Hi All, I would like to make an invitation to make a collective package with all functions related to TABLES. I know that there are many packages with these functions, the original idea is collect all this functions and to make a single package, because is arduous for the user know all this functions broadcast in many packages. So, I think that the original packages can continue with its original functions, but, is very good to know that exist one package with many (I dream all) the functions related to tables. I've been working with these functions (while I am learning R programming): ####################### # Tables - Package # ####################### # # 1. Tables # # # Common function # tb.make.table.I <- function(x, start, end, h, right) { f <- table(cut(x, br=seq(start, end, h), right=right)) # Absolut freq fr <- f/length(x) # Relative freq frP <- 100*(f/length(x)) # Relative freq, % fac <- cumsum(f) # Cumulative freq facP <- 100*(cumsum(f/length(x))) # Cumulative freq, % fi <- round(f, 2) fr <- round(as.numeric(fr), 2) frP <- round(as.numeric(frP), 2) fac <- round(as.numeric(fac), 2) facP <- round(as.numeric(facP),2) res <- data.frame(fi, fr, frP, fac, facP) # Make final table names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') return(res) } # # Common function # tb.make.table.II <- function (x, k, breaks=c('Sturges', 'Scott', 'FD'), right=FALSE) { x <- na.omit(x) # User defines only x and/or 'breaks' # (x, {k,}[breaks, right]) if (missing(k)) { brk <- match.arg(breaks) switch(brk, Sturges = k <- nclass.Sturges(x), Scott = k <- nclass.scott(x), FD = k <- nclass.FD(x)) tmp <- range(x) start <- tmp[1] - abs(tmp[2])/100 end <- tmp[2] + abs(tmp[2])/100 R <- end-start h <- R/k } # User defines 'x' and 'k' # (x, k,[breaks, right]) else { tmp <- range(x) start <- tmp[1] - abs(tmp[2])/100 end <- tmp[2] + abs(tmp[2])/100 R <- end-start h <- R/abs(k) } tbl <- tb.make.table.I(x, start, end, h, right) return(tbl) } # # With Gabor Grotendieck suggestions (thanks Gabor, very much!) # tb.table <- function(x, ...) UseMethod("tb.table") # # Table form vectors # tb.table.default <- function(x, k, start, end, h, breaks=c('Sturges', 'Scott', 'FD'), right=FALSE) { # User defines nothing or not 'x' isn't numeric -> stop stopifnot(is.numeric(x)) x <- na.omit(x) # User defines only 'x' # (x, {k, start, end, h}, [breaks, right]) if (missing(k) && missing(start) && missing(end) && missing(h) ) { brk <- match.arg(breaks) switch(brk, Sturges = k <- nclass.Sturges(x), Scott = k <- nclass.scott(x), FD = k <- nclass.FD(x)) tmp <- range(x) start <- tmp[1] - abs(tmp[2])/100 end <- tmp[2] + abs(tmp[2])/100 R <- end-start h <- R/k } # User defines 'x' and 'k' # (x, k, {start, end, h}, [breaks, right]) else if (missing(start) && missing(end) && missing(h)) { stopifnot(length(k) >= 1) tmp <- range(x) start <- tmp[1] - abs(tmp[2])/100 end <- tmp[2] + abs(tmp[2])/100 R <- end-start h <- R/abs(k) } # User defines 'x', 'start' and 'end' # (x, {k,} start, end, {h,} [breaks, right]) else if (missing(k) && missing(h)) { stopifnot(length(start) >= 1, length(end) >=1) tmp <- range(x) R <- end-start k <- sqrt(abs(R)) if (k < 5) k <- 5 # min value of k h <- R/k } # User defines 'x', 'start', 'end' and 'h' # (x, {k,} start, end, h, [breaks, right]) else if (missing(k)) { stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1) } else stop('Error, please, see the function sintax!') tbl <- tb.make.table.I(x, start, end, h, right) return(tbl) } # # Table form data.frame # tb.table.data.frame <- function(df, k, by, breaks=c('Sturges', 'Scott', 'FD'), right=FALSE) { stopifnot(is.data.frame(df)) tmpList <- list() nameF <- character() nameY <- character() # User didn't defines a factor if (missing(by)) { logCol <- sapply(df, is.numeric) for (i in 1:ncol(df)) { if (logCol[i]) { x <- as.matrix(df[ ,i]) tbl <- tb.make.table.II(x, k, breaks, right) tmpList <- c(tmpList, list(tbl)) } } valCol <- logCol[logCol] names(tmpList) <- names(valCol) return(tmpList) } # User defines one factor else { namesdf <- names(df) pos <- which(namesdf == by) stopifnot(is.factor((df[[pos]]))) numF <- table(df[[pos]]) for(i in 1:length(numF)) { tmpdf <- subset(df, df[[pos]] == names(numF[i])) logCol <- sapply(tmpdf, is.numeric) for (j in 1:ncol(tmpdf)) { if (logCol[j]) { x <- as.matrix(tmpdf[ ,j]) tbl <- tb.make.table.II(x, k, breaks, right) newFY <- list(tbl) nameF <- names(numF[i]) nameY <- names(logCol[j]) nameFY <- paste(nameF,'.', nameY, sep="") names(newFY) <- sub(' +$', '', nameFY) tmpList <- c(tmpList, newFY) } } } } return(tmpList) } ############################ # Tables package # # to try # ############################ # 1.Tables # 1.1. Tables from vectors # Making a vector set.seed(1) x=rnorm(100, 5, 1) #x=as.factor(rep(1:10, 10)) # to try tbl <- tb.table(x) print(tbl); cat('\n') # Equal to above tbl <- tb.table(x, breaks='Sturges') print(tbl); cat('\n') tbl <- tb.table(x, breaks='Scott') print(tbl); cat('\n') tbl <- tb.table(x, breaks='FD') print(tbl); cat('\n') tbl <- tb.table(x, breaks='F', right=T) print(tbl); cat('\n') tbl <- tb.table(x, k=4) print(tbl); cat('\n') tbl <- tb.table(x, k=20) print(tbl); cat('\n') # Partial tbl <- tb.table(x, start=4, end=6) print(tbl); cat('\n') # Partial tbl <- tb.table(x, start=4.5, end=5.5) print(tbl); cat('\n') # Nonsense tbl <- tb.table(x, start=0, end=10, h=.5) print(tbl); cat('\n') # First and last class forced (fi=0) tbl <- tb.table(x, start=1, end=9, h=1) print(tbl); cat('\n') tbl <- tb.table(x, start=1, end=10, h=2) print(tbl); cat('\n') # 1.2. Tables from data.frame # 1.2.1. Making a data.frame mdf=data.frame(X1=rep(LETTERS[1:4], 25), X2=as.factor(rep(1:10, 10)), Y1=c(NA, NA, rnorm(96, 10, 1), NA, NA), Y2=rnorm(100, 58, 4), Y3=c(NA, NA, rnorm(98, -20, 2))) tbl <- tb.table(mdf) print(tbl) # Equal to above tbl <- tb.table(mdf, breaks='Sturges') print(tbl) tbl <- tb.table(mdf, breaks='Scott') print(tbl) tbl <- tb.table(mdf, breaks='FD') print(tbl) tbl <- tb.table(mdf, k=4) print(tbl) tbl <- tb.table(mdf, k=10) print(tbl) levels(mdf$X1) tbl=tb.table(mdf, k=5, by='X1') length(tbl) names(tbl) print(tbl) tbl=tb.table(mdf, breaks='FD', by='X1') print(tbl) # A 'big' result: X2 is a factor with 10 levels! tbl=tb.table(mdf, breaks='FD', by='X2') print(tbl) # 1.2.2. Using 'iris' tbl=tb.table(iris, k=5) print(tbl) levels(iris$Species) tbl=tb.table(iris, k=5, by='Species') length(tbl) names(tbl) print(tbl) tbl=tb.table(iris, k=5, by='Species', right=T) print(tbl) tbl=tb.table(iris, breaks='FD', by='Species') print(tbl) library(MASS) levels(Cars93$Origin) tbl=tb.table(Cars93, k=5, by='Origin') names(tbl) print(tbl) tbl=tb.table(Cars93, breaks='FD', by='Origin') print(tbl) I find that this package would be very useful and would like to hear the opinion of the interested parties in participating. Best regards, -- Jose Claudio Faria Brasil/Bahia/UESC/DCET Estatistica Experimental/Prof. Adjunto mails: joseclaudio.faria at terra.com.br jc_faria at uesc.br jc_faria at uol.com.br tel: 73-3634.2779
Gabor Grothendieck
2005-Jul-07 12:28 UTC
[R] Tables: Invitation to make a collective package
If the functionality you are thinking of already exists across multiple packages an alternative to creating a new package would be to create a task view as in: http://cran.r-project.org/src/contrib/Views/ as explained in the ctv package and the article in R News 5/1. On 7/7/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:> Hi All, > > I would like to make an invitation to make a collective package with all > functions related to TABLES. > > I know that there are many packages with these functions, the original idea is > collect all this functions and to make a single package, because is arduous for > the user know all this functions broadcast in many packages. > > So, I think that the original packages can continue with its original functions, > but, is very good to know that exist one package with many (I dream all) the > functions related to tables. > > I've been working with these functions (while I am learning R programming): > > ####################### > # Tables - Package # > ####################### > > # > # 1. Tables > # > > # > # Common function > # > tb.make.table.I <- function(x, > start, > end, > h, > right) > { > f <- table(cut(x, br=seq(start, end, h), right=right)) # Absolut freq > fr <- f/length(x) # Relative freq > frP <- 100*(f/length(x)) # Relative freq, % > fac <- cumsum(f) # Cumulative freq > facP <- 100*(cumsum(f/length(x))) # Cumulative freq, % > fi <- round(f, 2) > fr <- round(as.numeric(fr), 2) > frP <- round(as.numeric(frP), 2) > fac <- round(as.numeric(fac), 2) > facP <- round(as.numeric(facP),2) > res <- data.frame(fi, fr, frP, fac, facP) # Make final table > names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)') > return(res) > } > > # > # Common function > # > tb.make.table.II <- function (x, > k, > breaks=c('Sturges', 'Scott', 'FD'), > right=FALSE) > { > x <- na.omit(x) > > # User defines only x and/or 'breaks' > # (x, {k,}[breaks, right]) > if (missing(k)) { > brk <- match.arg(breaks) > switch(brk, > Sturges = k <- nclass.Sturges(x), > Scott = k <- nclass.scott(x), > FD = k <- nclass.FD(x)) > tmp <- range(x) > start <- tmp[1] - abs(tmp[2])/100 > end <- tmp[2] + abs(tmp[2])/100 > R <- end-start > h <- R/k > } > > # User defines 'x' and 'k' > # (x, k,[breaks, right]) > else { > tmp <- range(x) > start <- tmp[1] - abs(tmp[2])/100 > end <- tmp[2] + abs(tmp[2])/100 > R <- end-start > h <- R/abs(k) > } > tbl <- tb.make.table.I(x, start, end, h, right) > return(tbl) > } > > # > # With Gabor Grotendieck suggestions (thanks Gabor, very much!) > # > tb.table <- function(x, ...) UseMethod("tb.table") > > # > # Table form vectors > # > tb.table.default <- function(x, > k, > start, > end, > h, breaks=c('Sturges', 'Scott', 'FD'), > right=FALSE) > { > # User defines nothing or not 'x' isn't numeric -> stop > stopifnot(is.numeric(x)) > x <- na.omit(x) > > # User defines only 'x' > # (x, {k, start, end, h}, [breaks, right]) > if (missing(k) && missing(start) && missing(end) && missing(h) ) { > brk <- match.arg(breaks) > switch(brk, > Sturges = k <- nclass.Sturges(x), > Scott = k <- nclass.scott(x), > FD = k <- nclass.FD(x)) > tmp <- range(x) > start <- tmp[1] - abs(tmp[2])/100 > end <- tmp[2] + abs(tmp[2])/100 > R <- end-start > h <- R/k > } > > # User defines 'x' and 'k' > # (x, k, {start, end, h}, [breaks, right]) > else if (missing(start) && missing(end) && missing(h)) { > stopifnot(length(k) >= 1) > tmp <- range(x) > start <- tmp[1] - abs(tmp[2])/100 > end <- tmp[2] + abs(tmp[2])/100 > R <- end-start > h <- R/abs(k) > } > > # User defines 'x', 'start' and 'end' > # (x, {k,} start, end, {h,} [breaks, right]) > else if (missing(k) && missing(h)) { > stopifnot(length(start) >= 1, length(end) >=1) > tmp <- range(x) > R <- end-start > k <- sqrt(abs(R)) > if (k < 5) k <- 5 # min value of k > h <- R/k > } > > # User defines 'x', 'start', 'end' and 'h' > # (x, {k,} start, end, h, [breaks, right]) > else if (missing(k)) { > stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1) > } > > else stop('Error, please, see the function sintax!') > tbl <- tb.make.table.I(x, start, end, h, right) > return(tbl) > } > > > # > # Table form data.frame > # > tb.table.data.frame <- function(df, > k, > by, > breaks=c('Sturges', 'Scott', 'FD'), > right=FALSE) > { > stopifnot(is.data.frame(df)) > tmpList <- list() > nameF <- character() > nameY <- character() > > # User didn't defines a factor > if (missing(by)) { > logCol <- sapply(df, is.numeric) > for (i in 1:ncol(df)) { > if (logCol[i]) { > x <- as.matrix(df[ ,i]) > tbl <- tb.make.table.II(x, k, breaks, right) > tmpList <- c(tmpList, list(tbl)) > } > } > valCol <- logCol[logCol] > names(tmpList) <- names(valCol) > return(tmpList) > } > > # User defines one factor > else { > namesdf <- names(df) > pos <- which(namesdf == by) > stopifnot(is.factor((df[[pos]]))) > numF <- table(df[[pos]]) > for(i in 1:length(numF)) { > tmpdf <- subset(df, df[[pos]] == names(numF[i])) > logCol <- sapply(tmpdf, is.numeric) > for (j in 1:ncol(tmpdf)) { > if (logCol[j]) { > x <- as.matrix(tmpdf[ ,j]) > tbl <- tb.make.table.II(x, k, breaks, right) > newFY <- list(tbl) > nameF <- names(numF[i]) > nameY <- names(logCol[j]) > nameFY <- paste(nameF,'.', nameY, sep="") > names(newFY) <- sub(' +$', '', nameFY) > tmpList <- c(tmpList, newFY) > } > } > } > } > return(tmpList) > } > > ############################ > # Tables package # > # to try # > ############################ > > # 1.Tables > # 1.1. Tables from vectors > > # Making a vector > set.seed(1) > x=rnorm(100, 5, 1) > #x=as.factor(rep(1:10, 10)) # to try > > tbl <- tb.table(x) > print(tbl); cat('\n') > > # Equal to above > tbl <- tb.table(x, breaks='Sturges') > print(tbl); cat('\n') > > tbl <- tb.table(x, breaks='Scott') > print(tbl); cat('\n') > > tbl <- tb.table(x, breaks='FD') > print(tbl); cat('\n') > > tbl <- tb.table(x, breaks='F', right=T) > print(tbl); cat('\n') > > tbl <- tb.table(x, k=4) > print(tbl); cat('\n') > > tbl <- tb.table(x, k=20) > print(tbl); cat('\n') > > # Partial > tbl <- tb.table(x, start=4, end=6) > print(tbl); cat('\n') > > # Partial > tbl <- tb.table(x, start=4.5, end=5.5) > print(tbl); cat('\n') > > # Nonsense > tbl <- tb.table(x, start=0, end=10, h=.5) > print(tbl); cat('\n') > > # First and last class forced (fi=0) > tbl <- tb.table(x, start=1, end=9, h=1) > print(tbl); cat('\n') > > tbl <- tb.table(x, start=1, end=10, h=2) > print(tbl); cat('\n') > > > # 1.2. Tables from data.frame > > # 1.2.1. Making a data.frame > mdf=data.frame(X1=rep(LETTERS[1:4], 25), > X2=as.factor(rep(1:10, 10)), > Y1=c(NA, NA, rnorm(96, 10, 1), NA, NA), > Y2=rnorm(100, 58, 4), > Y3=c(NA, NA, rnorm(98, -20, 2))) > > tbl <- tb.table(mdf) > print(tbl) > > # Equal to above > tbl <- tb.table(mdf, breaks='Sturges') > print(tbl) > > tbl <- tb.table(mdf, breaks='Scott') > print(tbl) > > tbl <- tb.table(mdf, breaks='FD') > print(tbl) > > tbl <- tb.table(mdf, k=4) > print(tbl) > > tbl <- tb.table(mdf, k=10) > print(tbl) > > levels(mdf$X1) > tbl=tb.table(mdf, k=5, by='X1') > length(tbl) > names(tbl) > print(tbl) > > tbl=tb.table(mdf, breaks='FD', by='X1') > print(tbl) > > # A 'big' result: X2 is a factor with 10 levels! > tbl=tb.table(mdf, breaks='FD', by='X2') > print(tbl) > > # 1.2.2. Using 'iris' > tbl=tb.table(iris, k=5) > print(tbl) > > levels(iris$Species) > tbl=tb.table(iris, k=5, by='Species') > length(tbl) > names(tbl) > print(tbl) > > tbl=tb.table(iris, k=5, by='Species', right=T) > print(tbl) > > tbl=tb.table(iris, breaks='FD', by='Species') > print(tbl) > > library(MASS) > levels(Cars93$Origin) > tbl=tb.table(Cars93, k=5, by='Origin') > names(tbl) > print(tbl) > > tbl=tb.table(Cars93, breaks='FD', by='Origin') > print(tbl) > > I find that this package would be very useful and would like to hear the opinion > of the interested parties in participating. > > Best regards, > -- > Jose Claudio Faria > Brasil/Bahia/UESC/DCET > Estatistica Experimental/Prof. Adjunto > mails: > joseclaudio.faria at terra.com.br > jc_faria at uesc.br > jc_faria at uol.com.br > tel: 73-3634.2779 > > ______________________________________________ > R-help at stat.math.ethz.ch mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html >