Hi. I did write those functions, and sent them (I thought) to one of the R maintainers to see whether they would be appropriate for inclusion (because I'd seen some requests on the mailing lists). However, I'm happy to post them -- I should have thought of it before. WARNING: I've tested these functions on some data arising in my work and also on the USArrests data that comes with R, and they worked for me. However, I make no guarantee that they will work for you, and neither Merck (my employer) nor I will be responsible for anything bad that happens to you if you use them. (This is repeated in the main function.) That said, I'd be happy to get suggestions for improvements (including any examples that it breaks down on). Regards, Matthew Wiener Applied Computer Science and Mathematics Department Merck Research Labs Rahway, NJ 07065-0900 732-594-5303 ----------------------------------------- First the two functions: f.get.subtrees is really just a shell to loop through f.make.subtree. (After the functions there's a .Rd file.) "f.get.subtrees" <- function(tree, k = NULL, h = NULL, add.element = NULL){ ## tree is a tree, k and h are as in cutree. ## additional elements is the name of any additional info ## connected with tree nodes. The additional elements referred ## to should have the same length as tree$labels. ## groups <- cutree(tree, k, h) subtrees <- list() for(i in 1:length(unique(groups))){ subtrees[[i]] <- f.make.subtree(tree, groups, i, add.element = add.element) } subtrees } "f.make.subtree" <- function(tree, groups, n = 1, add.element = NULL){ ## ## (Required legal stuff) ## Copyright: Matthew Wiener ## Applied Computer Science & Mathematics Dept. ## Merck Research Labs ## Rahway, NJ 07090 USA ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## ## ##################################################################### ## ## Now here's the program itself. ## ## this function can be called by f.get.subtrees, in which ## case the groups will be defined using cutree. ## ## which nodes are in the subtree? which.nodes <- which(groups == n) names(which.nodes) <- NULL ## get the order elements for those labels that are in ## the subtree subtree.labels <- tree$labels[which.nodes] subtree.order <- tree$order[tree$labels[tree$order] %in% subtree.labels] ## Figure out which rows of the merge tree ## are going to need to be included: first find out which ## rows contain leaves in the group (leaves indicated by ## negative numbers), then get the associated rows which.merge.elements <- which(-tree$merge %in% which.nodes) which.merge.rows <- sort(unique(which.merge.elements %% nrow(tree$merge))) ## if the last row is in there, it will show up as 0 ## needs to be changed to number of rows which.merge.rows[which.merge.rows == 0] <- nrow(tree$merge) ## now collect rows referred to be the rows you've got ## until there's nothing more to get old.length <- 0 new.length <- length(which.merge.rows) ## trap the condition with only one row if(length(which.merge.elements) == 1){ res <- list(merge = NULL, heights = tree$height[which.merge.rows], order = 1, labels = subtree.labels, method = tree$method, call = tree$call, dist.method = tree$dist.method) } else{ while(new.length - old.length > 0){ old.length <- new.length ## get new rows ## we need any row in which a row we've already got is referred to new.rows <- numeric(0) more.new.rows <- numeric(0) row.elements <- as.vector(tree$merge[which.merge.rows,]) pos.elements <- row.elements[row.elements > 0] ## need rows we refer to ## as long as they involve an element we're keeping if(length(pos.elements) > 0) new.rows <- pos.elements[apply(tree$merge[pos.elements,, drop = F], 1, function(x){ any(x > 0) & all(-x[x<0] %in% which.nodes)})] ## also need the ones referring to rows we already have more.new.rows <- which(apply(tree$merge, 1, function(x){all(x %in% which.merge.rows)})) which.merge.rows <- sort(unique(c(which.merge.rows, new.rows, more.new.rows))) new.length <- length(which.merge.rows) } merge.list <- tree$merge[which.merge.rows,,drop = F] merge.height <- tree$height[which.merge.rows] ## fix references -- row numbers need to change pos.elements <- sort(merge.list[merge.list > 0]) for(i in seq(along = pos.elements)) merge.list[merge.list == pos.elements[i]] <- which(pos.elements[i] == which.merge.rows) ## fix references -- element numbers need to change neg.elements <- merge.list[merge.list < 0] minus.neg.elements <- -neg.elements num.elements <- length(neg.elements) ## change.table is the dictionary for changes change.table <- cbind(sort(minus.neg.elements), 1:num.elements) ## apply the dictionary for(i in 1:num.elements){ merge.list[merge.list== -change.table[i,1]] <- -change.table[i,2] } old.order <- subtree.order subtree.order <- match(tree$labels[old.order], subtree.labels) res <- list(merge = merge.list, height = merge.height, order = subtree.order, labels = subtree.labels, method = tree$method, call = list(match.call(), tree$call), dist.method = tree$dist.method) class(res) <- "hclust" ## if any additional elements specified, get them too for(i in seq(along = add.element)){ res[[add.element[i]]] <- tree[[add.element[i]]][which.nodes] } } res } ---------------------------------------------------------------------------- -- Here's the .Rd file. It compiled OK for me. \name{Subtree extraction for hclust trees} \alias{f.get.subtrees} \alias(f.make.subtree} \title{Extract hclust Subtrees} \description{Extract subtrees from an "hclust"-class tree object.} \usage{ f.get.subtrees(tree, k = NULL, h = NULL, add.element = NULL) f.make.subtree(tree, groups, n = 1, add.element = NULL) } \arguments{ \item{tree}{a tree object with class "hclust"} \item{k}{The number of groups desired (as in \code{cutree})} \item{h}{The height at which tree should be cut (as in \code{cutree})} \item{groups}{A vector of subgroup memberships. This should be the same length as \code{tree$label}, that is, one element for each leaf of the tree. Can be created by \code{cutree}.} \item{n}{Index of subgroup for the subtree to be created} \item{add.element}{A vector of element names for any elements added to the tree} } \value{ an hclust-class tree object representing a single subtree (f.make.subtree) or a list of such objects (f.get.subtrees). } \details{ } \references{ } \seealso{ \code{\link[mva]{hclust}}, \code{\link[mva]{cutree}} } \examples{ data(USArrests) t1 <- hclust(dist(USArrests)) t2 <- f.get.subtrees(t1, k = 3) # a list with 3 subtrees plot(t2[[1]]) # plot the first subtree t3 <- f.get.subtrees(t1, h = 125) # same as t2 } \author{Matthew Wiener} \email{Matthew_Wiener at merck.com} \keyword{} -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.- r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch _._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._