wwa418 at ku-eichstaett.de
2010-Nov-10 01:26 UTC
[R] Retrieve all names of nested list and index list based on these names
Hi all, I looked for a function that would retrieve all(!) names of an arbitrary deeply nested named list. Also, names should optionally be arranged in a way that reflects the list's hierarchy structure (i.e. 'a$a.1$a.1.1' etc.) Also, there should be a recursive index linked to a respective list branch that could be used to index a list by names (as you would do with named vectors, only that now there's also a hierarchy structure coming into play. Example: name index a 1 a.1 1-1 a.1.1 1-1-1 As I didn't really find anything that suited my needs, I ended up trying to write a recursive function that loops through the individual branches via lapply() and came to find this to be pretty nasty to debug/manually test ;-). I think I found a acceptable implementation now and thought I'd share it in case someone is up to a similar task. Two function defs, then an example: ##### FUNCTION DEFS ##### listnames.get <- function( list.obj, do.basename=TRUE, do.name.chain=TRUE, ... ) { # VALIDATE if(!is.list(list.obj)) stop("Argument 'list.obj' must be a list.") # / #--------------------------------------------------------------------------- # CORE FUNCTION #--------------------------------------------------------------------------- listnames.get.core <- function( list.obj, do.basename=TRUE, do.name.chain=TRUE, buffer, ... ) { if(!exists("index", buffer)) { buffer$index <- new.env(parent=emptyenv()) buffer$index <- NULL buffer$name <- NULL } jnk <- sapply(1:length(list.obj), function(x) { list.branch <- list.obj[x] list.branch.nme <- names(list.branch) if(do.basename) list.branch.nme <- basename(list.branch.nme) list.obj.updt <- list.branch[[1]] # UPDATE BUFFER buffer$run <- c(buffer$run, x) if(do.name.chain) { buffer$name <- c(buffer$name, list.branch.nme) } else { buffer$name <- list.branch.nme } # / index.crnt <- paste(as.character(buffer$run), collapse="-") index.crnt <- data.frame( name=paste(buffer$name, collapse="$"), index=index.crnt, stringsAsFactors=FALSE ) index.updt <- rbind(buffer$index, index.crnt) buffer$index <- index.updt if(is.list(list.obj.updt)) { jcore.listnames.get.core( list.obj=list.obj.updt, do.basename=do.basename, do.name.chain=do.name.chain, buffer=buffer ) } # UPDATE BUFFER buffer$run <- buffer$run[-length(buffer$run)] buffer$name <- buffer$name[-length(buffer$name)] # / return(NULL) }) return(TRUE) } # /CORE FUNCTION ---------- #--------------------------------------------------------------------------- # APPLICATION #--------------------------------------------------------------------------- assign("buffer", new.env(parent=emptyenv()), envir=environment()) listnames.get.core( list.obj=list.obj, do.basename=do.basename, buffer=buffer ) # /APPLICATION ---------- return(buffer$index) } listbranch.get <- function( list.obj, query, do.strict=TRUE, do.rtn.val=TRUE, msg.error=NULL, ... ) { # VALIDATE if(!is.list(list.obj)) stop("Argument 'list.obj' must be a list.") # / # ESTABLISH LIST INDEX list.index <- jcore.listnames.get( list.obj=list.obj, do.basename=TRUE, do.name.chain=TRUE ) list.index.nms <- list.index$name # / # SEARCH FOR QUERY if(do.strict) { query.0 <- query query <- gsub("\\$", "\\\\$", query) query <- gsub("\\.", "\\\\.", query) query <- paste("^", query, "$", sep="") } idx <- grep(query, list.index.nms, perl=TRUE) if(!length(idx)) { if(is.null(msg.error)) { msg.error <- paste("Query not successful: '", query.0, "' ('", query, "')", sep="") } stop(cat(msg.error, sep="\n")) } # / # BUILDING RECURSIVE INDEX idx <- list.index$index[idx] idx <- as.numeric(unlist(strsplit(idx, split="-"))) # / if(do.rtn.val) { # RECURSIVE INDEXING rtn <- list.obj[[idx]] # / } else { rtn <- idx } return(rtn) } ##### EXAMPLE ##### my.list <- list( a=list(a.1="a", a.2=list(a.2.1="a", a.2.2="b"), a.3=list(a.3.1="a"), b=list(b.1=list(b.1.1="a"), b.2="b"), c="a" ) listnames.get(list.obj=my.list, do.basename=TRUE, do.name.chain=TRUE) listbranch.get(list.obj=my.list, query="a$a.2$a.2.2", do.strict=TRUE, do.rtn.val=TRUE) listbranch.get(list.obj=my.list, query="a$a.2$a.2.2", do.strict=TRUE, do.rtn.val=FALSE)