C Campbell
2015-Aug-29 20:19 UTC
[R] Advanced Level Script for Traceability Between Worksheets
Hi folks - I have almost know R skills yet and have been put 'in charge' of the below script created by a former employee. Although some of this is understandable to me, much of it is not. If anyone can help with explaining sections, commenting on the skill level it takes to understand this level of scripting in R, and/or point me to some resources that may cover some of this (e.g., what is ..A[..B, in_B := TRUE, allow.cartesian TRUE]; and specifically what do the 2 dots mean?), I would very much appreciate it. Would also be interested in communicating offline if you prefer. Thank you, Jay # Locate file #### parameterization_file <- file.choose() cd <- dirname(parameterization_file) # Front matter #### message("Installing and loading packages...") # Packages required_packages <- c("openxlsx", "xlsx", "magrittr", "data.table", "reshape2", "XML") install_these <- setdiff(required_packages, rownames(installed.packages())) while (length(install_these) > 0) { install.packages(install_these, repos = "http://cran.rstudio.com") install_these <- setdiff(required_packages, rownames(installed.packages())) } suppressPackageStartupMessages(library(openxlsx)) suppressPackageStartupMessages(library(magrittr)) suppressPackageStartupMessages(library(data.table)) suppressPackageStartupMessages(library(reshape2)) suppressPackageStartupMessages(library(XML)) # Options options(stringsAsFactors = FALSE) # Functions message("Loading functions...") A__in__B <- function(A, B, case = TRUE, ...) { # Copies ..A <- copy(A) ..B <- copy(B) setkey(..A, value) setkey(..B, value) ..A <- unique(..A) ..B <- unique(..B) # Rownames are unnecessary ..A[, rn := NULL] ..B[, rn := NULL] # Case sensitivity if (!case) { ..A <- tableToLower(..A) ..B <- tableToLower(..B) } # Check if A is in B ..A[..B, in_B := TRUE, allow.cartesian = TRUE] if ("in_B" %in% names(..A)) ..A[is.na(in_B), in_B := FALSE] else ..A[, in_B := FALSE] # Case sensitivity if (!case) ..A <- tableDropLower(..A) # Set attributes setABattr(..A, A, B) # Return results setkey(..A, value) return(..A) } A__unique <- function(A, case = TRUE, ...) { # Copies ..A <- copy(A) setkey(..A, value) # Case sensitivity if (!case) ..A <- tableToLower(..A) # Check if A_i values are unique ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE, allow.cartesian = TRUE] if ("is_unique" %in% names(..A)) ..A[is.na(is_unique), is_unique := TRUE] else ..A[, is_unique := TRUE] # Case sensitivity if (!case) ..A <- tableDropLower(..A) # Roll up to value level ..A <- ..A[, list(is_unique = all(is_unique)), keyby = value] # Return results return(..A) } A_i__in__B <- function(A, B, case = TRUE, ...) { # Copies ..A <- copy(A) setkey(..A, value, rn) ..A <- unique(..A) ..B <- copy(B) setkey(..B, value) ..B %>% unique # B rownames are unnecessary ..B[, rn := NULL] # Case sensitivity if (!case) { ..A <- tableToLower(..A) ..B <- tableToLower(..B) } # Check if A is in B if ("in_B" %in% names(..A)) ..A[is.na(in_B), in_B := FALSE] else ..A[, in_B := FALSE] # Case sensitivity if (!case) ..A <- tableDropLower(..A) # Set attributes setABattr(..A, A, B) # Return results setkey(..A, value, rn) return(..A) } A_i__in__B_i <- function(A, B, case = TRUE, ...) { # Copies ..A <- copy(A) setkey(..A, value, rn) ..A <- unique(..A) ..B <- copy(B) setkey(..B, value, rn) ..B <- unique(..B) # Case sensitivity if (!case) { ..A <- tableToLower(..A) ..B <- tableToLower(..B) } # Check if A_i terms are in B_i terms ..A[..B, in_B := TRUE, allow.cartesian = TRUE] if ("in_B" %in% names(..A)) ..A[is.na(in_B), in_B := FALSE] else ..A[, in_B := FALSE] # Case sensitivity if (!case) ..A <- tableDropLower(..A) # Set attributes setABattr(..A, A, B) # Return results setkey(..A, value, rn) return(..A) } A_i__substr__B_i <- function(A, B, case = TRUE, ...) { # Copies ..A <- copy(A) setkey(..A, rn) ..B <- copy(B) setkey(..B, rn) # Renames setnames(..A, "value", "A_value") setnames(..B, "value", "B_value") # Merge ..X <- ..B[..A, allow.cartesian = TRUE] # Check if A_i values are substrings of B_i values ..X[is.na(B_value), is_substring := FALSE] Encoding(..X$A_value) <- "UTF-8" Encoding(..X$B_value) <- "UTF-8" if (case) { ..X[!is.na(B_value), is_substring := mapply( grepl, A_value, B_value, fixed = TRUE)] } else { ..X[!is.na(B_value), is_substring := mapply( grepl, tolower(A_value), tolower(B_value), fixed = TRUE)] } Encoding(..X$A_value) <- "bytes" Encoding(..X$B_value) <- "bytes" # Rename/reorder ..X <- ..X[, list(value = A_value, rn, is_substring)] # Set attributes setABattr(..X, A, B) # Return results setkey(..X, value, rn) return(..X) } A_i__unique <- function(A, case = TRUE, ...) { # Copies ..A <- copy(A) setkey(..A, value) # Case sensitivity if (!case) ..A <- tableToLower(..A) # Check if A_i values are unique ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE, allow.cartesian = TRUE] if ("is_unique" %in% names(..A)) ..A[is.na(is_unique), is_unique := TRUE] else ..A[, is_unique := TRUE] # Case sensitivity if (!case) ..A <- tableDropLower(..A) # Return results setkey(..A, value, rn) return(..A) } extractColumn <- function(x, column_name, value_delimiter = NULL, rows NULL) { # Validate formatting on column name args column_name %<>% trimCompress # Multiple columns? mult_cols <- grepl(",", column_name) if (mult_cols) column_name %<>% strsplit(",") %>% unlist %>% trimCompress # Get column + rn ..table <- x[, c("rn", column_name), with = FALSE] setnames(..table, 2, "value") # Long if multiple if (mult_cols) { ..table %<>% melt(1) ..table[, variable := NULL] } # Key table by rowname setkey(..table, rn) # If rows was provided, subset if (!is.null(rows)) if (rows != "All") ..table <- ..table[textrange2vector(rows) %>% SJ] # Split values according to delimiter... dlm <- Rdelim(value_delimiter) if (!is.null(dlm)) ..values <- strsplit(..table[, value], Rdelim(value_delimiter)) %>% lapply(trimCompress) # ... or convert to list if no delimiter else ..values <- ..table[, value] %>% trimCompress %>% as.list # Set list name values to rowname values names(..values) <- ..table[, rn] # Convert from list to table ..values %<>% melt %>% as.data.table setnames(..values, 2, "rn") # Remove any instances of blank values ..values <- ..values[!is.na(value) & grepl("[^[:space:]]", value)] # Encode all text to bytes # Will need to encode to UTF-8 before output to make it readable Encoding(..values$value) <- "bytes" if (is.character(..values$rn)) Encoding(..values$rn) <- "bytes" # If row names can be converted to numeric, do so if (..values[, rn] %>% is.character) if (..values[, rn] %>% type.convert %>% is.numeric) ..values[, rn := as.numeric(rn)] # Key table by value setkey(..values, value, rn) # Add attributes setattr(..values, "file_path", attr(x, "file_path")) setattr(..values, "sheet_name", attr(x, "sheet_name")) setattr(..values, "header_row", attr(x, "header_row")) setattr(..values, "column_name", column_name) setattr(..values, "rownames_name", attr(x, "rownames_name")) setattr(..values, "value_delimiter", value_delimiter) setattr(..values, "rows", rows) # Return the values table return(..values) } fillNAlast <- function(x) { na <- is.na(x) miss <- which(na) nonmiss <- which(!na) map <- outer(nonmiss, miss, "<") %>% apply(2, . %>% which %>% max) x[miss] <- x[nonmiss[map]] return(x) } getSheetIndex <- function(file_path, sheet_name) { # Extract workbook.xml to temporary file that will be deleted at end of # run xmlDir <- file.path(tempdir(), "findSheet") workbook <- unzip(file_path, files = "xl/workbook.xml", exdir = xmlDir) on.exit(unlink(xmlDir, recursive = TRUE), add = TRUE) # Read workbook.xml and get sheet nodes workbook <- readLines(workbook, warn = FALSE, encoding = "UTF-8") %>% unlist sheets <- gregexpr("<sheet .*/sheets>", workbook, perl = TRUE) %>% regmatches(workbook, .) %>% unlist # Extract sheet names from nodes, parse as html, and return text values sheetNames <- gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE) %>% regmatches(sheets, .) %>% unlist %>% lapply(htmlParse, asText = TRUE) %>% sapply(. %>% xpathApply("//body//text()", xmlValue) %>% unlist) # Which sheet name is equal to the sheet_name argument? which(sheetNames == sheet_name) } Rdelim <- function(x, ...) { if (!is.null(x)) { if (!is.na(x) & x != "None") { if (x == "Newline") "\\n" else x } else NULL } else NULL } readSource <- function(file_path, sheet_name, header_row, column_names, rownames_name = NULL) { # Validate formatting on column name args column_names %<>% strsplit(",") %>% unlist %>% trimCompress rownames_name %<>% trimCompress # Sheet index sheet_index <- getSheetIndex(file_path, sheet_name) # Read column names according to header row ..names <- read.xlsx( xlsxFile = file_path , sheet = sheet_index , colNames = FALSE , rows = header_row ) %>% unlist %>% unname %>% trimCompress # Read in column plus and any rownames column ..table <- read.xlsx( xlsxFile = file_path , sheet = sheet_index , startRow = header_row , cols = which(..names %in% c(rownames_name, column_names)) , skipEmptyRows = FALSE , detectDates = TRUE ) %>% as.data.table # Set names setnames(..table, ..names[which(..names %in% c(rownames_name, column_names))]) # Rownames ## If no rownames column, use row number if (is.null(rownames_name)) { if (is.null(rows)) ..table[, rn := 1:.N + 1L] else ..table[, rn := rows] } else { # Otherwise, just copy the column ..table[, rn := lapply(.SD, identity), .SDcols = rownames_name] } setcolorder(..table, c("rn", setdiff(names(..table), "rn"))) # If row can be converted to numeric, do so if (..table[, rn] %>% is.character) if (..table[, rn] %>% type.convert %>% is.numeric) ..table[, rn := as.numeric(rn)] # Key table by row setkey(..table, rn) # Add attributes setattr(..table, "file_path", file_path) setattr(..table, "sheet_name", sheet_name) setattr(..table, "header_row", header_row) setattr(..table, "column_names", column_names) setattr(..table, "rownames_name", rownames_name) # Return the values table return(..table) } setABattr <- function(new_table, A, B) { # Strip existing attributes in new_table setattr(new_table, "file_path", NULL) setattr(new_table, "sheet_name", NULL) setattr(new_table, "header_row", NULL) setattr(new_table, "column_name", NULL) setattr(new_table, "rownames_name", NULL) setattr(new_table, "value_delimiter", NULL) setattr(new_table, "rows", NULL) setattr(new_table, "rows_are_rownames", NULL) # Set A attributes in new_table setattr(new_table, "A_file_path", attributes(A)$file_path) setattr(new_table, "A_sheet_name", attributes(A)$sheet_name) setattr(new_table, "A_header_row", attributes(A)$header_row) setattr(new_table, "A_column_name", attributes(A)$column_name) setattr(new_table, "A_rownames_name", attributes(A)$rownames_name) setattr(new_table, "A_value_delimiter", attributes(A)$value_delimiter) setattr(new_table, "A_rows", attributes(A)$rows) setattr(new_table, "A_rows_are_rownames", attributes(A)$rows_are_rownames) # Set B attributes in new_table setattr(new_table, "B_file_path", attributes(B)$file_path) setattr(new_table, "B_sheet_name", attributes(B)$sheet_name) setattr(new_table, "B_header_row", attributes(B)$header_row) setattr(new_table, "B_column_name", attributes(B)$column_name) setattr(new_table, "B_rownames_name", attributes(B)$rownames_name) setattr(new_table, "B_value_delimiter", attributes(B)$value_delimiter) setattr(new_table, "B_rows", attributes(B)$rows) setattr(new_table, "B_rows_are_rownames", attributes(B)$rows_are_rownames) } tableToLower <- function(X, ...) { # Copy x <- copy(X) # Existing keys keys <- key(x) setkey(x, NULL) # Rename value column setnames(x, "value", "value_orig") # Derived value column Encoding(x$value_orig) <- "UTF-8" x[, value := tolower(value_orig)] Encoding(x$value) <- "bytes" Encoding(x$value_orig) <- "bytes" # Rekey setkeyv(x, keys) # Return return(x) } tableDropLower <- function(X, ...) { # Copy x <- copy(X) # Existing keys keys <- key(x) setkey(x, NULL) # Drop derived value column x[, value := NULL] # Rename value_orig column setnames(x, "value_orig", "value") # Rekey setkeyv(x, keys) # Return return(x) } textrange2vector <- function(x) { strsplit(x, ",") %>% lapply( . %>% strsplit("-") %>% lapply(as.numeric) %>% lapply(function(s) if (length(s) == 1) s else seq(s[1], s[2]))) %>% lapply(unlist) } trimCompress <- function(x) { if (!"magrittr" %in% loadedNamespaces()) # check if magrittr is loaded library(magrittr) # load if not if (is.null(x)) return(NULL) x %>% gsub("^\\s+", "", .) %>% # remove leading blanks gsub("\\s+$", "", .) %>% # remove trailing blanks gsub("\\s+", " ", .) # compress multiple blanks to one } # Read parameterization file #### message("Reading parameters...") ## Catalog parameters avail_params <- read.xlsx( parameterization_file , "Available Parameters" , colNames = FALSE , startRow = 2 ) %>% as.data.table sheet_params <- c("name", "path", "sheet", "header", "rn") setnames(avail_params, 1:5, sheet_params) avail_params <- avail_params[!is.na(name) & grepl("[^[:space:]]", name)] %>% melt(id.vars = 1:5, value.name = "columns") avail_params <- avail_params[, lapply(.SD, . %>% Filter(Negate(is.na), .) %>% list), by = eval(sheet_params)] avail_params[, variable := NULL] ## Analysis parameters analysis_params <- read.xlsx( parameterization_file , "Parameterization" , startRow = 2 , colNames = FALSE ) %>% as.data.table setnames(analysis_params, c( "name1", "col1", "rows1", "dlm1", "verb", "case", "name2", "col2", "rows2", "dlm2", "outname", "outcols", "outflat" )) analysis_params <- analysis_params[-1][!is.na(name1) & grepl("[^[:space:]]", name1)] analysis_params[, n := 1:.N] ## Combine parameters setkey(avail_params, name) setkey(analysis_params, name1) analysis_params[avail_params, ":="( path1 = path ,sheet1 = sheet ,header1 = header ,rn1 = rn ), allow.cartesian = TRUE] setkey(analysis_params, name2) analysis_params[avail_params, ":="( path2 = path ,sheet2 = sheet ,header2 = header ,rn2 = rn ), allow.cartesian = TRUE] setkey(analysis_params, n) # Match actions to functions verb_function_map <- list( "A_i__in__B" = c("In", "Not In"), "A_i__in__B_i" = c("In (Same Row)", "Not In (Same Row)"), "A_i__substr__B_i" = c("Substring Of (Same Row)", "Not Substring Of (Same Row)"), "A_i__unique" = c("Is Unique", "Not Unique") ) %>% unlist names(verb_function_map) %<>% gsub("[0-9]+", "", .) analysis_params[, fun := factor(verb)] levels(analysis_params$fun) %<>% match(verb_function_map) %>% "["(names(verb_function_map), .) analysis_params$fun %<>% as.character # Read data sources message("Reading data sources...") data_names <- avail_params[, name] data_list <- replicate(length(data_names), list(), simplify = FALSE) names(data_list) <- data_names for (i in 1:nrow(avail_params)) data_list[[i]] <- with(avail_params[i], readSource( file_path = path , sheet_name = sheet , header_row = header , column_names = columns[[1]] , rownames_name = rn )) # Analysis #### message("Performing comparisons...") reports <- analyses <- vector("list", nrow(analysis_params)) names(reports) <- names(analyses) <- analysis_params[, outname] rowAnalysis2report <- function(analysis, params = list()) { # Create a copy x <- copy(analysis) # Subset to logical_val of logical_col setnames(x, setdiff(names(x), c("rn", "value")), "logical_col") x <- x[logical_col == !grepl("Not", params$verb)] x[, logical_col := NULL] # Re-encode Encoding(x$value) <- "UTF-8" if (is.character(x$rn)) Encoding(x$rn) <- "UTF-8" # Unique results only setkey(x, rn, value) setcolorder(x, key(x)) x <- unique(x) # Flatten if desired if (params$outflat == "Yes") { dlm <- Rdelim(params$dlm1) if (!is.null(dlm)) { if (dlm == "\\n") dlm <- "\n" x <- x[, list(value = paste(value, collapse = dlm)), by = rn] } } # Retrieve all columns if desired setkey(x, rn) if (params$outcols == "Yes") { full_source <- copy(data_list[[params$name1]]) setkey(full_source, rn) x <- x[full_source, nomatch = 0, allow.cartesian = TRUE] } # Rename results columns if (is.null(params$rn1)) setnames(x, 1, "Row") else { if (is.na(params$rn1) | params$rn1 == params$col1) setnames(x, 1, "Row") else setnames(x, 1, params$rn1) } setnames(x, 2, params$col1) return(x) } ## Do it for (i in 1:nrow(analysis_params)) { r <- analysis_params[i] args <- list( A = extractColumn(data_list[[r$name1]], r$col1, r$dlm1, r$rows1), B = if (!is.na(r$name2)) extractColumn(data_list[[r$name2]], r$col2, r$dlm2, r$rows2), case = (r$case == "Yes") ) analyses[[i]] <- do.call(r$fun, args) reports[[i]] <- rowAnalysis2report(analyses[[i]], r) rm(r, args) } # Output #### message("Writing results to output file...") detach("package:openxlsx") suppressPackageStartupMessages(library(xlsx)) # Output file exists <- TRUE i <- 0 while (exists) { out_file <- if (i > 0) { file.path(cd, sprintf("Comparison_Reports_%s_(%s).xlsx", Sys.Date(), i)) } else file.path(cd, sprintf("Comparison_Reports_%s.xlsx", Sys.Date())) exists <- file.exists(out_file) if (!exists) file.copy(parameterization_file, out_file) i <- i + 1 } # Headers headers <- analysis_params[, lapply(.SD, as.character), .SDcols = c( "outname", "col1", "verb", "col2", "case", "name1", "name2", "rows1", "rows2", "dlm1", "dlm2")] headers[, case := factor(case, c("Yes", "No"), c("(Case Sensitive)", "(Not Case Sensitive)"))] headers[!is.na(col2), header_title := paste(col1, verb, col2, case)] headers[is.na(col2), header_title := paste(col1, verb, case)] headers[, header_time := Sys.time()] headers$header_col1 <- headers[, list(col1, name1, rows1, dlm1)] %>% t %>% as.data.table %>% lapply(as.list) %>% lapply(as.data.table) %>% lapply(setnames, c("Column", "Source", "Rows", "Delimiter")) %>% lapply(as.list) headers$header_col2 <- headers[, list(col2, name2, rows2, dlm2)] %>% t %>% as.data.table %>% lapply(as.list) %>% lapply(as.data.table) %>% lapply(setnames, c("Column", "Source", "Rows", "Delimiter")) %>% lapply(as.list) # Write keep <- c(ls(), "i", "keep") ## Loop through reports and write for (i in names(reports)) { message(i, "...") # Load workbook wb <- loadWorkbook(out_file) # Workbook styles ## Header title hd <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP"), font = Font(wb, heightInPoints = 16, isBold = TRUE) ) ## Date dt <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP"), dataFormat = DataFormat("m/d/yyyy h:mm:ss;@") ) ## Parameters header ph <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP"), font = Font(wb, isItalic = TRUE) ) ## Column names header cn <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP"), border = Border(position = c("BOTTOM", "TOP"), pen = c("BORDER_THIN", "BORDER_MEDIUM")), font = Font(wb, isBold = TRUE) ) ## Column names header for reproduced data cnr <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP"), border = Border(position = c("BOTTOM", "TOP"), pen = c("BORDER_THIN", "BORDER_MEDIUM")), font = Font(wb, isBold = TRUE, isItalic = TRUE) ) ## Values vl <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP", wrapText = TRUE) ) ## Values for reproduced data vlr <- CellStyle( wb, alignment = Alignment(horizontal = "ALIGN_LEFT", vertical "VERTICAL_TOP", wrapText = TRUE), font = Font(wb, isItalic = TRUE) ) # Create sheet sh <- createSheet(wb, i) # Add header rows h <- headers[outname == i] addMergedRegion(sh, 1, 1, 1, 10) addMergedRegion(sh, 2, 2, 1, 10) rw <- createRow(sh, 1:2) cl <- createCell(rw, 1) ## Title addDataFrame(h[, header_title], sh, FALSE, FALSE, 1, 1) rw <- getRows(sh, 1) cl <- getCells(rw) lapply(cl, setCellStyle, hd) ## Date addDataFrame(h[, header_time], sh, FALSE, FALSE, 2, 1) rw <- getRows(sh, 2) cl <- getCells(rw) lapply(cl, setCellStyle, dt) ## Parameters addDataFrame(h[, header_col1] %>% as.data.frame, sh, TRUE, FALSE, 4, 1) if (h[, !is.na(col2)]) addDataFrame(h[, header_col2] %>% as.data.frame, sh, FALSE, FALSE, 6, 1) rw <- getRows(sh, 4) cl <- getCells(rw) lapply(cl, setCellStyle, ph) rw <- getRows(sh, 5:6) cl <- getCells(rw) lapply(cl, setCellStyle, vl) # Add report addDataFrame(reports[[i]], sh, TRUE, FALSE, 8, 1) nc <- ncol(reports[[i]]) ## Format column names rw <- getRows(sh, 8) cl <- getCells(rw, 1:2) lapply(cl, setCellStyle, cn) if (nc > 2) { cl <- getCells(rw, 3:nc) lapply(cl, setCellStyle, cnr) } ## Format values rw <- getRows(sh, 9:(nrow(reports[[i]]) + 9)) cl <- getCells(rw, 1:2) lapply(cl, setCellStyle, vl) if (nc > 2) { cl <- getCells(rw, 3:nc) lapply(cl, setCellStyle, vlr) } ## Add autofilters if (ncol(reports[[i]]) > 26) { addAutoFilter(sh, sprintf("A8:%s%s%s", LETTERS[floor(ncol(reports[[i]]) / 26)], LETTERS[ncol(reports[[i]]) %% 26], nrow(reports[[i]]) + 9)) } else { addAutoFilter(sh, sprintf("A8:%s%s", LETTERS[ncol(reports[[i]])], nrow(reports[[i]]) + 9)) } # Autofit columns autoSizeColumn(sh, 1:ncol(reports[[i]])) # Create freeze on report column names and results columns if (nc > 2) createFreezePane(sh, rowSplit = 9, colSplit = 3) else createFreezePane(sh, rowSplit = 9, colSplit = 1) # Save saveWorkbook(wb, out_file) rm(list = setdiff(ls(), keep)) } b [[alternative HTML version deleted]]
Jeff Newmiller
2015-Aug-30 01:35 UTC
[R] Advanced Level Script for Traceability Between Worksheets
Some notes: 1) HTML damages your email on this mailing list (we often see run-on lines and garbage characters.. definitely not whatever you saw). 2) Massive scripts are off topic... please read the Posting Guide. If you can't narrow your question to a smaller example then you may really need a consultant. 3) Periods in this case don't have any special meaning... They just make the variable look weird. 4) This script makes liberal use of the data.tables package, which has some advantage in speed and memory efficiency if you are working with large data sets. The odd indexing used in ..A[..B, in_B := TRUE, allow.cartesian = TRUE] is a relational join that is discussed in the vignette for the data.table package. I only use data.tables if I need to optimise memory or execution speed because I don't find them very intuitive. The fact that this code makes copies so frequently may indicate that it is not as optimised as it could be. Or perhaps they are necessary and I just have not looked at it closely enough. 5) A very few uses of %>? and ?<>? are from the magrittr package... I do find that helpful though it seems like the author here was only just getting started using it. Again, you would need to read the vignette and/or some internet tutorials to follow that syntax... I find it much easier than data.table though it is solving a different problem. 6) I don't consult off list... sorry. --------------------------------------------------------------------------- Jeff Newmiller The ..... ..... Go Live... DCN:<jdnewmil at dcn.davis.ca.us> Basics: ##.#. ##.#. Live Go... Live: OO#.. Dead: OO#.. Playing Research Engineer (Solar/Batteries O.O#. #.O#. with /Software/Embedded Controllers) .OO#. .OO#. rocks...1k --------------------------------------------------------------------------- Sent from my phone. Please excuse my brevity. On August 29, 2015 1:19:35 PM PDT, C Campbell <cc571309 at gmail.com> wrote:>Hi folks - I have almost know R skills yet and have been put 'in >charge' of >the below script created by a former employee. Although some of this >is >understandable to me, much of it is not. If anyone can help with >explaining sections, commenting on the skill level it takes to >understand >this level of scripting in R, and/or point me to some resources that >may >cover some of this (e.g., what is ..A[..B, in_B := TRUE, >allow.cartesian >TRUE]; and specifically what do the 2 dots mean?), I would very much >appreciate it. Would also be interested in communicating offline if >you >prefer. >Thank you, >Jay > > > ># Locate file #### >parameterization_file <- file.choose() >cd <- dirname(parameterization_file) > ># Front matter #### >message("Installing and loading packages...") > ># Packages >required_packages <- c("openxlsx", "xlsx", "magrittr", "data.table", >"reshape2", > "XML") >install_these <- setdiff(required_packages, >rownames(installed.packages())) > >while (length(install_these) > 0) { > install.packages(install_these, repos = "http://cran.rstudio.com") > install_these <- setdiff(required_packages, >rownames(installed.packages())) >} > >suppressPackageStartupMessages(library(openxlsx)) >suppressPackageStartupMessages(library(magrittr)) >suppressPackageStartupMessages(library(data.table)) >suppressPackageStartupMessages(library(reshape2)) >suppressPackageStartupMessages(library(XML)) > > ># Options >options(stringsAsFactors = FALSE) > ># Functions >message("Loading functions...") > >A__in__B <- function(A, B, case = TRUE, ...) { > > # Copies > ..A <- copy(A) > ..B <- copy(B) > setkey(..A, value) > setkey(..B, value) > ..A <- unique(..A) > ..B <- unique(..B) > > # Rownames are unnecessary > ..A[, rn := NULL] > ..B[, rn := NULL] > > # Case sensitivity > if (!case) { > ..A <- tableToLower(..A) > ..B <- tableToLower(..B) > } > > # Check if A is in B > ..A[..B, in_B := TRUE, allow.cartesian = TRUE] > if ("in_B" %in% names(..A)) > ..A[is.na(in_B), in_B := FALSE] > else > ..A[, in_B := FALSE] > > # Case sensitivity > if (!case) > ..A <- tableDropLower(..A) > > # Set attributes > setABattr(..A, A, B) > > # Return results > setkey(..A, value) > return(..A) > >} > >A__unique <- function(A, case = TRUE, ...) { > > # Copies > ..A <- copy(A) > setkey(..A, value) > > # Case sensitivity > if (!case) > ..A <- tableToLower(..A) > > # Check if A_i values are unique > ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE, > allow.cartesian = TRUE] > if ("is_unique" %in% names(..A)) > ..A[is.na(is_unique), is_unique := TRUE] > else > ..A[, is_unique := TRUE] > > # Case sensitivity > if (!case) > ..A <- tableDropLower(..A) > > # Roll up to value level > ..A <- ..A[, list(is_unique = all(is_unique)), keyby = value] > > # Return results > return(..A) > >} > >A_i__in__B <- function(A, B, case = TRUE, ...) { > > # Copies > ..A <- copy(A) > setkey(..A, value, rn) > ..A <- unique(..A) > ..B <- copy(B) > setkey(..B, value) > ..B %>% unique > > # B rownames are unnecessary > ..B[, rn := NULL] > > # Case sensitivity > if (!case) { > ..A <- tableToLower(..A) > ..B <- tableToLower(..B) > } > > # Check if A is in B > if ("in_B" %in% names(..A)) > ..A[is.na(in_B), in_B := FALSE] > else > ..A[, in_B := FALSE] > > # Case sensitivity > if (!case) > ..A <- tableDropLower(..A) > > # Set attributes > setABattr(..A, A, B) > > # Return results > setkey(..A, value, rn) > return(..A) > >} > >A_i__in__B_i <- function(A, B, case = TRUE, ...) { > > # Copies > ..A <- copy(A) > setkey(..A, value, rn) > ..A <- unique(..A) > ..B <- copy(B) > setkey(..B, value, rn) > ..B <- unique(..B) > > # Case sensitivity > if (!case) { > ..A <- tableToLower(..A) > ..B <- tableToLower(..B) > } > > # Check if A_i terms are in B_i terms > ..A[..B, in_B := TRUE, allow.cartesian = TRUE] > if ("in_B" %in% names(..A)) > ..A[is.na(in_B), in_B := FALSE] > else > ..A[, in_B := FALSE] > > # Case sensitivity > if (!case) > ..A <- tableDropLower(..A) > > # Set attributes > setABattr(..A, A, B) > > # Return results > setkey(..A, value, rn) > return(..A) > >} > >A_i__substr__B_i <- function(A, B, case = TRUE, ...) { > > # Copies > ..A <- copy(A) > setkey(..A, rn) > ..B <- copy(B) > setkey(..B, rn) > > # Renames > setnames(..A, "value", "A_value") > setnames(..B, "value", "B_value") > > # Merge > ..X <- ..B[..A, allow.cartesian = TRUE] > > # Check if A_i values are substrings of B_i values > ..X[is.na(B_value), is_substring := FALSE] > Encoding(..X$A_value) <- "UTF-8" > Encoding(..X$B_value) <- "UTF-8" > if (case) { > ..X[!is.na(B_value), is_substring := mapply( > grepl, A_value, B_value, fixed = TRUE)] > } else { > ..X[!is.na(B_value), is_substring := mapply( > grepl, tolower(A_value), tolower(B_value), fixed = TRUE)] > } > Encoding(..X$A_value) <- "bytes" > Encoding(..X$B_value) <- "bytes" > > # Rename/reorder > ..X <- ..X[, list(value = A_value, rn, is_substring)] > > # Set attributes > setABattr(..X, A, B) > > # Return results > setkey(..X, value, rn) > return(..X) > >} > >A_i__unique <- function(A, case = TRUE, ...) { > > # Copies > ..A <- copy(A) > setkey(..A, value) > > # Case sensitivity > if (!case) > ..A <- tableToLower(..A) > > # Check if A_i values are unique > ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE, > allow.cartesian = TRUE] > if ("is_unique" %in% names(..A)) > ..A[is.na(is_unique), is_unique := TRUE] > else > ..A[, is_unique := TRUE] > > # Case sensitivity > if (!case) > ..A <- tableDropLower(..A) > > # Return results > setkey(..A, value, rn) > return(..A) > >} > >extractColumn <- function(x, column_name, value_delimiter = NULL, rows >>NULL) >{ > > # Validate formatting on column name args > column_name %<>% trimCompress > > # Multiple columns? > mult_cols <- grepl(",", column_name) > if (mult_cols) > column_name %<>% strsplit(",") %>% unlist %>% trimCompress > > # Get column + rn > ..table <- x[, c("rn", column_name), with = FALSE] > setnames(..table, 2, "value") > > # Long if multiple > if (mult_cols) { > ..table %<>% melt(1) > ..table[, variable := NULL] > } > > # Key table by rowname > setkey(..table, rn) > > # If rows was provided, subset > if (!is.null(rows)) > if (rows != "All") > ..table <- ..table[textrange2vector(rows) %>% SJ] > > # Split values according to delimiter... > dlm <- Rdelim(value_delimiter) > if (!is.null(dlm)) > ..values <- strsplit(..table[, value], Rdelim(value_delimiter)) %>% > lapply(trimCompress) > # ... or convert to list if no delimiter > else > ..values <- ..table[, value] %>% trimCompress %>% as.list > > # Set list name values to rowname values > names(..values) <- ..table[, rn] > > # Convert from list to table > ..values %<>% melt %>% as.data.table > setnames(..values, 2, "rn") > > # Remove any instances of blank values > ..values <- ..values[!is.na(value) & grepl("[^[:space:]]", value)] > > # Encode all text to bytes > # Will need to encode to UTF-8 before output to make it readable > Encoding(..values$value) <- "bytes" > if (is.character(..values$rn)) Encoding(..values$rn) <- "bytes" > > # If row names can be converted to numeric, do so > if (..values[, rn] %>% is.character) > if (..values[, rn] %>% type.convert %>% is.numeric) > ..values[, rn := as.numeric(rn)] > > # Key table by value > setkey(..values, value, rn) > > # Add attributes > setattr(..values, "file_path", attr(x, "file_path")) > setattr(..values, "sheet_name", attr(x, "sheet_name")) > setattr(..values, "header_row", attr(x, "header_row")) > setattr(..values, "column_name", column_name) > setattr(..values, "rownames_name", attr(x, "rownames_name")) > setattr(..values, "value_delimiter", value_delimiter) > setattr(..values, "rows", rows) > > # Return the values table > return(..values) > >} > >fillNAlast <- function(x) { > na <- is.na(x) > miss <- which(na) > nonmiss <- which(!na) > map <- outer(nonmiss, miss, "<") %>% > apply(2, . %>% which %>% max) > x[miss] <- x[nonmiss[map]] > return(x) >} > >getSheetIndex <- function(file_path, sheet_name) { > ># Extract workbook.xml to temporary file that will be deleted at end of > # run > xmlDir <- file.path(tempdir(), "findSheet") >workbook <- unzip(file_path, files = "xl/workbook.xml", exdir = xmlDir) > on.exit(unlink(xmlDir, recursive = TRUE), add = TRUE) > > # Read workbook.xml and get sheet nodes > workbook <- readLines(workbook, warn = FALSE, encoding = "UTF-8") %>% > unlist > sheets <- gregexpr("<sheet .*/sheets>", workbook, perl = TRUE) %>% > regmatches(workbook, .) %>% > unlist > > # Extract sheet names from nodes, parse as html, and return text >values > sheetNames <- gregexpr('(?<=name=")[^"]+', sheets, perl = TRUE) %>% > regmatches(sheets, .) %>% > unlist %>% > lapply(htmlParse, asText = TRUE) %>% > sapply(. %>% xpathApply("//body//text()", xmlValue) %>% unlist) > > # Which sheet name is equal to the sheet_name argument? > which(sheetNames == sheet_name) > >} > >Rdelim <- function(x, ...) { > if (!is.null(x)) { > if (!is.na(x) & x != "None") { > if (x == "Newline") "\\n" else x > } else NULL > } else NULL >} > >readSource <- function(file_path, sheet_name, header_row, column_names, > rownames_name = NULL) >{ > > # Validate formatting on column name args > column_names %<>% strsplit(",") %>% unlist %>% trimCompress > rownames_name %<>% trimCompress > > # Sheet index > sheet_index <- getSheetIndex(file_path, sheet_name) > > # Read column names according to header row > ..names <- read.xlsx( > xlsxFile = file_path > , sheet = sheet_index > , colNames = FALSE > , rows = header_row > ) %>% unlist %>% unname %>% trimCompress > > # Read in column plus and any rownames column > ..table <- read.xlsx( > xlsxFile = file_path > , sheet = sheet_index > , startRow = header_row > , cols = which(..names %in% c(rownames_name, column_names)) > , skipEmptyRows = FALSE > , detectDates = TRUE > ) %>% as.data.table > > # Set names > setnames(..table, > ..names[which(..names %in% c(rownames_name, column_names))]) > > # Rownames > ## If no rownames column, use row number > if (is.null(rownames_name)) { >if (is.null(rows)) ..table[, rn := 1:.N + 1L] else ..table[, rn :>rows] > } else { # Otherwise, just copy the column > ..table[, rn := lapply(.SD, identity), .SDcols = rownames_name] > } > setcolorder(..table, c("rn", setdiff(names(..table), "rn"))) > > # If row can be converted to numeric, do so > if (..table[, rn] %>% is.character) > if (..table[, rn] %>% type.convert %>% is.numeric) > ..table[, rn := as.numeric(rn)] > > # Key table by row > setkey(..table, rn) > > # Add attributes > setattr(..table, "file_path", file_path) > setattr(..table, "sheet_name", sheet_name) > setattr(..table, "header_row", header_row) > setattr(..table, "column_names", column_names) > setattr(..table, "rownames_name", rownames_name) > > # Return the values table > return(..table) > >} > >setABattr <- function(new_table, A, B) { > > # Strip existing attributes in new_table > setattr(new_table, "file_path", NULL) > setattr(new_table, "sheet_name", NULL) > setattr(new_table, "header_row", NULL) > setattr(new_table, "column_name", NULL) > setattr(new_table, "rownames_name", NULL) > setattr(new_table, "value_delimiter", NULL) > setattr(new_table, "rows", NULL) > setattr(new_table, "rows_are_rownames", NULL) > > # Set A attributes in new_table > setattr(new_table, "A_file_path", attributes(A)$file_path) > setattr(new_table, "A_sheet_name", attributes(A)$sheet_name) > setattr(new_table, "A_header_row", attributes(A)$header_row) > setattr(new_table, "A_column_name", attributes(A)$column_name) > setattr(new_table, "A_rownames_name", attributes(A)$rownames_name) > setattr(new_table, "A_value_delimiter", attributes(A)$value_delimiter) > setattr(new_table, "A_rows", attributes(A)$rows) >setattr(new_table, "A_rows_are_rownames", >attributes(A)$rows_are_rownames) > > # Set B attributes in new_table > setattr(new_table, "B_file_path", attributes(B)$file_path) > setattr(new_table, "B_sheet_name", attributes(B)$sheet_name) > setattr(new_table, "B_header_row", attributes(B)$header_row) > setattr(new_table, "B_column_name", attributes(B)$column_name) > setattr(new_table, "B_rownames_name", attributes(B)$rownames_name) > setattr(new_table, "B_value_delimiter", attributes(B)$value_delimiter) > setattr(new_table, "B_rows", attributes(B)$rows) >setattr(new_table, "B_rows_are_rownames", >attributes(B)$rows_are_rownames) > >} > >tableToLower <- function(X, ...) { > > # Copy > x <- copy(X) > > # Existing keys > keys <- key(x) > setkey(x, NULL) > > # Rename value column > setnames(x, "value", "value_orig") > > # Derived value column > Encoding(x$value_orig) <- "UTF-8" > x[, value := tolower(value_orig)] > Encoding(x$value) <- "bytes" > Encoding(x$value_orig) <- "bytes" > > # Rekey > setkeyv(x, keys) > > # Return > return(x) > >} > >tableDropLower <- function(X, ...) { > > # Copy > x <- copy(X) > > # Existing keys > keys <- key(x) > setkey(x, NULL) > > # Drop derived value column > x[, value := NULL] > > # Rename value_orig column > setnames(x, "value_orig", "value") > > # Rekey > setkeyv(x, keys) > > # Return > return(x) > >} > >textrange2vector <- function(x) { > strsplit(x, ",") %>% > lapply( > . %>% > strsplit("-") %>% > lapply(as.numeric) %>% > lapply(function(s) > if (length(s) == 1) s > else seq(s[1], s[2]))) %>% > lapply(unlist) >} > >trimCompress <- function(x) { > > if (!"magrittr" %in% loadedNamespaces()) # check if magrittr is loaded > library(magrittr) # load if not > > if (is.null(x)) return(NULL) > > x %>% > gsub("^\\s+", "", .) %>% # remove leading blanks > gsub("\\s+$", "", .) %>% # remove trailing blanks > gsub("\\s+", " ", .) # compress multiple blanks to one > >} > > > > > > > ># Read parameterization file #### > >message("Reading parameters...") > >## Catalog parameters >avail_params <- read.xlsx( > parameterization_file > , "Available Parameters" > , colNames = FALSE > , startRow = 2 >) %>% as.data.table >sheet_params <- c("name", "path", "sheet", "header", "rn") >setnames(avail_params, 1:5, sheet_params) >avail_params <- avail_params[!is.na(name) & grepl("[^[:space:]]", >name)] %>% > melt(id.vars = 1:5, value.name = "columns") >avail_params <- avail_params[, lapply(.SD, . %>% Filter(Negate(is.na), >.) >%>% > list), by = eval(sheet_params)] >avail_params[, variable := NULL] > >## Analysis parameters >analysis_params <- read.xlsx( > parameterization_file > , "Parameterization" > , startRow = 2 > , colNames = FALSE >) %>% as.data.table >setnames(analysis_params, c( > "name1", "col1", "rows1", "dlm1", > "verb", "case", > "name2", "col2", "rows2", "dlm2", > "outname", "outcols", "outflat" >)) >analysis_params <- analysis_params[-1][!is.na(name1) & > grepl("[^[:space:]]", name1)] >analysis_params[, n := 1:.N] > >## Combine parameters >setkey(avail_params, name) >setkey(analysis_params, name1) >analysis_params[avail_params, ":="( > path1 = path > ,sheet1 = sheet > ,header1 = header > ,rn1 = rn >), allow.cartesian = TRUE] >setkey(analysis_params, name2) >analysis_params[avail_params, ":="( > path2 = path > ,sheet2 = sheet > ,header2 = header > ,rn2 = rn >), allow.cartesian = TRUE] >setkey(analysis_params, n) > > ># Match actions to functions >verb_function_map <- list( > "A_i__in__B" = c("In", "Not In"), > "A_i__in__B_i" = c("In (Same Row)", "Not In (Same Row)"), > "A_i__substr__B_i" = c("Substring Of (Same Row)", > "Not Substring Of (Same Row)"), > "A_i__unique" = c("Is Unique", "Not Unique") >) %>% unlist >names(verb_function_map) %<>% gsub("[0-9]+", "", .) >analysis_params[, fun := factor(verb)] >levels(analysis_params$fun) %<>% > match(verb_function_map) %>% > "["(names(verb_function_map), .) >analysis_params$fun %<>% as.character > > > ># Read data sources > >message("Reading data sources...") > >data_names <- avail_params[, name] >data_list <- replicate(length(data_names), list(), simplify = FALSE) >names(data_list) <- data_names >for (i in 1:nrow(avail_params)) > data_list[[i]] <- with(avail_params[i], readSource( > file_path = path > , sheet_name = sheet > , header_row = header > , column_names = columns[[1]] > , rownames_name = rn > )) > > > > ># Analysis #### > >message("Performing comparisons...") > >reports <- analyses <- vector("list", nrow(analysis_params)) >names(reports) <- names(analyses) <- analysis_params[, outname] > >rowAnalysis2report <- function(analysis, params = list()) { > > # Create a copy > x <- copy(analysis) > > # Subset to logical_val of logical_col > setnames(x, setdiff(names(x), c("rn", "value")), "logical_col") > x <- x[logical_col == !grepl("Not", params$verb)] > x[, logical_col := NULL] > > # Re-encode > Encoding(x$value) <- "UTF-8" > if (is.character(x$rn)) > Encoding(x$rn) <- "UTF-8" > > # Unique results only > setkey(x, rn, value) > setcolorder(x, key(x)) > x <- unique(x) > > # Flatten if desired > if (params$outflat == "Yes") { > dlm <- Rdelim(params$dlm1) > if (!is.null(dlm)) { > if (dlm == "\\n") dlm <- "\n" > x <- x[, list(value = paste(value, collapse = dlm)), by = rn] > } > } > > # Retrieve all columns if desired > setkey(x, rn) > if (params$outcols == "Yes") { > full_source <- copy(data_list[[params$name1]]) > setkey(full_source, rn) > x <- x[full_source, nomatch = 0, allow.cartesian = TRUE] > } > > # Rename results columns > if (is.null(params$rn1)) setnames(x, 1, "Row") else { > if (is.na(params$rn1) | params$rn1 == params$col1) setnames(x, 1, >"Row") > else setnames(x, 1, params$rn1) > } > setnames(x, 2, params$col1) > > return(x) > >} > >## Do it >for (i in 1:nrow(analysis_params)) { > r <- analysis_params[i] > args <- list( > A = extractColumn(data_list[[r$name1]], r$col1, r$dlm1, r$rows1), > B = if (!is.na(r$name2)) > extractColumn(data_list[[r$name2]], r$col2, r$dlm2, r$rows2), > case = (r$case == "Yes") > ) > analyses[[i]] <- do.call(r$fun, args) > reports[[i]] <- rowAnalysis2report(analyses[[i]], r) > rm(r, args) >} > > > > > > ># Output #### > >message("Writing results to output file...") > >detach("package:openxlsx") >suppressPackageStartupMessages(library(xlsx)) > ># Output file >exists <- TRUE >i <- 0 >while (exists) { > out_file <- if (i > 0) { >file.path(cd, sprintf("Comparison_Reports_%s_(%s).xlsx", Sys.Date(), >i)) >} else file.path(cd, sprintf("Comparison_Reports_%s.xlsx", Sys.Date())) > exists <- file.exists(out_file) > if (!exists) > file.copy(parameterization_file, out_file) > i <- i + 1 >} > ># Headers >headers <- analysis_params[, lapply(.SD, as.character), .SDcols = c( > "outname", "col1", "verb", "col2", "case", "name1", "name2", > "rows1", "rows2", "dlm1", "dlm2")] >headers[, case := factor(case, c("Yes", "No"), > c("(Case Sensitive)", "(Not Case Sensitive)"))] >headers[!is.na(col2), header_title := paste(col1, verb, col2, case)] >headers[is.na(col2), header_title := paste(col1, verb, case)] >headers[, header_time := Sys.time()] >headers$header_col1 <- headers[, list(col1, name1, rows1, dlm1)] %>% > t %>% > as.data.table %>% > lapply(as.list) %>% > lapply(as.data.table) %>% > lapply(setnames, c("Column", "Source", "Rows", "Delimiter")) %>% > lapply(as.list) >headers$header_col2 <- headers[, list(col2, name2, rows2, dlm2)] %>% > t %>% > as.data.table %>% > lapply(as.list) %>% > lapply(as.data.table) %>% > lapply(setnames, c("Column", "Source", "Rows", "Delimiter")) %>% > lapply(as.list) > > ># Write >keep <- c(ls(), "i", "keep") > >## Loop through reports and write >for (i in names(reports)) { > > message(i, "...") > > # Load workbook > wb <- loadWorkbook(out_file) > > # Workbook styles > > ## Header title > hd <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP"), > font = Font(wb, heightInPoints = 16, isBold = TRUE) > ) > > ## Date > dt <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP"), > dataFormat = DataFormat("m/d/yyyy h:mm:ss;@") > ) > > ## Parameters header > ph <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP"), > font = Font(wb, isItalic = TRUE) > ) > > ## Column names header > cn <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP"), > border = Border(position = c("BOTTOM", "TOP"), > pen = c("BORDER_THIN", "BORDER_MEDIUM")), > font = Font(wb, isBold = TRUE) > ) > > ## Column names header for reproduced data > cnr <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP"), > border = Border(position = c("BOTTOM", "TOP"), > pen = c("BORDER_THIN", "BORDER_MEDIUM")), > font = Font(wb, isBold = TRUE, isItalic = TRUE) > ) > > ## Values > vl <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP", > wrapText = TRUE) > ) > > ## Values for reproduced data > vlr <- CellStyle( > wb, > alignment = Alignment(horizontal = "ALIGN_LEFT", vertical >"VERTICAL_TOP", > wrapText = TRUE), > font = Font(wb, isItalic = TRUE) > ) > > > # Create sheet > sh <- createSheet(wb, i) > > # Add header rows > h <- headers[outname == i] > addMergedRegion(sh, 1, 1, 1, 10) > addMergedRegion(sh, 2, 2, 1, 10) > rw <- createRow(sh, 1:2) > cl <- createCell(rw, 1) > > ## Title > addDataFrame(h[, header_title], sh, FALSE, FALSE, 1, 1) > rw <- getRows(sh, 1) > cl <- getCells(rw) > lapply(cl, setCellStyle, hd) > > ## Date > addDataFrame(h[, header_time], sh, FALSE, FALSE, 2, 1) > rw <- getRows(sh, 2) > cl <- getCells(rw) > lapply(cl, setCellStyle, dt) > > ## Parameters >addDataFrame(h[, header_col1] %>% as.data.frame, sh, TRUE, FALSE, 4, 1) > if (h[, !is.na(col2)]) >addDataFrame(h[, header_col2] %>% as.data.frame, sh, FALSE, FALSE, 6, >1) > rw <- getRows(sh, 4) > cl <- getCells(rw) > lapply(cl, setCellStyle, ph) > rw <- getRows(sh, 5:6) > cl <- getCells(rw) > lapply(cl, setCellStyle, vl) > > # Add report > addDataFrame(reports[[i]], sh, TRUE, FALSE, 8, 1) > nc <- ncol(reports[[i]]) > > ## Format column names > rw <- getRows(sh, 8) > cl <- getCells(rw, 1:2) > lapply(cl, setCellStyle, cn) > if (nc > 2) { > cl <- getCells(rw, 3:nc) > lapply(cl, setCellStyle, cnr) > } > > ## Format values > rw <- getRows(sh, 9:(nrow(reports[[i]]) + 9)) > cl <- getCells(rw, 1:2) > lapply(cl, setCellStyle, vl) > if (nc > 2) { > cl <- getCells(rw, 3:nc) > lapply(cl, setCellStyle, vlr) > } > > ## Add autofilters > if (ncol(reports[[i]]) > 26) { > addAutoFilter(sh, sprintf("A8:%s%s%s", > LETTERS[floor(ncol(reports[[i]]) / 26)], > LETTERS[ncol(reports[[i]]) %% 26], > nrow(reports[[i]]) + 9)) > } else { > addAutoFilter(sh, sprintf("A8:%s%s", LETTERS[ncol(reports[[i]])], > nrow(reports[[i]]) + 9)) > } > > # Autofit columns > autoSizeColumn(sh, 1:ncol(reports[[i]])) > > # Create freeze on report column names and results columns > if (nc > 2) createFreezePane(sh, rowSplit = 9, colSplit = 3) else > createFreezePane(sh, rowSplit = 9, colSplit = 1) > > # Save > saveWorkbook(wb, out_file) > rm(list = setdiff(ls(), keep)) > >} >b > > [[alternative HTML version deleted]] > >______________________________________________ >R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code.