C Campbell
2015-Aug-29 20:19 UTC
[R] Advanced Level Script for Traceability Between Worksheets
Hi folks - I have almost know R skills yet and have been put 'in charge'
of
the below script created by a former employee. Although some of this is
understandable to me, much of it is not. If anyone can help with
explaining sections, commenting on the skill level it takes to understand
this level of scripting in R, and/or point me to some resources that may
cover some of this (e.g., what is ..A[..B, in_B := TRUE, allow.cartesian TRUE];
and specifically what do the 2 dots mean?), I would very much
appreciate it. Would also be interested in communicating offline if you
prefer.
Thank you,
Jay
# Locate file ####
parameterization_file <- file.choose()
cd <- dirname(parameterization_file)
# Front matter ####
message("Installing and loading packages...")
# Packages
required_packages <- c("openxlsx", "xlsx",
"magrittr", "data.table",
"reshape2",
"XML")
install_these <- setdiff(required_packages, rownames(installed.packages()))
while (length(install_these) > 0) {
install.packages(install_these, repos = "http://cran.rstudio.com")
install_these <- setdiff(required_packages,
rownames(installed.packages()))
}
suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(XML))
# Options
options(stringsAsFactors = FALSE)
# Functions
message("Loading functions...")
A__in__B <- function(A, B, case = TRUE, ...) {
# Copies
..A <- copy(A)
..B <- copy(B)
setkey(..A, value)
setkey(..B, value)
..A <- unique(..A)
..B <- unique(..B)
# Rownames are unnecessary
..A[, rn := NULL]
..B[, rn := NULL]
# Case sensitivity
if (!case) {
..A <- tableToLower(..A)
..B <- tableToLower(..B)
}
# Check if A is in B
..A[..B, in_B := TRUE, allow.cartesian = TRUE]
if ("in_B" %in% names(..A))
..A[is.na(in_B), in_B := FALSE]
else
..A[, in_B := FALSE]
# Case sensitivity
if (!case)
..A <- tableDropLower(..A)
# Set attributes
setABattr(..A, A, B)
# Return results
setkey(..A, value)
return(..A)
}
A__unique <- function(A, case = TRUE, ...) {
# Copies
..A <- copy(A)
setkey(..A, value)
# Case sensitivity
if (!case)
..A <- tableToLower(..A)
# Check if A_i values are unique
..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
allow.cartesian = TRUE]
if ("is_unique" %in% names(..A))
..A[is.na(is_unique), is_unique := TRUE]
else
..A[, is_unique := TRUE]
# Case sensitivity
if (!case)
..A <- tableDropLower(..A)
# Roll up to value level
..A <- ..A[, list(is_unique = all(is_unique)), keyby = value]
# Return results
return(..A)
}
A_i__in__B <- function(A, B, case = TRUE, ...) {
# Copies
..A <- copy(A)
setkey(..A, value, rn)
..A <- unique(..A)
..B <- copy(B)
setkey(..B, value)
..B %>% unique
# B rownames are unnecessary
..B[, rn := NULL]
# Case sensitivity
if (!case) {
..A <- tableToLower(..A)
..B <- tableToLower(..B)
}
# Check if A is in B
if ("in_B" %in% names(..A))
..A[is.na(in_B), in_B := FALSE]
else
..A[, in_B := FALSE]
# Case sensitivity
if (!case)
..A <- tableDropLower(..A)
# Set attributes
setABattr(..A, A, B)
# Return results
setkey(..A, value, rn)
return(..A)
}
A_i__in__B_i <- function(A, B, case = TRUE, ...) {
# Copies
..A <- copy(A)
setkey(..A, value, rn)
..A <- unique(..A)
..B <- copy(B)
setkey(..B, value, rn)
..B <- unique(..B)
# Case sensitivity
if (!case) {
..A <- tableToLower(..A)
..B <- tableToLower(..B)
}
# Check if A_i terms are in B_i terms
..A[..B, in_B := TRUE, allow.cartesian = TRUE]
if ("in_B" %in% names(..A))
..A[is.na(in_B), in_B := FALSE]
else
..A[, in_B := FALSE]
# Case sensitivity
if (!case)
..A <- tableDropLower(..A)
# Set attributes
setABattr(..A, A, B)
# Return results
setkey(..A, value, rn)
return(..A)
}
A_i__substr__B_i <- function(A, B, case = TRUE, ...) {
# Copies
..A <- copy(A)
setkey(..A, rn)
..B <- copy(B)
setkey(..B, rn)
# Renames
setnames(..A, "value", "A_value")
setnames(..B, "value", "B_value")
# Merge
..X <- ..B[..A, allow.cartesian = TRUE]
# Check if A_i values are substrings of B_i values
..X[is.na(B_value), is_substring := FALSE]
Encoding(..X$A_value) <- "UTF-8"
Encoding(..X$B_value) <- "UTF-8"
if (case) {
..X[!is.na(B_value), is_substring := mapply(
grepl, A_value, B_value, fixed = TRUE)]
} else {
..X[!is.na(B_value), is_substring := mapply(
grepl, tolower(A_value), tolower(B_value), fixed = TRUE)]
}
Encoding(..X$A_value) <- "bytes"
Encoding(..X$B_value) <- "bytes"
# Rename/reorder
..X <- ..X[, list(value = A_value, rn, is_substring)]
# Set attributes
setABattr(..X, A, B)
# Return results
setkey(..X, value, rn)
return(..X)
}
A_i__unique <- function(A, case = TRUE, ...) {
# Copies
..A <- copy(A)
setkey(..A, value)
# Case sensitivity
if (!case)
..A <- tableToLower(..A)
# Check if A_i values are unique
..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
allow.cartesian = TRUE]
if ("is_unique" %in% names(..A))
..A[is.na(is_unique), is_unique := TRUE]
else
..A[, is_unique := TRUE]
# Case sensitivity
if (!case)
..A <- tableDropLower(..A)
# Return results
setkey(..A, value, rn)
return(..A)
}
extractColumn <- function(x, column_name, value_delimiter = NULL, rows NULL)
{
# Validate formatting on column name args
column_name %<>% trimCompress
# Multiple columns?
mult_cols <- grepl(",", column_name)
if (mult_cols)
column_name %<>% strsplit(",") %>% unlist %>%
trimCompress
# Get column + rn
..table <- x[, c("rn", column_name), with = FALSE]
setnames(..table, 2, "value")
# Long if multiple
if (mult_cols) {
..table %<>% melt(1)
..table[, variable := NULL]
}
# Key table by rowname
setkey(..table, rn)
# If rows was provided, subset
if (!is.null(rows))
if (rows != "All")
..table <- ..table[textrange2vector(rows) %>% SJ]
# Split values according to delimiter...
dlm <- Rdelim(value_delimiter)
if (!is.null(dlm))
..values <- strsplit(..table[, value], Rdelim(value_delimiter)) %>%
lapply(trimCompress)
# ... or convert to list if no delimiter
else
..values <- ..table[, value] %>% trimCompress %>% as.list
# Set list name values to rowname values
names(..values) <- ..table[, rn]
# Convert from list to table
..values %<>% melt %>% as.data.table
setnames(..values, 2, "rn")
# Remove any instances of blank values
..values <- ..values[!is.na(value) & grepl("[^[:space:]]",
value)]
# Encode all text to bytes
# Will need to encode to UTF-8 before output to make it readable
Encoding(..values$value) <- "bytes"
if (is.character(..values$rn)) Encoding(..values$rn) <- "bytes"
# If row names can be converted to numeric, do so
if (..values[, rn] %>% is.character)
if (..values[, rn] %>% type.convert %>% is.numeric)
..values[, rn := as.numeric(rn)]
# Key table by value
setkey(..values, value, rn)
# Add attributes
setattr(..values, "file_path", attr(x, "file_path"))
setattr(..values, "sheet_name", attr(x, "sheet_name"))
setattr(..values, "header_row", attr(x, "header_row"))
setattr(..values, "column_name", column_name)
setattr(..values, "rownames_name", attr(x,
"rownames_name"))
setattr(..values, "value_delimiter", value_delimiter)
setattr(..values, "rows", rows)
# Return the values table
return(..values)
}
fillNAlast <- function(x) {
na <- is.na(x)
miss <- which(na)
nonmiss <- which(!na)
map <- outer(nonmiss, miss, "<") %>%
apply(2, . %>% which %>% max)
x[miss] <- x[nonmiss[map]]
return(x)
}
getSheetIndex <- function(file_path, sheet_name) {
# Extract workbook.xml to temporary file that will be deleted at end of
# run
xmlDir <- file.path(tempdir(), "findSheet")
workbook <- unzip(file_path, files = "xl/workbook.xml", exdir =
xmlDir)
on.exit(unlink(xmlDir, recursive = TRUE), add = TRUE)
# Read workbook.xml and get sheet nodes
workbook <- readLines(workbook, warn = FALSE, encoding = "UTF-8")
%>%
unlist
sheets <- gregexpr("<sheet .*/sheets>", workbook, perl =
TRUE) %>%
regmatches(workbook, .) %>%
unlist
# Extract sheet names from nodes, parse as html, and return text
values
sheetNames <- gregexpr('(?<=name=")[^"]+', sheets,
perl = TRUE) %>%
regmatches(sheets, .) %>%
unlist %>%
lapply(htmlParse, asText = TRUE) %>%
sapply(. %>% xpathApply("//body//text()", xmlValue) %>%
unlist)
# Which sheet name is equal to the sheet_name argument?
which(sheetNames == sheet_name)
}
Rdelim <- function(x, ...) {
if (!is.null(x)) {
if (!is.na(x) & x != "None") {
if (x == "Newline") "\\n" else x
} else NULL
} else NULL
}
readSource <- function(file_path, sheet_name, header_row, column_names,
rownames_name = NULL)
{
# Validate formatting on column name args
column_names %<>% strsplit(",") %>% unlist %>%
trimCompress
rownames_name %<>% trimCompress
# Sheet index
sheet_index <- getSheetIndex(file_path, sheet_name)
# Read column names according to header row
..names <- read.xlsx(
xlsxFile = file_path
, sheet = sheet_index
, colNames = FALSE
, rows = header_row
) %>% unlist %>% unname %>% trimCompress
# Read in column plus and any rownames column
..table <- read.xlsx(
xlsxFile = file_path
, sheet = sheet_index
, startRow = header_row
, cols = which(..names %in% c(rownames_name, column_names))
, skipEmptyRows = FALSE
, detectDates = TRUE
) %>% as.data.table
# Set names
setnames(..table,
..names[which(..names %in% c(rownames_name, column_names))])
# Rownames
## If no rownames column, use row number
if (is.null(rownames_name)) {
if (is.null(rows)) ..table[, rn := 1:.N + 1L] else ..table[, rn := rows]
} else { # Otherwise, just copy the column
..table[, rn := lapply(.SD, identity), .SDcols = rownames_name]
}
setcolorder(..table, c("rn", setdiff(names(..table),
"rn")))
# If row can be converted to numeric, do so
if (..table[, rn] %>% is.character)
if (..table[, rn] %>% type.convert %>% is.numeric)
..table[, rn := as.numeric(rn)]
# Key table by row
setkey(..table, rn)
# Add attributes
setattr(..table, "file_path", file_path)
setattr(..table, "sheet_name", sheet_name)
setattr(..table, "header_row", header_row)
setattr(..table, "column_names", column_names)
setattr(..table, "rownames_name", rownames_name)
# Return the values table
return(..table)
}
setABattr <- function(new_table, A, B) {
# Strip existing attributes in new_table
setattr(new_table, "file_path", NULL)
setattr(new_table, "sheet_name", NULL)
setattr(new_table, "header_row", NULL)
setattr(new_table, "column_name", NULL)
setattr(new_table, "rownames_name", NULL)
setattr(new_table, "value_delimiter", NULL)
setattr(new_table, "rows", NULL)
setattr(new_table, "rows_are_rownames", NULL)
# Set A attributes in new_table
setattr(new_table, "A_file_path", attributes(A)$file_path)
setattr(new_table, "A_sheet_name", attributes(A)$sheet_name)
setattr(new_table, "A_header_row", attributes(A)$header_row)
setattr(new_table, "A_column_name", attributes(A)$column_name)
setattr(new_table, "A_rownames_name", attributes(A)$rownames_name)
setattr(new_table, "A_value_delimiter",
attributes(A)$value_delimiter)
setattr(new_table, "A_rows", attributes(A)$rows)
setattr(new_table, "A_rows_are_rownames",
attributes(A)$rows_are_rownames)
# Set B attributes in new_table
setattr(new_table, "B_file_path", attributes(B)$file_path)
setattr(new_table, "B_sheet_name", attributes(B)$sheet_name)
setattr(new_table, "B_header_row", attributes(B)$header_row)
setattr(new_table, "B_column_name", attributes(B)$column_name)
setattr(new_table, "B_rownames_name", attributes(B)$rownames_name)
setattr(new_table, "B_value_delimiter",
attributes(B)$value_delimiter)
setattr(new_table, "B_rows", attributes(B)$rows)
setattr(new_table, "B_rows_are_rownames",
attributes(B)$rows_are_rownames)
}
tableToLower <- function(X, ...) {
# Copy
x <- copy(X)
# Existing keys
keys <- key(x)
setkey(x, NULL)
# Rename value column
setnames(x, "value", "value_orig")
# Derived value column
Encoding(x$value_orig) <- "UTF-8"
x[, value := tolower(value_orig)]
Encoding(x$value) <- "bytes"
Encoding(x$value_orig) <- "bytes"
# Rekey
setkeyv(x, keys)
# Return
return(x)
}
tableDropLower <- function(X, ...) {
# Copy
x <- copy(X)
# Existing keys
keys <- key(x)
setkey(x, NULL)
# Drop derived value column
x[, value := NULL]
# Rename value_orig column
setnames(x, "value_orig", "value")
# Rekey
setkeyv(x, keys)
# Return
return(x)
}
textrange2vector <- function(x) {
strsplit(x, ",") %>%
lapply(
. %>%
strsplit("-") %>%
lapply(as.numeric) %>%
lapply(function(s)
if (length(s) == 1) s
else seq(s[1], s[2]))) %>%
lapply(unlist)
}
trimCompress <- function(x) {
if (!"magrittr" %in% loadedNamespaces()) # check if magrittr is
loaded
library(magrittr) # load if not
if (is.null(x)) return(NULL)
x %>%
gsub("^\\s+", "", .) %>% # remove leading blanks
gsub("\\s+$", "", .) %>% # remove trailing blanks
gsub("\\s+", " ", .) # compress multiple blanks to
one
}
# Read parameterization file ####
message("Reading parameters...")
## Catalog parameters
avail_params <- read.xlsx(
parameterization_file
, "Available Parameters"
, colNames = FALSE
, startRow = 2
) %>% as.data.table
sheet_params <- c("name", "path", "sheet",
"header", "rn")
setnames(avail_params, 1:5, sheet_params)
avail_params <- avail_params[!is.na(name) &
grepl("[^[:space:]]", name)] %>%
melt(id.vars = 1:5, value.name = "columns")
avail_params <- avail_params[, lapply(.SD, . %>% Filter(Negate(is.na), .)
%>%
list), by = eval(sheet_params)]
avail_params[, variable := NULL]
## Analysis parameters
analysis_params <- read.xlsx(
parameterization_file
, "Parameterization"
, startRow = 2
, colNames = FALSE
) %>% as.data.table
setnames(analysis_params, c(
"name1", "col1", "rows1", "dlm1",
"verb", "case",
"name2", "col2", "rows2", "dlm2",
"outname", "outcols", "outflat"
))
analysis_params <- analysis_params[-1][!is.na(name1) &
grepl("[^[:space:]]", name1)]
analysis_params[, n := 1:.N]
## Combine parameters
setkey(avail_params, name)
setkey(analysis_params, name1)
analysis_params[avail_params, ":="(
path1 = path
,sheet1 = sheet
,header1 = header
,rn1 = rn
), allow.cartesian = TRUE]
setkey(analysis_params, name2)
analysis_params[avail_params, ":="(
path2 = path
,sheet2 = sheet
,header2 = header
,rn2 = rn
), allow.cartesian = TRUE]
setkey(analysis_params, n)
# Match actions to functions
verb_function_map <- list(
"A_i__in__B" = c("In", "Not In"),
"A_i__in__B_i" = c("In (Same Row)", "Not In (Same
Row)"),
"A_i__substr__B_i" = c("Substring Of (Same Row)",
"Not Substring Of (Same Row)"),
"A_i__unique" = c("Is Unique", "Not Unique")
) %>% unlist
names(verb_function_map) %<>% gsub("[0-9]+", "", .)
analysis_params[, fun := factor(verb)]
levels(analysis_params$fun) %<>%
match(verb_function_map) %>%
"["(names(verb_function_map), .)
analysis_params$fun %<>% as.character
# Read data sources
message("Reading data sources...")
data_names <- avail_params[, name]
data_list <- replicate(length(data_names), list(), simplify = FALSE)
names(data_list) <- data_names
for (i in 1:nrow(avail_params))
data_list[[i]] <- with(avail_params[i], readSource(
file_path = path
, sheet_name = sheet
, header_row = header
, column_names = columns[[1]]
, rownames_name = rn
))
# Analysis ####
message("Performing comparisons...")
reports <- analyses <- vector("list", nrow(analysis_params))
names(reports) <- names(analyses) <- analysis_params[, outname]
rowAnalysis2report <- function(analysis, params = list()) {
# Create a copy
x <- copy(analysis)
# Subset to logical_val of logical_col
setnames(x, setdiff(names(x), c("rn", "value")),
"logical_col")
x <- x[logical_col == !grepl("Not", params$verb)]
x[, logical_col := NULL]
# Re-encode
Encoding(x$value) <- "UTF-8"
if (is.character(x$rn))
Encoding(x$rn) <- "UTF-8"
# Unique results only
setkey(x, rn, value)
setcolorder(x, key(x))
x <- unique(x)
# Flatten if desired
if (params$outflat == "Yes") {
dlm <- Rdelim(params$dlm1)
if (!is.null(dlm)) {
if (dlm == "\\n") dlm <- "\n"
x <- x[, list(value = paste(value, collapse = dlm)), by = rn]
}
}
# Retrieve all columns if desired
setkey(x, rn)
if (params$outcols == "Yes") {
full_source <- copy(data_list[[params$name1]])
setkey(full_source, rn)
x <- x[full_source, nomatch = 0, allow.cartesian = TRUE]
}
# Rename results columns
if (is.null(params$rn1)) setnames(x, 1, "Row") else {
if (is.na(params$rn1) | params$rn1 == params$col1) setnames(x, 1,
"Row")
else setnames(x, 1, params$rn1)
}
setnames(x, 2, params$col1)
return(x)
}
## Do it
for (i in 1:nrow(analysis_params)) {
r <- analysis_params[i]
args <- list(
A = extractColumn(data_list[[r$name1]], r$col1, r$dlm1, r$rows1),
B = if (!is.na(r$name2))
extractColumn(data_list[[r$name2]], r$col2, r$dlm2, r$rows2),
case = (r$case == "Yes")
)
analyses[[i]] <- do.call(r$fun, args)
reports[[i]] <- rowAnalysis2report(analyses[[i]], r)
rm(r, args)
}
# Output ####
message("Writing results to output file...")
detach("package:openxlsx")
suppressPackageStartupMessages(library(xlsx))
# Output file
exists <- TRUE
i <- 0
while (exists) {
out_file <- if (i > 0) {
file.path(cd, sprintf("Comparison_Reports_%s_(%s).xlsx",
Sys.Date(), i))
} else file.path(cd, sprintf("Comparison_Reports_%s.xlsx",
Sys.Date()))
exists <- file.exists(out_file)
if (!exists)
file.copy(parameterization_file, out_file)
i <- i + 1
}
# Headers
headers <- analysis_params[, lapply(.SD, as.character), .SDcols = c(
"outname", "col1", "verb", "col2",
"case", "name1", "name2",
"rows1", "rows2", "dlm1", "dlm2")]
headers[, case := factor(case, c("Yes", "No"),
c("(Case Sensitive)", "(Not Case
Sensitive)"))]
headers[!is.na(col2), header_title := paste(col1, verb, col2, case)]
headers[is.na(col2), header_title := paste(col1, verb, case)]
headers[, header_time := Sys.time()]
headers$header_col1 <- headers[, list(col1, name1, rows1, dlm1)] %>%
t %>%
as.data.table %>%
lapply(as.list) %>%
lapply(as.data.table) %>%
lapply(setnames, c("Column", "Source", "Rows",
"Delimiter")) %>%
lapply(as.list)
headers$header_col2 <- headers[, list(col2, name2, rows2, dlm2)] %>%
t %>%
as.data.table %>%
lapply(as.list) %>%
lapply(as.data.table) %>%
lapply(setnames, c("Column", "Source", "Rows",
"Delimiter")) %>%
lapply(as.list)
# Write
keep <- c(ls(), "i", "keep")
## Loop through reports and write
for (i in names(reports)) {
message(i, "...")
# Load workbook
wb <- loadWorkbook(out_file)
# Workbook styles
## Header title
hd <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
font = Font(wb, heightInPoints = 16, isBold = TRUE)
)
## Date
dt <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
dataFormat = DataFormat("m/d/yyyy h:mm:ss;@")
)
## Parameters header
ph <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
font = Font(wb, isItalic = TRUE)
)
## Column names header
cn <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
border = Border(position = c("BOTTOM", "TOP"),
pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
font = Font(wb, isBold = TRUE)
)
## Column names header for reproduced data
cnr <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
border = Border(position = c("BOTTOM", "TOP"),
pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
font = Font(wb, isBold = TRUE, isItalic = TRUE)
)
## Values
vl <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP",
wrapText = TRUE)
)
## Values for reproduced data
vlr <- CellStyle(
wb,
alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP",
wrapText = TRUE),
font = Font(wb, isItalic = TRUE)
)
# Create sheet
sh <- createSheet(wb, i)
# Add header rows
h <- headers[outname == i]
addMergedRegion(sh, 1, 1, 1, 10)
addMergedRegion(sh, 2, 2, 1, 10)
rw <- createRow(sh, 1:2)
cl <- createCell(rw, 1)
## Title
addDataFrame(h[, header_title], sh, FALSE, FALSE, 1, 1)
rw <- getRows(sh, 1)
cl <- getCells(rw)
lapply(cl, setCellStyle, hd)
## Date
addDataFrame(h[, header_time], sh, FALSE, FALSE, 2, 1)
rw <- getRows(sh, 2)
cl <- getCells(rw)
lapply(cl, setCellStyle, dt)
## Parameters
addDataFrame(h[, header_col1] %>% as.data.frame, sh, TRUE, FALSE, 4, 1)
if (h[, !is.na(col2)])
addDataFrame(h[, header_col2] %>% as.data.frame, sh, FALSE, FALSE, 6, 1)
rw <- getRows(sh, 4)
cl <- getCells(rw)
lapply(cl, setCellStyle, ph)
rw <- getRows(sh, 5:6)
cl <- getCells(rw)
lapply(cl, setCellStyle, vl)
# Add report
addDataFrame(reports[[i]], sh, TRUE, FALSE, 8, 1)
nc <- ncol(reports[[i]])
## Format column names
rw <- getRows(sh, 8)
cl <- getCells(rw, 1:2)
lapply(cl, setCellStyle, cn)
if (nc > 2) {
cl <- getCells(rw, 3:nc)
lapply(cl, setCellStyle, cnr)
}
## Format values
rw <- getRows(sh, 9:(nrow(reports[[i]]) + 9))
cl <- getCells(rw, 1:2)
lapply(cl, setCellStyle, vl)
if (nc > 2) {
cl <- getCells(rw, 3:nc)
lapply(cl, setCellStyle, vlr)
}
## Add autofilters
if (ncol(reports[[i]]) > 26) {
addAutoFilter(sh, sprintf("A8:%s%s%s",
LETTERS[floor(ncol(reports[[i]]) / 26)],
LETTERS[ncol(reports[[i]]) %% 26],
nrow(reports[[i]]) + 9))
} else {
addAutoFilter(sh, sprintf("A8:%s%s", LETTERS[ncol(reports[[i]])],
nrow(reports[[i]]) + 9))
}
# Autofit columns
autoSizeColumn(sh, 1:ncol(reports[[i]]))
# Create freeze on report column names and results columns
if (nc > 2) createFreezePane(sh, rowSplit = 9, colSplit = 3) else
createFreezePane(sh, rowSplit = 9, colSplit = 1)
# Save
saveWorkbook(wb, out_file)
rm(list = setdiff(ls(), keep))
}
b
[[alternative HTML version deleted]]
Jeff Newmiller
2015-Aug-30 01:35 UTC
[R] Advanced Level Script for Traceability Between Worksheets
Some notes:
1) HTML damages your email on this mailing list (we often see run-on lines and
garbage characters.. definitely not whatever you saw).
2) Massive scripts are off topic... please read the Posting Guide. If you
can't narrow your question to a smaller example then you may really need a
consultant.
3) Periods in this case don't have any special meaning... They just make the
variable look weird.
4) This script makes liberal use of the data.tables package, which has some
advantage in speed and memory efficiency if you are working with large data
sets. The odd indexing used in ..A[..B, in_B := TRUE, allow.cartesian = TRUE] is
a relational join that is discussed in the vignette for the data.table package.
I only use data.tables if I need to optimise memory or execution speed because I
don't find them very intuitive. The fact that this code makes copies so
frequently may indicate that it is not as optimised as it could be. Or perhaps
they are necessary and I just have not looked at it closely enough.
5) A very few uses of %>? and ?<>? are from the magrittr package... I
do find that helpful though it seems like the author here was only just getting
started using it. Again, you would need to read the vignette and/or some
internet tutorials to follow that syntax... I find it much easier than
data.table though it is solving a different problem.
6) I don't consult off list... sorry.
---------------------------------------------------------------------------
Jeff Newmiller The ..... ..... Go Live...
DCN:<jdnewmil at dcn.davis.ca.us> Basics: ##.#. ##.#. Live
Go...
Live: OO#.. Dead: OO#.. Playing
Research Engineer (Solar/Batteries O.O#. #.O#. with
/Software/Embedded Controllers) .OO#. .OO#. rocks...1k
---------------------------------------------------------------------------
Sent from my phone. Please excuse my brevity.
On August 29, 2015 1:19:35 PM PDT, C Campbell <cc571309 at gmail.com>
wrote:>Hi folks - I have almost know R skills yet and have been put 'in
>charge' of
>the below script created by a former employee. Although some of this
>is
>understandable to me, much of it is not. If anyone can help with
>explaining sections, commenting on the skill level it takes to
>understand
>this level of scripting in R, and/or point me to some resources that
>may
>cover some of this (e.g., what is ..A[..B, in_B := TRUE,
>allow.cartesian >TRUE]; and specifically what do the 2 dots mean?), I
would very much
>appreciate it. Would also be interested in communicating offline if
>you
>prefer.
>Thank you,
>Jay
>
>
>
># Locate file ####
>parameterization_file <- file.choose()
>cd <- dirname(parameterization_file)
>
># Front matter ####
>message("Installing and loading packages...")
>
># Packages
>required_packages <- c("openxlsx", "xlsx",
"magrittr", "data.table",
>"reshape2",
> "XML")
>install_these <- setdiff(required_packages,
>rownames(installed.packages()))
>
>while (length(install_these) > 0) {
> install.packages(install_these, repos =
"http://cran.rstudio.com")
> install_these <- setdiff(required_packages,
>rownames(installed.packages()))
>}
>
>suppressPackageStartupMessages(library(openxlsx))
>suppressPackageStartupMessages(library(magrittr))
>suppressPackageStartupMessages(library(data.table))
>suppressPackageStartupMessages(library(reshape2))
>suppressPackageStartupMessages(library(XML))
>
>
># Options
>options(stringsAsFactors = FALSE)
>
># Functions
>message("Loading functions...")
>
>A__in__B <- function(A, B, case = TRUE, ...) {
>
> # Copies
> ..A <- copy(A)
> ..B <- copy(B)
> setkey(..A, value)
> setkey(..B, value)
> ..A <- unique(..A)
> ..B <- unique(..B)
>
> # Rownames are unnecessary
> ..A[, rn := NULL]
> ..B[, rn := NULL]
>
> # Case sensitivity
> if (!case) {
> ..A <- tableToLower(..A)
> ..B <- tableToLower(..B)
> }
>
> # Check if A is in B
> ..A[..B, in_B := TRUE, allow.cartesian = TRUE]
> if ("in_B" %in% names(..A))
> ..A[is.na(in_B), in_B := FALSE]
> else
> ..A[, in_B := FALSE]
>
> # Case sensitivity
> if (!case)
> ..A <- tableDropLower(..A)
>
> # Set attributes
> setABattr(..A, A, B)
>
> # Return results
> setkey(..A, value)
> return(..A)
>
>}
>
>A__unique <- function(A, case = TRUE, ...) {
>
> # Copies
> ..A <- copy(A)
> setkey(..A, value)
>
> # Case sensitivity
> if (!case)
> ..A <- tableToLower(..A)
>
> # Check if A_i values are unique
> ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
> allow.cartesian = TRUE]
> if ("is_unique" %in% names(..A))
> ..A[is.na(is_unique), is_unique := TRUE]
> else
> ..A[, is_unique := TRUE]
>
> # Case sensitivity
> if (!case)
> ..A <- tableDropLower(..A)
>
> # Roll up to value level
> ..A <- ..A[, list(is_unique = all(is_unique)), keyby = value]
>
> # Return results
> return(..A)
>
>}
>
>A_i__in__B <- function(A, B, case = TRUE, ...) {
>
> # Copies
> ..A <- copy(A)
> setkey(..A, value, rn)
> ..A <- unique(..A)
> ..B <- copy(B)
> setkey(..B, value)
> ..B %>% unique
>
> # B rownames are unnecessary
> ..B[, rn := NULL]
>
> # Case sensitivity
> if (!case) {
> ..A <- tableToLower(..A)
> ..B <- tableToLower(..B)
> }
>
> # Check if A is in B
> if ("in_B" %in% names(..A))
> ..A[is.na(in_B), in_B := FALSE]
> else
> ..A[, in_B := FALSE]
>
> # Case sensitivity
> if (!case)
> ..A <- tableDropLower(..A)
>
> # Set attributes
> setABattr(..A, A, B)
>
> # Return results
> setkey(..A, value, rn)
> return(..A)
>
>}
>
>A_i__in__B_i <- function(A, B, case = TRUE, ...) {
>
> # Copies
> ..A <- copy(A)
> setkey(..A, value, rn)
> ..A <- unique(..A)
> ..B <- copy(B)
> setkey(..B, value, rn)
> ..B <- unique(..B)
>
> # Case sensitivity
> if (!case) {
> ..A <- tableToLower(..A)
> ..B <- tableToLower(..B)
> }
>
> # Check if A_i terms are in B_i terms
> ..A[..B, in_B := TRUE, allow.cartesian = TRUE]
> if ("in_B" %in% names(..A))
> ..A[is.na(in_B), in_B := FALSE]
> else
> ..A[, in_B := FALSE]
>
> # Case sensitivity
> if (!case)
> ..A <- tableDropLower(..A)
>
> # Set attributes
> setABattr(..A, A, B)
>
> # Return results
> setkey(..A, value, rn)
> return(..A)
>
>}
>
>A_i__substr__B_i <- function(A, B, case = TRUE, ...) {
>
> # Copies
> ..A <- copy(A)
> setkey(..A, rn)
> ..B <- copy(B)
> setkey(..B, rn)
>
> # Renames
> setnames(..A, "value", "A_value")
> setnames(..B, "value", "B_value")
>
> # Merge
> ..X <- ..B[..A, allow.cartesian = TRUE]
>
> # Check if A_i values are substrings of B_i values
> ..X[is.na(B_value), is_substring := FALSE]
> Encoding(..X$A_value) <- "UTF-8"
> Encoding(..X$B_value) <- "UTF-8"
> if (case) {
> ..X[!is.na(B_value), is_substring := mapply(
> grepl, A_value, B_value, fixed = TRUE)]
> } else {
> ..X[!is.na(B_value), is_substring := mapply(
> grepl, tolower(A_value), tolower(B_value), fixed = TRUE)]
> }
> Encoding(..X$A_value) <- "bytes"
> Encoding(..X$B_value) <- "bytes"
>
> # Rename/reorder
> ..X <- ..X[, list(value = A_value, rn, is_substring)]
>
> # Set attributes
> setABattr(..X, A, B)
>
> # Return results
> setkey(..X, value, rn)
> return(..X)
>
>}
>
>A_i__unique <- function(A, case = TRUE, ...) {
>
> # Copies
> ..A <- copy(A)
> setkey(..A, value)
>
> # Case sensitivity
> if (!case)
> ..A <- tableToLower(..A)
>
> # Check if A_i values are unique
> ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
> allow.cartesian = TRUE]
> if ("is_unique" %in% names(..A))
> ..A[is.na(is_unique), is_unique := TRUE]
> else
> ..A[, is_unique := TRUE]
>
> # Case sensitivity
> if (!case)
> ..A <- tableDropLower(..A)
>
> # Return results
> setkey(..A, value, rn)
> return(..A)
>
>}
>
>extractColumn <- function(x, column_name, value_delimiter = NULL, rows
>>NULL)
>{
>
> # Validate formatting on column name args
> column_name %<>% trimCompress
>
> # Multiple columns?
> mult_cols <- grepl(",", column_name)
> if (mult_cols)
> column_name %<>% strsplit(",") %>% unlist %>%
trimCompress
>
> # Get column + rn
> ..table <- x[, c("rn", column_name), with = FALSE]
> setnames(..table, 2, "value")
>
> # Long if multiple
> if (mult_cols) {
> ..table %<>% melt(1)
> ..table[, variable := NULL]
> }
>
> # Key table by rowname
> setkey(..table, rn)
>
> # If rows was provided, subset
> if (!is.null(rows))
> if (rows != "All")
> ..table <- ..table[textrange2vector(rows) %>% SJ]
>
> # Split values according to delimiter...
> dlm <- Rdelim(value_delimiter)
> if (!is.null(dlm))
> ..values <- strsplit(..table[, value], Rdelim(value_delimiter))
%>%
> lapply(trimCompress)
> # ... or convert to list if no delimiter
> else
> ..values <- ..table[, value] %>% trimCompress %>% as.list
>
> # Set list name values to rowname values
> names(..values) <- ..table[, rn]
>
> # Convert from list to table
> ..values %<>% melt %>% as.data.table
> setnames(..values, 2, "rn")
>
> # Remove any instances of blank values
> ..values <- ..values[!is.na(value) &
grepl("[^[:space:]]", value)]
>
> # Encode all text to bytes
> # Will need to encode to UTF-8 before output to make it readable
> Encoding(..values$value) <- "bytes"
> if (is.character(..values$rn)) Encoding(..values$rn) <-
"bytes"
>
> # If row names can be converted to numeric, do so
> if (..values[, rn] %>% is.character)
> if (..values[, rn] %>% type.convert %>% is.numeric)
> ..values[, rn := as.numeric(rn)]
>
> # Key table by value
> setkey(..values, value, rn)
>
> # Add attributes
> setattr(..values, "file_path", attr(x, "file_path"))
> setattr(..values, "sheet_name", attr(x, "sheet_name"))
> setattr(..values, "header_row", attr(x, "header_row"))
> setattr(..values, "column_name", column_name)
> setattr(..values, "rownames_name", attr(x,
"rownames_name"))
> setattr(..values, "value_delimiter", value_delimiter)
> setattr(..values, "rows", rows)
>
> # Return the values table
> return(..values)
>
>}
>
>fillNAlast <- function(x) {
> na <- is.na(x)
> miss <- which(na)
> nonmiss <- which(!na)
> map <- outer(nonmiss, miss, "<") %>%
> apply(2, . %>% which %>% max)
> x[miss] <- x[nonmiss[map]]
> return(x)
>}
>
>getSheetIndex <- function(file_path, sheet_name) {
>
># Extract workbook.xml to temporary file that will be deleted at end of
> # run
> xmlDir <- file.path(tempdir(), "findSheet")
>workbook <- unzip(file_path, files = "xl/workbook.xml", exdir =
xmlDir)
> on.exit(unlink(xmlDir, recursive = TRUE), add = TRUE)
>
> # Read workbook.xml and get sheet nodes
> workbook <- readLines(workbook, warn = FALSE, encoding =
"UTF-8") %>%
> unlist
> sheets <- gregexpr("<sheet .*/sheets>", workbook, perl
= TRUE) %>%
> regmatches(workbook, .) %>%
> unlist
>
> # Extract sheet names from nodes, parse as html, and return text
>values
> sheetNames <- gregexpr('(?<=name=")[^"]+', sheets,
perl = TRUE) %>%
> regmatches(sheets, .) %>%
> unlist %>%
> lapply(htmlParse, asText = TRUE) %>%
> sapply(. %>% xpathApply("//body//text()", xmlValue) %>%
unlist)
>
> # Which sheet name is equal to the sheet_name argument?
> which(sheetNames == sheet_name)
>
>}
>
>Rdelim <- function(x, ...) {
> if (!is.null(x)) {
> if (!is.na(x) & x != "None") {
> if (x == "Newline") "\\n" else x
> } else NULL
> } else NULL
>}
>
>readSource <- function(file_path, sheet_name, header_row, column_names,
> rownames_name = NULL)
>{
>
> # Validate formatting on column name args
> column_names %<>% strsplit(",") %>% unlist %>%
trimCompress
> rownames_name %<>% trimCompress
>
> # Sheet index
> sheet_index <- getSheetIndex(file_path, sheet_name)
>
> # Read column names according to header row
> ..names <- read.xlsx(
> xlsxFile = file_path
> , sheet = sheet_index
> , colNames = FALSE
> , rows = header_row
> ) %>% unlist %>% unname %>% trimCompress
>
> # Read in column plus and any rownames column
> ..table <- read.xlsx(
> xlsxFile = file_path
> , sheet = sheet_index
> , startRow = header_row
> , cols = which(..names %in% c(rownames_name, column_names))
> , skipEmptyRows = FALSE
> , detectDates = TRUE
> ) %>% as.data.table
>
> # Set names
> setnames(..table,
> ..names[which(..names %in% c(rownames_name, column_names))])
>
> # Rownames
> ## If no rownames column, use row number
> if (is.null(rownames_name)) {
>if (is.null(rows)) ..table[, rn := 1:.N + 1L] else ..table[, rn :>rows]
> } else { # Otherwise, just copy the column
> ..table[, rn := lapply(.SD, identity), .SDcols = rownames_name]
> }
> setcolorder(..table, c("rn", setdiff(names(..table),
"rn")))
>
> # If row can be converted to numeric, do so
> if (..table[, rn] %>% is.character)
> if (..table[, rn] %>% type.convert %>% is.numeric)
> ..table[, rn := as.numeric(rn)]
>
> # Key table by row
> setkey(..table, rn)
>
> # Add attributes
> setattr(..table, "file_path", file_path)
> setattr(..table, "sheet_name", sheet_name)
> setattr(..table, "header_row", header_row)
> setattr(..table, "column_names", column_names)
> setattr(..table, "rownames_name", rownames_name)
>
> # Return the values table
> return(..table)
>
>}
>
>setABattr <- function(new_table, A, B) {
>
> # Strip existing attributes in new_table
> setattr(new_table, "file_path", NULL)
> setattr(new_table, "sheet_name", NULL)
> setattr(new_table, "header_row", NULL)
> setattr(new_table, "column_name", NULL)
> setattr(new_table, "rownames_name", NULL)
> setattr(new_table, "value_delimiter", NULL)
> setattr(new_table, "rows", NULL)
> setattr(new_table, "rows_are_rownames", NULL)
>
> # Set A attributes in new_table
> setattr(new_table, "A_file_path", attributes(A)$file_path)
> setattr(new_table, "A_sheet_name", attributes(A)$sheet_name)
> setattr(new_table, "A_header_row", attributes(A)$header_row)
> setattr(new_table, "A_column_name", attributes(A)$column_name)
> setattr(new_table, "A_rownames_name",
attributes(A)$rownames_name)
> setattr(new_table, "A_value_delimiter",
attributes(A)$value_delimiter)
> setattr(new_table, "A_rows", attributes(A)$rows)
>setattr(new_table, "A_rows_are_rownames",
>attributes(A)$rows_are_rownames)
>
> # Set B attributes in new_table
> setattr(new_table, "B_file_path", attributes(B)$file_path)
> setattr(new_table, "B_sheet_name", attributes(B)$sheet_name)
> setattr(new_table, "B_header_row", attributes(B)$header_row)
> setattr(new_table, "B_column_name", attributes(B)$column_name)
> setattr(new_table, "B_rownames_name",
attributes(B)$rownames_name)
> setattr(new_table, "B_value_delimiter",
attributes(B)$value_delimiter)
> setattr(new_table, "B_rows", attributes(B)$rows)
>setattr(new_table, "B_rows_are_rownames",
>attributes(B)$rows_are_rownames)
>
>}
>
>tableToLower <- function(X, ...) {
>
> # Copy
> x <- copy(X)
>
> # Existing keys
> keys <- key(x)
> setkey(x, NULL)
>
> # Rename value column
> setnames(x, "value", "value_orig")
>
> # Derived value column
> Encoding(x$value_orig) <- "UTF-8"
> x[, value := tolower(value_orig)]
> Encoding(x$value) <- "bytes"
> Encoding(x$value_orig) <- "bytes"
>
> # Rekey
> setkeyv(x, keys)
>
> # Return
> return(x)
>
>}
>
>tableDropLower <- function(X, ...) {
>
> # Copy
> x <- copy(X)
>
> # Existing keys
> keys <- key(x)
> setkey(x, NULL)
>
> # Drop derived value column
> x[, value := NULL]
>
> # Rename value_orig column
> setnames(x, "value_orig", "value")
>
> # Rekey
> setkeyv(x, keys)
>
> # Return
> return(x)
>
>}
>
>textrange2vector <- function(x) {
> strsplit(x, ",") %>%
> lapply(
> . %>%
> strsplit("-") %>%
> lapply(as.numeric) %>%
> lapply(function(s)
> if (length(s) == 1) s
> else seq(s[1], s[2]))) %>%
> lapply(unlist)
>}
>
>trimCompress <- function(x) {
>
> if (!"magrittr" %in% loadedNamespaces()) # check if magrittr is
loaded
> library(magrittr) # load if not
>
> if (is.null(x)) return(NULL)
>
> x %>%
> gsub("^\\s+", "", .) %>% # remove leading blanks
> gsub("\\s+$", "", .) %>% # remove trailing blanks
> gsub("\\s+", " ", .) # compress multiple blanks
to one
>
>}
>
>
>
>
>
>
>
># Read parameterization file ####
>
>message("Reading parameters...")
>
>## Catalog parameters
>avail_params <- read.xlsx(
> parameterization_file
> , "Available Parameters"
> , colNames = FALSE
> , startRow = 2
>) %>% as.data.table
>sheet_params <- c("name", "path", "sheet",
"header", "rn")
>setnames(avail_params, 1:5, sheet_params)
>avail_params <- avail_params[!is.na(name) &
grepl("[^[:space:]]",
>name)] %>%
> melt(id.vars = 1:5, value.name = "columns")
>avail_params <- avail_params[, lapply(.SD, . %>% Filter(Negate(is.na),
>.)
>%>%
> list), by = eval(sheet_params)]
>avail_params[, variable := NULL]
>
>## Analysis parameters
>analysis_params <- read.xlsx(
> parameterization_file
> , "Parameterization"
> , startRow = 2
> , colNames = FALSE
>) %>% as.data.table
>setnames(analysis_params, c(
> "name1", "col1", "rows1", "dlm1",
> "verb", "case",
> "name2", "col2", "rows2", "dlm2",
> "outname", "outcols", "outflat"
>))
>analysis_params <- analysis_params[-1][!is.na(name1) &
> grepl("[^[:space:]]",
name1)]
>analysis_params[, n := 1:.N]
>
>## Combine parameters
>setkey(avail_params, name)
>setkey(analysis_params, name1)
>analysis_params[avail_params, ":="(
> path1 = path
> ,sheet1 = sheet
> ,header1 = header
> ,rn1 = rn
>), allow.cartesian = TRUE]
>setkey(analysis_params, name2)
>analysis_params[avail_params, ":="(
> path2 = path
> ,sheet2 = sheet
> ,header2 = header
> ,rn2 = rn
>), allow.cartesian = TRUE]
>setkey(analysis_params, n)
>
>
># Match actions to functions
>verb_function_map <- list(
> "A_i__in__B" = c("In", "Not In"),
> "A_i__in__B_i" = c("In (Same Row)", "Not In (Same
Row)"),
> "A_i__substr__B_i" = c("Substring Of (Same Row)",
> "Not Substring Of (Same Row)"),
> "A_i__unique" = c("Is Unique", "Not Unique")
>) %>% unlist
>names(verb_function_map) %<>% gsub("[0-9]+", "",
.)
>analysis_params[, fun := factor(verb)]
>levels(analysis_params$fun) %<>%
> match(verb_function_map) %>%
> "["(names(verb_function_map), .)
>analysis_params$fun %<>% as.character
>
>
>
># Read data sources
>
>message("Reading data sources...")
>
>data_names <- avail_params[, name]
>data_list <- replicate(length(data_names), list(), simplify = FALSE)
>names(data_list) <- data_names
>for (i in 1:nrow(avail_params))
> data_list[[i]] <- with(avail_params[i], readSource(
> file_path = path
> , sheet_name = sheet
> , header_row = header
> , column_names = columns[[1]]
> , rownames_name = rn
> ))
>
>
>
>
># Analysis ####
>
>message("Performing comparisons...")
>
>reports <- analyses <- vector("list", nrow(analysis_params))
>names(reports) <- names(analyses) <- analysis_params[, outname]
>
>rowAnalysis2report <- function(analysis, params = list()) {
>
> # Create a copy
> x <- copy(analysis)
>
> # Subset to logical_val of logical_col
> setnames(x, setdiff(names(x), c("rn", "value")),
"logical_col")
> x <- x[logical_col == !grepl("Not", params$verb)]
> x[, logical_col := NULL]
>
> # Re-encode
> Encoding(x$value) <- "UTF-8"
> if (is.character(x$rn))
> Encoding(x$rn) <- "UTF-8"
>
> # Unique results only
> setkey(x, rn, value)
> setcolorder(x, key(x))
> x <- unique(x)
>
> # Flatten if desired
> if (params$outflat == "Yes") {
> dlm <- Rdelim(params$dlm1)
> if (!is.null(dlm)) {
> if (dlm == "\\n") dlm <- "\n"
> x <- x[, list(value = paste(value, collapse = dlm)), by = rn]
> }
> }
>
> # Retrieve all columns if desired
> setkey(x, rn)
> if (params$outcols == "Yes") {
> full_source <- copy(data_list[[params$name1]])
> setkey(full_source, rn)
> x <- x[full_source, nomatch = 0, allow.cartesian = TRUE]
> }
>
> # Rename results columns
> if (is.null(params$rn1)) setnames(x, 1, "Row") else {
> if (is.na(params$rn1) | params$rn1 == params$col1) setnames(x, 1,
>"Row")
> else setnames(x, 1, params$rn1)
> }
> setnames(x, 2, params$col1)
>
> return(x)
>
>}
>
>## Do it
>for (i in 1:nrow(analysis_params)) {
> r <- analysis_params[i]
> args <- list(
> A = extractColumn(data_list[[r$name1]], r$col1, r$dlm1, r$rows1),
> B = if (!is.na(r$name2))
> extractColumn(data_list[[r$name2]], r$col2, r$dlm2, r$rows2),
> case = (r$case == "Yes")
> )
> analyses[[i]] <- do.call(r$fun, args)
> reports[[i]] <- rowAnalysis2report(analyses[[i]], r)
> rm(r, args)
>}
>
>
>
>
>
>
># Output ####
>
>message("Writing results to output file...")
>
>detach("package:openxlsx")
>suppressPackageStartupMessages(library(xlsx))
>
># Output file
>exists <- TRUE
>i <- 0
>while (exists) {
> out_file <- if (i > 0) {
>file.path(cd, sprintf("Comparison_Reports_%s_(%s).xlsx",
Sys.Date(),
>i))
>} else file.path(cd, sprintf("Comparison_Reports_%s.xlsx",
Sys.Date()))
> exists <- file.exists(out_file)
> if (!exists)
> file.copy(parameterization_file, out_file)
> i <- i + 1
>}
>
># Headers
>headers <- analysis_params[, lapply(.SD, as.character), .SDcols = c(
> "outname", "col1", "verb", "col2",
"case", "name1", "name2",
> "rows1", "rows2", "dlm1", "dlm2")]
>headers[, case := factor(case, c("Yes", "No"),
> c("(Case Sensitive)", "(Not Case
Sensitive)"))]
>headers[!is.na(col2), header_title := paste(col1, verb, col2, case)]
>headers[is.na(col2), header_title := paste(col1, verb, case)]
>headers[, header_time := Sys.time()]
>headers$header_col1 <- headers[, list(col1, name1, rows1, dlm1)] %>%
> t %>%
> as.data.table %>%
> lapply(as.list) %>%
> lapply(as.data.table) %>%
> lapply(setnames, c("Column", "Source",
"Rows", "Delimiter")) %>%
> lapply(as.list)
>headers$header_col2 <- headers[, list(col2, name2, rows2, dlm2)] %>%
> t %>%
> as.data.table %>%
> lapply(as.list) %>%
> lapply(as.data.table) %>%
> lapply(setnames, c("Column", "Source",
"Rows", "Delimiter")) %>%
> lapply(as.list)
>
>
># Write
>keep <- c(ls(), "i", "keep")
>
>## Loop through reports and write
>for (i in names(reports)) {
>
> message(i, "...")
>
> # Load workbook
> wb <- loadWorkbook(out_file)
>
> # Workbook styles
>
> ## Header title
> hd <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
> font = Font(wb, heightInPoints = 16, isBold = TRUE)
> )
>
> ## Date
> dt <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
> dataFormat = DataFormat("m/d/yyyy h:mm:ss;@")
> )
>
> ## Parameters header
> ph <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
> font = Font(wb, isItalic = TRUE)
> )
>
> ## Column names header
> cn <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
> border = Border(position = c("BOTTOM", "TOP"),
> pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
> font = Font(wb, isBold = TRUE)
> )
>
> ## Column names header for reproduced data
> cnr <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
> border = Border(position = c("BOTTOM", "TOP"),
> pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
> font = Font(wb, isBold = TRUE, isItalic = TRUE)
> )
>
> ## Values
> vl <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP",
> wrapText = TRUE)
> )
>
> ## Values for reproduced data
> vlr <- CellStyle(
> wb,
> alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP",
> wrapText = TRUE),
> font = Font(wb, isItalic = TRUE)
> )
>
>
> # Create sheet
> sh <- createSheet(wb, i)
>
> # Add header rows
> h <- headers[outname == i]
> addMergedRegion(sh, 1, 1, 1, 10)
> addMergedRegion(sh, 2, 2, 1, 10)
> rw <- createRow(sh, 1:2)
> cl <- createCell(rw, 1)
>
> ## Title
> addDataFrame(h[, header_title], sh, FALSE, FALSE, 1, 1)
> rw <- getRows(sh, 1)
> cl <- getCells(rw)
> lapply(cl, setCellStyle, hd)
>
> ## Date
> addDataFrame(h[, header_time], sh, FALSE, FALSE, 2, 1)
> rw <- getRows(sh, 2)
> cl <- getCells(rw)
> lapply(cl, setCellStyle, dt)
>
> ## Parameters
>addDataFrame(h[, header_col1] %>% as.data.frame, sh, TRUE, FALSE, 4, 1)
> if (h[, !is.na(col2)])
>addDataFrame(h[, header_col2] %>% as.data.frame, sh, FALSE, FALSE, 6,
>1)
> rw <- getRows(sh, 4)
> cl <- getCells(rw)
> lapply(cl, setCellStyle, ph)
> rw <- getRows(sh, 5:6)
> cl <- getCells(rw)
> lapply(cl, setCellStyle, vl)
>
> # Add report
> addDataFrame(reports[[i]], sh, TRUE, FALSE, 8, 1)
> nc <- ncol(reports[[i]])
>
> ## Format column names
> rw <- getRows(sh, 8)
> cl <- getCells(rw, 1:2)
> lapply(cl, setCellStyle, cn)
> if (nc > 2) {
> cl <- getCells(rw, 3:nc)
> lapply(cl, setCellStyle, cnr)
> }
>
> ## Format values
> rw <- getRows(sh, 9:(nrow(reports[[i]]) + 9))
> cl <- getCells(rw, 1:2)
> lapply(cl, setCellStyle, vl)
> if (nc > 2) {
> cl <- getCells(rw, 3:nc)
> lapply(cl, setCellStyle, vlr)
> }
>
> ## Add autofilters
> if (ncol(reports[[i]]) > 26) {
> addAutoFilter(sh, sprintf("A8:%s%s%s",
> LETTERS[floor(ncol(reports[[i]]) / 26)],
> LETTERS[ncol(reports[[i]]) %% 26],
> nrow(reports[[i]]) + 9))
> } else {
> addAutoFilter(sh, sprintf("A8:%s%s",
LETTERS[ncol(reports[[i]])],
> nrow(reports[[i]]) + 9))
> }
>
> # Autofit columns
> autoSizeColumn(sh, 1:ncol(reports[[i]]))
>
> # Create freeze on report column names and results columns
> if (nc > 2) createFreezePane(sh, rowSplit = 9, colSplit = 3) else
> createFreezePane(sh, rowSplit = 9, colSplit = 1)
>
> # Save
> saveWorkbook(wb, out_file)
> rm(list = setdiff(ls(), keep))
>
>}
>b
>
> [[alternative HTML version deleted]]
>
>______________________________________________
>R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>https://stat.ethz.ch/mailman/listinfo/r-help
>PLEASE do read the posting guide
>http://www.R-project.org/posting-guide.html
>and provide commented, minimal, self-contained, reproducible code.