thr3ads.net - R help - [R] Advanced Level Script for Traceability Between Worksheets [Aug 2015]

If this information is useful, please help other people find it:
Share via:

C Campbell

2015-Aug-29 20:19 UTC

[R] Advanced Level Script for Traceability Between Worksheets

Hi folks - I have almost know R skills yet and have been put 'in charge'
of
the below script created by a former employee.  Although some of this is
understandable to me, much of it is not.  If anyone can help with
explaining sections, commenting on the skill level it takes to understand
this level of scripting in R, and/or point me to some resources that may
cover some of this (e.g., what is ..A[..B, in_B := TRUE, allow.cartesian TRUE];
and specifically what do the 2 dots mean?), I would very much
appreciate it.  Would also be interested in communicating offline if you
prefer.
Thank you,
Jay



# Locate file ####
parameterization_file <- file.choose()
cd <- dirname(parameterization_file)

# Front matter ####
message("Installing and loading packages...")

# Packages
required_packages <- c("openxlsx", "xlsx",
"magrittr", "data.table",
"reshape2",
                       "XML")
install_these <- setdiff(required_packages, rownames(installed.packages()))

while (length(install_these) > 0) {
  install.packages(install_these, repos = "http://cran.rstudio.com")
  install_these <- setdiff(required_packages,
rownames(installed.packages()))
}

suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(XML))


# Options
options(stringsAsFactors = FALSE)

# Functions
message("Loading functions...")

A__in__B <- function(A, B, case = TRUE, ...) {

  # Copies
  ..A <- copy(A)
  ..B <- copy(B)
  setkey(..A, value)
  setkey(..B, value)
  ..A <- unique(..A)
  ..B <- unique(..B)

  # Rownames are unnecessary
  ..A[, rn := NULL]
  ..B[, rn := NULL]

  # Case sensitivity
  if (!case) {
    ..A <- tableToLower(..A)
    ..B <- tableToLower(..B)
  }

  # Check if A is in B
  ..A[..B, in_B := TRUE, allow.cartesian = TRUE]
  if ("in_B" %in% names(..A))
    ..A[is.na(in_B), in_B := FALSE]
  else
    ..A[, in_B := FALSE]

  # Case sensitivity
  if (!case)
    ..A <- tableDropLower(..A)

  # Set attributes
  setABattr(..A, A, B)

  # Return results
  setkey(..A, value)
  return(..A)

}

A__unique <- function(A, case = TRUE, ...) {

  # Copies
  ..A <- copy(A)
  setkey(..A, value)

  # Case sensitivity
  if (!case)
    ..A <- tableToLower(..A)

  # Check if A_i values are unique
  ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
      allow.cartesian = TRUE]
  if ("is_unique" %in% names(..A))
    ..A[is.na(is_unique), is_unique := TRUE]
  else
    ..A[, is_unique := TRUE]

  # Case sensitivity
  if (!case)
    ..A <- tableDropLower(..A)

  # Roll up to value level
  ..A <- ..A[, list(is_unique = all(is_unique)), keyby = value]

  # Return results
  return(..A)

}

A_i__in__B <- function(A, B, case = TRUE, ...) {

  # Copies
  ..A <- copy(A)
  setkey(..A, value, rn)
  ..A <- unique(..A)
  ..B <- copy(B)
  setkey(..B, value)
  ..B %>% unique

  # B rownames are unnecessary
  ..B[, rn := NULL]

  # Case sensitivity
  if (!case) {
    ..A <- tableToLower(..A)
    ..B <- tableToLower(..B)
  }

  # Check if A is in B
  if ("in_B" %in% names(..A))
    ..A[is.na(in_B), in_B := FALSE]
  else
    ..A[, in_B := FALSE]

  # Case sensitivity
  if (!case)
    ..A <- tableDropLower(..A)

  # Set attributes
  setABattr(..A, A, B)

  # Return results
  setkey(..A, value, rn)
  return(..A)

}

A_i__in__B_i <- function(A, B, case = TRUE, ...) {

  # Copies
  ..A <- copy(A)
  setkey(..A, value, rn)
  ..A <- unique(..A)
  ..B <- copy(B)
  setkey(..B, value, rn)
  ..B <- unique(..B)

  # Case sensitivity
  if (!case) {
    ..A <- tableToLower(..A)
    ..B <- tableToLower(..B)
  }

  # Check if A_i terms are in B_i terms
  ..A[..B, in_B := TRUE, allow.cartesian = TRUE]
  if ("in_B" %in% names(..A))
    ..A[is.na(in_B), in_B := FALSE]
  else
    ..A[, in_B := FALSE]

  # Case sensitivity
  if (!case)
    ..A <- tableDropLower(..A)

  # Set attributes
  setABattr(..A, A, B)

  # Return results
  setkey(..A, value, rn)
  return(..A)

}

A_i__substr__B_i <- function(A, B, case = TRUE, ...) {

  # Copies
  ..A <- copy(A)
  setkey(..A, rn)
  ..B <- copy(B)
  setkey(..B, rn)

  # Renames
  setnames(..A, "value", "A_value")
  setnames(..B, "value", "B_value")

  # Merge
  ..X <- ..B[..A, allow.cartesian = TRUE]

  # Check if A_i values are substrings of B_i values
  ..X[is.na(B_value), is_substring := FALSE]
  Encoding(..X$A_value) <- "UTF-8"
  Encoding(..X$B_value) <- "UTF-8"
  if (case) {
    ..X[!is.na(B_value), is_substring := mapply(
      grepl, A_value, B_value, fixed = TRUE)]
  } else {
    ..X[!is.na(B_value), is_substring := mapply(
      grepl, tolower(A_value), tolower(B_value), fixed = TRUE)]
  }
  Encoding(..X$A_value) <- "bytes"
  Encoding(..X$B_value) <- "bytes"

  # Rename/reorder
  ..X <- ..X[, list(value = A_value, rn, is_substring)]

  # Set attributes
  setABattr(..X, A, B)

  # Return results
  setkey(..X, value, rn)
  return(..X)

}

A_i__unique <- function(A, case = TRUE, ...) {

  # Copies
  ..A <- copy(A)
  setkey(..A, value)

  # Case sensitivity
  if (!case)
    ..A <- tableToLower(..A)

  # Check if A_i values are unique
  ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
      allow.cartesian = TRUE]
  if ("is_unique" %in% names(..A))
    ..A[is.na(is_unique), is_unique := TRUE]
  else
    ..A[, is_unique := TRUE]

  # Case sensitivity
  if (!case)
    ..A <- tableDropLower(..A)

  # Return results
  setkey(..A, value, rn)
  return(..A)

}

extractColumn <- function(x, column_name, value_delimiter = NULL, rows NULL)
{

  # Validate formatting on column name args
  column_name %<>% trimCompress

  # Multiple columns?
  mult_cols <- grepl(",", column_name)
  if (mult_cols)
    column_name %<>% strsplit(",") %>% unlist %>%
trimCompress

  # Get column + rn
  ..table <- x[, c("rn", column_name), with = FALSE]
  setnames(..table, 2, "value")

  # Long if multiple
  if (mult_cols) {
    ..table %<>% melt(1)
    ..table[, variable := NULL]
  }

  # Key table by rowname
  setkey(..table, rn)

  # If rows was provided, subset
  if (!is.null(rows))
    if (rows != "All")
      ..table <- ..table[textrange2vector(rows) %>% SJ]

  # Split values according to delimiter...
  dlm <- Rdelim(value_delimiter)
  if (!is.null(dlm))
    ..values <- strsplit(..table[, value], Rdelim(value_delimiter)) %>%
    lapply(trimCompress)
  # ... or convert to list if no delimiter
  else
    ..values <- ..table[, value] %>% trimCompress %>% as.list

  # Set list name values to rowname values
  names(..values) <- ..table[, rn]

  # Convert from list to table
  ..values %<>% melt %>% as.data.table
  setnames(..values, 2, "rn")

  # Remove any instances of blank values
  ..values <- ..values[!is.na(value) & grepl("[^[:space:]]",
value)]

  # Encode all text to bytes
  # Will need to encode to UTF-8 before output to make it readable
  Encoding(..values$value) <- "bytes"
  if (is.character(..values$rn)) Encoding(..values$rn) <- "bytes"

  # If row names can be converted to numeric, do so
  if (..values[, rn] %>% is.character)
    if (..values[, rn] %>% type.convert %>% is.numeric)
      ..values[, rn := as.numeric(rn)]

  # Key table by value
  setkey(..values, value, rn)

  # Add attributes
  setattr(..values, "file_path", attr(x, "file_path"))
  setattr(..values, "sheet_name", attr(x, "sheet_name"))
  setattr(..values, "header_row", attr(x, "header_row"))
  setattr(..values, "column_name", column_name)
  setattr(..values, "rownames_name", attr(x,
"rownames_name"))
  setattr(..values, "value_delimiter", value_delimiter)
  setattr(..values, "rows", rows)

  # Return the values table
  return(..values)

}

fillNAlast <- function(x) {
  na <- is.na(x)
  miss <- which(na)
  nonmiss <- which(!na)
  map <- outer(nonmiss, miss, "<") %>%
    apply(2, . %>% which %>% max)
  x[miss] <- x[nonmiss[map]]
  return(x)
}

getSheetIndex <- function(file_path, sheet_name) {

  # Extract workbook.xml to temporary file that will be deleted at end of
  # run
  xmlDir <- file.path(tempdir(), "findSheet")
  workbook <- unzip(file_path, files = "xl/workbook.xml", exdir =
xmlDir)
  on.exit(unlink(xmlDir, recursive = TRUE), add = TRUE)

  # Read workbook.xml and get sheet nodes
  workbook <- readLines(workbook, warn = FALSE, encoding = "UTF-8")
%>%
    unlist
  sheets <- gregexpr("<sheet .*/sheets>", workbook, perl =
TRUE) %>%
    regmatches(workbook, .) %>%
    unlist

  # Extract sheet names from nodes, parse as html, and return text
values
  sheetNames <- gregexpr('(?<=name=")[^"]+', sheets,
perl = TRUE) %>%
    regmatches(sheets, .) %>%
    unlist %>%
    lapply(htmlParse, asText = TRUE) %>%
    sapply(. %>% xpathApply("//body//text()", xmlValue) %>%
unlist)

  # Which sheet name is equal to the sheet_name argument?
  which(sheetNames == sheet_name)

}

Rdelim <- function(x, ...) {
  if (!is.null(x)) {
    if (!is.na(x) & x != "None") {
      if (x == "Newline") "\\n" else x
    } else NULL
  } else NULL
}

readSource <- function(file_path, sheet_name, header_row, column_names,
                       rownames_name = NULL)
{

  # Validate formatting on column name args
  column_names %<>% strsplit(",") %>% unlist %>%
trimCompress
  rownames_name %<>% trimCompress

  # Sheet index
  sheet_index <- getSheetIndex(file_path, sheet_name)

  # Read column names according to header row
  ..names <- read.xlsx(
    xlsxFile = file_path
    , sheet = sheet_index
    , colNames = FALSE
    , rows = header_row
  ) %>% unlist %>% unname %>% trimCompress

  # Read in column plus and any rownames column
  ..table <- read.xlsx(
    xlsxFile = file_path
    , sheet = sheet_index
    , startRow = header_row
    , cols = which(..names %in% c(rownames_name, column_names))
    , skipEmptyRows = FALSE
    , detectDates = TRUE
  ) %>% as.data.table

  # Set names
  setnames(..table,
           ..names[which(..names %in% c(rownames_name, column_names))])

  # Rownames
  ## If no rownames column, use row number
  if (is.null(rownames_name)) {
    if (is.null(rows)) ..table[, rn := 1:.N + 1L] else ..table[, rn := rows]
  } else { # Otherwise, just copy the column
    ..table[, rn := lapply(.SD, identity), .SDcols = rownames_name]
  }
  setcolorder(..table, c("rn", setdiff(names(..table),
"rn")))

  # If row can be converted to numeric, do so
  if (..table[, rn] %>% is.character)
    if (..table[, rn] %>% type.convert %>% is.numeric)
      ..table[, rn := as.numeric(rn)]

  # Key table by row
  setkey(..table, rn)

  # Add attributes
  setattr(..table, "file_path", file_path)
  setattr(..table, "sheet_name", sheet_name)
  setattr(..table, "header_row", header_row)
  setattr(..table, "column_names", column_names)
  setattr(..table, "rownames_name", rownames_name)

  # Return the values table
  return(..table)

}

setABattr <- function(new_table, A, B) {

  # Strip existing attributes in new_table
  setattr(new_table, "file_path", NULL)
  setattr(new_table, "sheet_name", NULL)
  setattr(new_table, "header_row", NULL)
  setattr(new_table, "column_name", NULL)
  setattr(new_table, "rownames_name", NULL)
  setattr(new_table, "value_delimiter", NULL)
  setattr(new_table, "rows", NULL)
  setattr(new_table, "rows_are_rownames", NULL)

  # Set A attributes in new_table
  setattr(new_table, "A_file_path", attributes(A)$file_path)
  setattr(new_table, "A_sheet_name", attributes(A)$sheet_name)
  setattr(new_table, "A_header_row", attributes(A)$header_row)
  setattr(new_table, "A_column_name", attributes(A)$column_name)
  setattr(new_table, "A_rownames_name", attributes(A)$rownames_name)
  setattr(new_table, "A_value_delimiter",
attributes(A)$value_delimiter)
  setattr(new_table, "A_rows", attributes(A)$rows)
  setattr(new_table, "A_rows_are_rownames",
attributes(A)$rows_are_rownames)

  # Set B attributes in new_table
  setattr(new_table, "B_file_path", attributes(B)$file_path)
  setattr(new_table, "B_sheet_name", attributes(B)$sheet_name)
  setattr(new_table, "B_header_row", attributes(B)$header_row)
  setattr(new_table, "B_column_name", attributes(B)$column_name)
  setattr(new_table, "B_rownames_name", attributes(B)$rownames_name)
  setattr(new_table, "B_value_delimiter",
attributes(B)$value_delimiter)
  setattr(new_table, "B_rows", attributes(B)$rows)
  setattr(new_table, "B_rows_are_rownames",
attributes(B)$rows_are_rownames)

}

tableToLower <- function(X, ...) {

  # Copy
  x <- copy(X)

  # Existing keys
  keys <- key(x)
  setkey(x, NULL)

  # Rename value column
  setnames(x, "value", "value_orig")

  # Derived value column
  Encoding(x$value_orig) <- "UTF-8"
  x[, value := tolower(value_orig)]
  Encoding(x$value) <- "bytes"
  Encoding(x$value_orig) <- "bytes"

  # Rekey
  setkeyv(x, keys)

  # Return
  return(x)

}

tableDropLower <- function(X, ...) {

  # Copy
  x <- copy(X)

  # Existing keys
  keys <- key(x)
  setkey(x, NULL)

  # Drop derived value column
  x[, value := NULL]

  # Rename value_orig column
  setnames(x, "value_orig", "value")

  # Rekey
  setkeyv(x, keys)

  # Return
  return(x)

}

textrange2vector <- function(x) {
  strsplit(x, ",") %>%
    lapply(
      . %>%
        strsplit("-") %>%
        lapply(as.numeric) %>%
        lapply(function(s)
          if (length(s) == 1) s
          else seq(s[1], s[2]))) %>%
    lapply(unlist)
}

trimCompress <- function(x) {

  if (!"magrittr" %in% loadedNamespaces()) # check if magrittr is
loaded
    library(magrittr)                      # load if not

  if (is.null(x)) return(NULL)

  x %>%
    gsub("^\\s+", "", .) %>% # remove leading blanks
    gsub("\\s+$", "", .) %>% # remove trailing blanks
    gsub("\\s+", " ", .)     # compress multiple blanks to
one

}







# Read parameterization file ####

message("Reading parameters...")

## Catalog parameters
avail_params <- read.xlsx(
  parameterization_file
  , "Available Parameters"
  , colNames = FALSE
  , startRow = 2
) %>% as.data.table
sheet_params <- c("name", "path", "sheet",
"header", "rn")
setnames(avail_params, 1:5, sheet_params)
avail_params <- avail_params[!is.na(name) &
grepl("[^[:space:]]", name)] %>%
  melt(id.vars = 1:5, value.name = "columns")
avail_params <- avail_params[, lapply(.SD, . %>% Filter(Negate(is.na), .)
%>%
                                        list), by = eval(sheet_params)]
avail_params[, variable := NULL]

## Analysis parameters
analysis_params <- read.xlsx(
  parameterization_file
  , "Parameterization"
  , startRow = 2
  , colNames = FALSE
) %>% as.data.table
setnames(analysis_params, c(
  "name1", "col1", "rows1", "dlm1",
  "verb", "case",
  "name2", "col2", "rows2", "dlm2",
  "outname", "outcols", "outflat"
))
analysis_params <- analysis_params[-1][!is.na(name1) &
                                         grepl("[^[:space:]]", name1)]
analysis_params[, n := 1:.N]

## Combine parameters
setkey(avail_params, name)
setkey(analysis_params, name1)
analysis_params[avail_params, ":="(
  path1 = path
  ,sheet1 = sheet
  ,header1 = header
  ,rn1 = rn
), allow.cartesian = TRUE]
setkey(analysis_params, name2)
analysis_params[avail_params, ":="(
  path2 = path
  ,sheet2 = sheet
  ,header2 = header
  ,rn2 = rn
), allow.cartesian = TRUE]
setkey(analysis_params, n)


# Match actions to functions
verb_function_map <- list(
  "A_i__in__B" = c("In", "Not In"),
  "A_i__in__B_i" = c("In (Same Row)", "Not In (Same
Row)"),
  "A_i__substr__B_i" = c("Substring Of (Same Row)",
                         "Not Substring Of (Same Row)"),
  "A_i__unique" = c("Is Unique", "Not Unique")
) %>% unlist
names(verb_function_map) %<>% gsub("[0-9]+", "", .)
analysis_params[, fun := factor(verb)]
levels(analysis_params$fun) %<>%
  match(verb_function_map) %>%
  "["(names(verb_function_map), .)
analysis_params$fun %<>% as.character



# Read data sources

message("Reading data sources...")

data_names <- avail_params[, name]
data_list <- replicate(length(data_names), list(), simplify = FALSE)
names(data_list) <- data_names
for (i in 1:nrow(avail_params))
  data_list[[i]] <- with(avail_params[i], readSource(
    file_path = path
    , sheet_name = sheet
    , header_row = header
    , column_names = columns[[1]]
    , rownames_name = rn
  ))




# Analysis ####

message("Performing comparisons...")

reports <- analyses <- vector("list", nrow(analysis_params))
names(reports) <- names(analyses) <- analysis_params[, outname]

rowAnalysis2report <- function(analysis, params = list()) {

  # Create a copy
  x <- copy(analysis)

  # Subset to logical_val of logical_col
  setnames(x, setdiff(names(x), c("rn", "value")),
"logical_col")
  x <- x[logical_col == !grepl("Not", params$verb)]
  x[, logical_col := NULL]

  # Re-encode
  Encoding(x$value) <- "UTF-8"
  if (is.character(x$rn))
    Encoding(x$rn) <- "UTF-8"

  # Unique results only
  setkey(x, rn, value)
  setcolorder(x, key(x))
  x <- unique(x)

  # Flatten if desired
  if (params$outflat == "Yes") {
    dlm <- Rdelim(params$dlm1)
    if (!is.null(dlm)) {
      if (dlm == "\\n") dlm <- "\n"
      x <- x[, list(value = paste(value, collapse = dlm)), by = rn]
    }
  }

  # Retrieve all columns if desired
  setkey(x, rn)
  if (params$outcols == "Yes") {
    full_source <- copy(data_list[[params$name1]])
    setkey(full_source, rn)
    x <- x[full_source, nomatch = 0, allow.cartesian = TRUE]
  }

  # Rename results columns
  if (is.null(params$rn1)) setnames(x, 1, "Row") else {
    if (is.na(params$rn1) | params$rn1 == params$col1) setnames(x, 1,
"Row")
    else setnames(x, 1, params$rn1)
  }
  setnames(x, 2, params$col1)

  return(x)

}

## Do it
for (i in 1:nrow(analysis_params)) {
  r <- analysis_params[i]
  args <- list(
    A = extractColumn(data_list[[r$name1]], r$col1, r$dlm1, r$rows1),
    B = if (!is.na(r$name2))
      extractColumn(data_list[[r$name2]], r$col2, r$dlm2, r$rows2),
    case = (r$case == "Yes")
  )
  analyses[[i]] <- do.call(r$fun, args)
  reports[[i]] <- rowAnalysis2report(analyses[[i]], r)
  rm(r, args)
}






# Output ####

message("Writing results to output file...")

detach("package:openxlsx")
suppressPackageStartupMessages(library(xlsx))

# Output file
exists <- TRUE
i <- 0
while (exists) {
  out_file <- if (i > 0) {
    file.path(cd, sprintf("Comparison_Reports_%s_(%s).xlsx",
Sys.Date(), i))
  } else file.path(cd, sprintf("Comparison_Reports_%s.xlsx",
Sys.Date()))
  exists <- file.exists(out_file)
  if (!exists)
    file.copy(parameterization_file, out_file)
  i <- i + 1
}

# Headers
headers <- analysis_params[, lapply(.SD, as.character), .SDcols = c(
  "outname", "col1", "verb", "col2",
"case", "name1", "name2",
  "rows1", "rows2", "dlm1", "dlm2")]
headers[, case := factor(case, c("Yes", "No"),
                         c("(Case Sensitive)", "(Not Case
Sensitive)"))]
headers[!is.na(col2), header_title := paste(col1, verb, col2, case)]
headers[is.na(col2), header_title := paste(col1, verb, case)]
headers[, header_time := Sys.time()]
headers$header_col1 <- headers[, list(col1, name1, rows1, dlm1)] %>%
  t %>%
  as.data.table %>%
  lapply(as.list) %>%
  lapply(as.data.table) %>%
  lapply(setnames, c("Column", "Source", "Rows",
"Delimiter")) %>%
  lapply(as.list)
headers$header_col2 <- headers[, list(col2, name2, rows2, dlm2)] %>%
  t %>%
  as.data.table %>%
  lapply(as.list) %>%
  lapply(as.data.table) %>%
  lapply(setnames, c("Column", "Source", "Rows",
"Delimiter")) %>%
  lapply(as.list)


# Write
keep <- c(ls(), "i", "keep")

## Loop through reports and write
for (i in names(reports)) {

  message(i, "...")

  # Load workbook
  wb <- loadWorkbook(out_file)

  # Workbook styles

  ## Header title
  hd <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
    font = Font(wb, heightInPoints = 16, isBold = TRUE)
  )

  ## Date
  dt <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
    dataFormat = DataFormat("m/d/yyyy h:mm:ss;@")
  )

  ## Parameters header
  ph <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
    font = Font(wb, isItalic = TRUE)
  )

  ## Column names header
  cn <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
    border = Border(position = c("BOTTOM", "TOP"),
                    pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
    font = Font(wb, isBold = TRUE)
  )

  ## Column names header for reproduced data
  cnr <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP"),
    border = Border(position = c("BOTTOM", "TOP"),
                    pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
    font = Font(wb, isBold = TRUE, isItalic = TRUE)
  )

  ## Values
  vl <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP",
                          wrapText = TRUE)
  )

  ## Values for reproduced data
  vlr <- CellStyle(
    wb,
    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
"VERTICAL_TOP",
                          wrapText = TRUE),
    font = Font(wb, isItalic = TRUE)
  )


  # Create sheet
  sh <- createSheet(wb, i)

  # Add header rows
  h <- headers[outname == i]
  addMergedRegion(sh, 1, 1, 1, 10)
  addMergedRegion(sh, 2, 2, 1, 10)
  rw <- createRow(sh, 1:2)
  cl <- createCell(rw, 1)

  ## Title
  addDataFrame(h[, header_title], sh, FALSE, FALSE, 1, 1)
  rw <- getRows(sh, 1)
  cl <- getCells(rw)
  lapply(cl, setCellStyle, hd)

  ## Date
  addDataFrame(h[, header_time], sh, FALSE, FALSE, 2, 1)
  rw <- getRows(sh, 2)
  cl <- getCells(rw)
  lapply(cl, setCellStyle, dt)

  ## Parameters
  addDataFrame(h[, header_col1] %>% as.data.frame, sh, TRUE, FALSE, 4, 1)
  if (h[, !is.na(col2)])
    addDataFrame(h[, header_col2] %>% as.data.frame, sh, FALSE, FALSE, 6, 1)
  rw <- getRows(sh, 4)
  cl <- getCells(rw)
  lapply(cl, setCellStyle, ph)
  rw <- getRows(sh, 5:6)
  cl <- getCells(rw)
  lapply(cl, setCellStyle, vl)

  # Add report
  addDataFrame(reports[[i]], sh, TRUE, FALSE, 8, 1)
  nc <- ncol(reports[[i]])

  ## Format column names
  rw <- getRows(sh, 8)
  cl <- getCells(rw, 1:2)
  lapply(cl, setCellStyle, cn)
  if (nc > 2)  {
    cl <- getCells(rw, 3:nc)
    lapply(cl, setCellStyle, cnr)
  }

  ## Format values
  rw <- getRows(sh, 9:(nrow(reports[[i]]) + 9))
  cl <- getCells(rw, 1:2)
  lapply(cl, setCellStyle, vl)
  if (nc > 2)  {
    cl <- getCells(rw, 3:nc)
    lapply(cl, setCellStyle, vlr)
  }

  ## Add autofilters
  if (ncol(reports[[i]]) > 26) {
    addAutoFilter(sh, sprintf("A8:%s%s%s",
                              LETTERS[floor(ncol(reports[[i]]) / 26)],
                              LETTERS[ncol(reports[[i]]) %% 26],
                              nrow(reports[[i]]) + 9))
  } else {
    addAutoFilter(sh, sprintf("A8:%s%s", LETTERS[ncol(reports[[i]])],
                              nrow(reports[[i]]) + 9))
  }

  # Autofit columns
  autoSizeColumn(sh, 1:ncol(reports[[i]]))

  # Create freeze on report column names and results columns
  if (nc > 2) createFreezePane(sh, rowSplit = 9, colSplit = 3) else
    createFreezePane(sh, rowSplit = 9, colSplit = 1)

  # Save
  saveWorkbook(wb, out_file)
  rm(list = setdiff(ls(), keep))

}
b

	[[alternative HTML version deleted]]

Jeff Newmiller

2015-Aug-30 01:35 UTC

head link

[R] Advanced Level Script for Traceability Between Worksheets

Some notes:

1) HTML damages your email on this mailing list (we often see run-on lines and
garbage characters.. definitely not whatever you saw).

2) Massive scripts are off topic... please read the Posting Guide. If you
can't narrow your question to a smaller example then you may really need a
consultant.

3) Periods in this case don't have any special meaning... They just make the
variable look weird.

4) This script makes liberal use of the data.tables package, which has some
advantage in speed and memory efficiency if you are working with large data
sets. The odd indexing used in ..A[..B, in_B := TRUE, allow.cartesian = TRUE] is
a relational join that is discussed in the vignette for the data.table package.
I only use data.tables if I need to optimise memory or execution speed because I
don't find them very intuitive. The fact that this code makes copies so
frequently may indicate that it is not as optimised as it could be. Or perhaps
they are necessary and I just have not looked at it closely enough.

5) A very few uses of %>? and ?<>? are from the magrittr package... I
do find that helpful though it seems like the author here was only just getting
started using it. Again, you would need to read the vignette and/or some
internet tutorials to follow that syntax... I find it much easier than
data.table though it is solving a different problem.

6) I don't consult off list... sorry.

---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live
Go...
                                      Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k
--------------------------------------------------------------------------- 
Sent from my phone. Please excuse my brevity.

On August 29, 2015 1:19:35 PM PDT, C Campbell <cc571309 at gmail.com>
wrote:>Hi folks - I have almost know R skills yet and have been put 'in
>charge' of
>the below script created by a former employee.  Although some of this
>is
>understandable to me, much of it is not.  If anyone can help with
>explaining sections, commenting on the skill level it takes to
>understand
>this level of scripting in R, and/or point me to some resources that
>may
>cover some of this (e.g., what is ..A[..B, in_B := TRUE,
>allow.cartesian >TRUE]; and specifically what do the 2 dots mean?), I
would very much
>appreciate it.  Would also be interested in communicating offline if
>you
>prefer.
>Thank you,
>Jay
>
>
>
># Locate file ####
>parameterization_file <- file.choose()
>cd <- dirname(parameterization_file)
>
># Front matter ####
>message("Installing and loading packages...")
>
># Packages
>required_packages <- c("openxlsx", "xlsx",
"magrittr", "data.table",
>"reshape2",
>                       "XML")
>install_these <- setdiff(required_packages,
>rownames(installed.packages()))
>
>while (length(install_these) > 0) {
>  install.packages(install_these, repos =
"http://cran.rstudio.com")
>  install_these <- setdiff(required_packages,
>rownames(installed.packages()))
>}
>
>suppressPackageStartupMessages(library(openxlsx))
>suppressPackageStartupMessages(library(magrittr))
>suppressPackageStartupMessages(library(data.table))
>suppressPackageStartupMessages(library(reshape2))
>suppressPackageStartupMessages(library(XML))
>
>
># Options
>options(stringsAsFactors = FALSE)
>
># Functions
>message("Loading functions...")
>
>A__in__B <- function(A, B, case = TRUE, ...) {
>
>  # Copies
>  ..A <- copy(A)
>  ..B <- copy(B)
>  setkey(..A, value)
>  setkey(..B, value)
>  ..A <- unique(..A)
>  ..B <- unique(..B)
>
>  # Rownames are unnecessary
>  ..A[, rn := NULL]
>  ..B[, rn := NULL]
>
>  # Case sensitivity
>  if (!case) {
>    ..A <- tableToLower(..A)
>    ..B <- tableToLower(..B)
>  }
>
>  # Check if A is in B
>  ..A[..B, in_B := TRUE, allow.cartesian = TRUE]
>  if ("in_B" %in% names(..A))
>    ..A[is.na(in_B), in_B := FALSE]
>  else
>    ..A[, in_B := FALSE]
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableDropLower(..A)
>
>  # Set attributes
>  setABattr(..A, A, B)
>
>  # Return results
>  setkey(..A, value)
>  return(..A)
>
>}
>
>A__unique <- function(A, case = TRUE, ...) {
>
>  # Copies
>  ..A <- copy(A)
>  setkey(..A, value)
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableToLower(..A)
>
>  # Check if A_i values are unique
>  ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
>      allow.cartesian = TRUE]
>  if ("is_unique" %in% names(..A))
>    ..A[is.na(is_unique), is_unique := TRUE]
>  else
>    ..A[, is_unique := TRUE]
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableDropLower(..A)
>
>  # Roll up to value level
>  ..A <- ..A[, list(is_unique = all(is_unique)), keyby = value]
>
>  # Return results
>  return(..A)
>
>}
>
>A_i__in__B <- function(A, B, case = TRUE, ...) {
>
>  # Copies
>  ..A <- copy(A)
>  setkey(..A, value, rn)
>  ..A <- unique(..A)
>  ..B <- copy(B)
>  setkey(..B, value)
>  ..B %>% unique
>
>  # B rownames are unnecessary
>  ..B[, rn := NULL]
>
>  # Case sensitivity
>  if (!case) {
>    ..A <- tableToLower(..A)
>    ..B <- tableToLower(..B)
>  }
>
>  # Check if A is in B
>  if ("in_B" %in% names(..A))
>    ..A[is.na(in_B), in_B := FALSE]
>  else
>    ..A[, in_B := FALSE]
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableDropLower(..A)
>
>  # Set attributes
>  setABattr(..A, A, B)
>
>  # Return results
>  setkey(..A, value, rn)
>  return(..A)
>
>}
>
>A_i__in__B_i <- function(A, B, case = TRUE, ...) {
>
>  # Copies
>  ..A <- copy(A)
>  setkey(..A, value, rn)
>  ..A <- unique(..A)
>  ..B <- copy(B)
>  setkey(..B, value, rn)
>  ..B <- unique(..B)
>
>  # Case sensitivity
>  if (!case) {
>    ..A <- tableToLower(..A)
>    ..B <- tableToLower(..B)
>  }
>
>  # Check if A_i terms are in B_i terms
>  ..A[..B, in_B := TRUE, allow.cartesian = TRUE]
>  if ("in_B" %in% names(..A))
>    ..A[is.na(in_B), in_B := FALSE]
>  else
>    ..A[, in_B := FALSE]
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableDropLower(..A)
>
>  # Set attributes
>  setABattr(..A, A, B)
>
>  # Return results
>  setkey(..A, value, rn)
>  return(..A)
>
>}
>
>A_i__substr__B_i <- function(A, B, case = TRUE, ...) {
>
>  # Copies
>  ..A <- copy(A)
>  setkey(..A, rn)
>  ..B <- copy(B)
>  setkey(..B, rn)
>
>  # Renames
>  setnames(..A, "value", "A_value")
>  setnames(..B, "value", "B_value")
>
>  # Merge
>  ..X <- ..B[..A, allow.cartesian = TRUE]
>
>  # Check if A_i values are substrings of B_i values
>  ..X[is.na(B_value), is_substring := FALSE]
>  Encoding(..X$A_value) <- "UTF-8"
>  Encoding(..X$B_value) <- "UTF-8"
>  if (case) {
>    ..X[!is.na(B_value), is_substring := mapply(
>      grepl, A_value, B_value, fixed = TRUE)]
>  } else {
>    ..X[!is.na(B_value), is_substring := mapply(
>      grepl, tolower(A_value), tolower(B_value), fixed = TRUE)]
>  }
>  Encoding(..X$A_value) <- "bytes"
>  Encoding(..X$B_value) <- "bytes"
>
>  # Rename/reorder
>  ..X <- ..X[, list(value = A_value, rn, is_substring)]
>
>  # Set attributes
>  setABattr(..X, A, B)
>
>  # Return results
>  setkey(..X, value, rn)
>  return(..X)
>
>}
>
>A_i__unique <- function(A, case = TRUE, ...) {
>
>  # Copies
>  ..A <- copy(A)
>  setkey(..A, value)
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableToLower(..A)
>
>  # Check if A_i values are unique
>  ..A[..A[duplicated(..A), SJ(value)], is_unique := FALSE,
>      allow.cartesian = TRUE]
>  if ("is_unique" %in% names(..A))
>    ..A[is.na(is_unique), is_unique := TRUE]
>  else
>    ..A[, is_unique := TRUE]
>
>  # Case sensitivity
>  if (!case)
>    ..A <- tableDropLower(..A)
>
>  # Return results
>  setkey(..A, value, rn)
>  return(..A)
>
>}
>
>extractColumn <- function(x, column_name, value_delimiter = NULL, rows
>>NULL)
>{
>
>  # Validate formatting on column name args
>  column_name %<>% trimCompress
>
>  # Multiple columns?
>  mult_cols <- grepl(",", column_name)
>  if (mult_cols)
>    column_name %<>% strsplit(",") %>% unlist %>%
trimCompress
>
>  # Get column + rn
>  ..table <- x[, c("rn", column_name), with = FALSE]
>  setnames(..table, 2, "value")
>
>  # Long if multiple
>  if (mult_cols) {
>    ..table %<>% melt(1)
>    ..table[, variable := NULL]
>  }
>
>  # Key table by rowname
>  setkey(..table, rn)
>
>  # If rows was provided, subset
>  if (!is.null(rows))
>    if (rows != "All")
>      ..table <- ..table[textrange2vector(rows) %>% SJ]
>
>  # Split values according to delimiter...
>  dlm <- Rdelim(value_delimiter)
>  if (!is.null(dlm))
>    ..values <- strsplit(..table[, value], Rdelim(value_delimiter))
%>%
>    lapply(trimCompress)
>  # ... or convert to list if no delimiter
>  else
>    ..values <- ..table[, value] %>% trimCompress %>% as.list
>
>  # Set list name values to rowname values
>  names(..values) <- ..table[, rn]
>
>  # Convert from list to table
>  ..values %<>% melt %>% as.data.table
>  setnames(..values, 2, "rn")
>
>  # Remove any instances of blank values
>  ..values <- ..values[!is.na(value) &
grepl("[^[:space:]]", value)]
>
>  # Encode all text to bytes
>  # Will need to encode to UTF-8 before output to make it readable
>  Encoding(..values$value) <- "bytes"
>  if (is.character(..values$rn)) Encoding(..values$rn) <-
"bytes"
>
>  # If row names can be converted to numeric, do so
>  if (..values[, rn] %>% is.character)
>    if (..values[, rn] %>% type.convert %>% is.numeric)
>      ..values[, rn := as.numeric(rn)]
>
>  # Key table by value
>  setkey(..values, value, rn)
>
>  # Add attributes
>  setattr(..values, "file_path", attr(x, "file_path"))
>  setattr(..values, "sheet_name", attr(x, "sheet_name"))
>  setattr(..values, "header_row", attr(x, "header_row"))
>  setattr(..values, "column_name", column_name)
>  setattr(..values, "rownames_name", attr(x,
"rownames_name"))
>  setattr(..values, "value_delimiter", value_delimiter)
>  setattr(..values, "rows", rows)
>
>  # Return the values table
>  return(..values)
>
>}
>
>fillNAlast <- function(x) {
>  na <- is.na(x)
>  miss <- which(na)
>  nonmiss <- which(!na)
>  map <- outer(nonmiss, miss, "<") %>%
>    apply(2, . %>% which %>% max)
>  x[miss] <- x[nonmiss[map]]
>  return(x)
>}
>
>getSheetIndex <- function(file_path, sheet_name) {
>
># Extract workbook.xml to temporary file that will be deleted at end of
>  # run
>  xmlDir <- file.path(tempdir(), "findSheet")
>workbook <- unzip(file_path, files = "xl/workbook.xml", exdir =
xmlDir)
>  on.exit(unlink(xmlDir, recursive = TRUE), add = TRUE)
>
>  # Read workbook.xml and get sheet nodes
>  workbook <- readLines(workbook, warn = FALSE, encoding =
"UTF-8") %>%
>    unlist
>  sheets <- gregexpr("<sheet .*/sheets>", workbook, perl
= TRUE) %>%
>    regmatches(workbook, .) %>%
>    unlist
>
>  # Extract sheet names from nodes, parse as html, and return text
>values
>  sheetNames <- gregexpr('(?<=name=")[^"]+', sheets,
perl = TRUE) %>%
>    regmatches(sheets, .) %>%
>    unlist %>%
>    lapply(htmlParse, asText = TRUE) %>%
>    sapply(. %>% xpathApply("//body//text()", xmlValue) %>%
unlist)
>
>  # Which sheet name is equal to the sheet_name argument?
>  which(sheetNames == sheet_name)
>
>}
>
>Rdelim <- function(x, ...) {
>  if (!is.null(x)) {
>    if (!is.na(x) & x != "None") {
>      if (x == "Newline") "\\n" else x
>    } else NULL
>  } else NULL
>}
>
>readSource <- function(file_path, sheet_name, header_row, column_names,
>                       rownames_name = NULL)
>{
>
>  # Validate formatting on column name args
>  column_names %<>% strsplit(",") %>% unlist %>%
trimCompress
>  rownames_name %<>% trimCompress
>
>  # Sheet index
>  sheet_index <- getSheetIndex(file_path, sheet_name)
>
>  # Read column names according to header row
>  ..names <- read.xlsx(
>    xlsxFile = file_path
>    , sheet = sheet_index
>    , colNames = FALSE
>    , rows = header_row
>  ) %>% unlist %>% unname %>% trimCompress
>
>  # Read in column plus and any rownames column
>  ..table <- read.xlsx(
>    xlsxFile = file_path
>    , sheet = sheet_index
>    , startRow = header_row
>    , cols = which(..names %in% c(rownames_name, column_names))
>    , skipEmptyRows = FALSE
>    , detectDates = TRUE
>  ) %>% as.data.table
>
>  # Set names
>  setnames(..table,
>           ..names[which(..names %in% c(rownames_name, column_names))])
>
>  # Rownames
>  ## If no rownames column, use row number
>  if (is.null(rownames_name)) {
>if (is.null(rows)) ..table[, rn := 1:.N + 1L] else ..table[, rn :>rows]
>  } else { # Otherwise, just copy the column
>    ..table[, rn := lapply(.SD, identity), .SDcols = rownames_name]
>  }
>  setcolorder(..table, c("rn", setdiff(names(..table),
"rn")))
>
>  # If row can be converted to numeric, do so
>  if (..table[, rn] %>% is.character)
>    if (..table[, rn] %>% type.convert %>% is.numeric)
>      ..table[, rn := as.numeric(rn)]
>
>  # Key table by row
>  setkey(..table, rn)
>
>  # Add attributes
>  setattr(..table, "file_path", file_path)
>  setattr(..table, "sheet_name", sheet_name)
>  setattr(..table, "header_row", header_row)
>  setattr(..table, "column_names", column_names)
>  setattr(..table, "rownames_name", rownames_name)
>
>  # Return the values table
>  return(..table)
>
>}
>
>setABattr <- function(new_table, A, B) {
>
>  # Strip existing attributes in new_table
>  setattr(new_table, "file_path", NULL)
>  setattr(new_table, "sheet_name", NULL)
>  setattr(new_table, "header_row", NULL)
>  setattr(new_table, "column_name", NULL)
>  setattr(new_table, "rownames_name", NULL)
>  setattr(new_table, "value_delimiter", NULL)
>  setattr(new_table, "rows", NULL)
>  setattr(new_table, "rows_are_rownames", NULL)
>
>  # Set A attributes in new_table
>  setattr(new_table, "A_file_path", attributes(A)$file_path)
>  setattr(new_table, "A_sheet_name", attributes(A)$sheet_name)
>  setattr(new_table, "A_header_row", attributes(A)$header_row)
>  setattr(new_table, "A_column_name", attributes(A)$column_name)
>  setattr(new_table, "A_rownames_name",
attributes(A)$rownames_name)
> setattr(new_table, "A_value_delimiter",
attributes(A)$value_delimiter)
>  setattr(new_table, "A_rows", attributes(A)$rows)
>setattr(new_table, "A_rows_are_rownames",
>attributes(A)$rows_are_rownames)
>
>  # Set B attributes in new_table
>  setattr(new_table, "B_file_path", attributes(B)$file_path)
>  setattr(new_table, "B_sheet_name", attributes(B)$sheet_name)
>  setattr(new_table, "B_header_row", attributes(B)$header_row)
>  setattr(new_table, "B_column_name", attributes(B)$column_name)
>  setattr(new_table, "B_rownames_name",
attributes(B)$rownames_name)
> setattr(new_table, "B_value_delimiter",
attributes(B)$value_delimiter)
>  setattr(new_table, "B_rows", attributes(B)$rows)
>setattr(new_table, "B_rows_are_rownames",
>attributes(B)$rows_are_rownames)
>
>}
>
>tableToLower <- function(X, ...) {
>
>  # Copy
>  x <- copy(X)
>
>  # Existing keys
>  keys <- key(x)
>  setkey(x, NULL)
>
>  # Rename value column
>  setnames(x, "value", "value_orig")
>
>  # Derived value column
>  Encoding(x$value_orig) <- "UTF-8"
>  x[, value := tolower(value_orig)]
>  Encoding(x$value) <- "bytes"
>  Encoding(x$value_orig) <- "bytes"
>
>  # Rekey
>  setkeyv(x, keys)
>
>  # Return
>  return(x)
>
>}
>
>tableDropLower <- function(X, ...) {
>
>  # Copy
>  x <- copy(X)
>
>  # Existing keys
>  keys <- key(x)
>  setkey(x, NULL)
>
>  # Drop derived value column
>  x[, value := NULL]
>
>  # Rename value_orig column
>  setnames(x, "value_orig", "value")
>
>  # Rekey
>  setkeyv(x, keys)
>
>  # Return
>  return(x)
>
>}
>
>textrange2vector <- function(x) {
>  strsplit(x, ",") %>%
>    lapply(
>      . %>%
>        strsplit("-") %>%
>        lapply(as.numeric) %>%
>        lapply(function(s)
>          if (length(s) == 1) s
>          else seq(s[1], s[2]))) %>%
>    lapply(unlist)
>}
>
>trimCompress <- function(x) {
>
> if (!"magrittr" %in% loadedNamespaces()) # check if magrittr is
loaded
>    library(magrittr)                      # load if not
>
>  if (is.null(x)) return(NULL)
>
>  x %>%
>    gsub("^\\s+", "", .) %>% # remove leading blanks
>    gsub("\\s+$", "", .) %>% # remove trailing blanks
>    gsub("\\s+", " ", .)     # compress multiple blanks
to one
>
>}
>
>
>
>
>
>
>
># Read parameterization file ####
>
>message("Reading parameters...")
>
>## Catalog parameters
>avail_params <- read.xlsx(
>  parameterization_file
>  , "Available Parameters"
>  , colNames = FALSE
>  , startRow = 2
>) %>% as.data.table
>sheet_params <- c("name", "path", "sheet",
"header", "rn")
>setnames(avail_params, 1:5, sheet_params)
>avail_params <- avail_params[!is.na(name) &
grepl("[^[:space:]]",
>name)] %>%
>  melt(id.vars = 1:5, value.name = "columns")
>avail_params <- avail_params[, lapply(.SD, . %>% Filter(Negate(is.na),
>.)
>%>%
>                                        list), by = eval(sheet_params)]
>avail_params[, variable := NULL]
>
>## Analysis parameters
>analysis_params <- read.xlsx(
>  parameterization_file
>  , "Parameterization"
>  , startRow = 2
>  , colNames = FALSE
>) %>% as.data.table
>setnames(analysis_params, c(
>  "name1", "col1", "rows1", "dlm1",
>  "verb", "case",
>  "name2", "col2", "rows2", "dlm2",
>  "outname", "outcols", "outflat"
>))
>analysis_params <- analysis_params[-1][!is.na(name1) &
>                                         grepl("[^[:space:]]",
name1)]
>analysis_params[, n := 1:.N]
>
>## Combine parameters
>setkey(avail_params, name)
>setkey(analysis_params, name1)
>analysis_params[avail_params, ":="(
>  path1 = path
>  ,sheet1 = sheet
>  ,header1 = header
>  ,rn1 = rn
>), allow.cartesian = TRUE]
>setkey(analysis_params, name2)
>analysis_params[avail_params, ":="(
>  path2 = path
>  ,sheet2 = sheet
>  ,header2 = header
>  ,rn2 = rn
>), allow.cartesian = TRUE]
>setkey(analysis_params, n)
>
>
># Match actions to functions
>verb_function_map <- list(
>  "A_i__in__B" = c("In", "Not In"),
>  "A_i__in__B_i" = c("In (Same Row)", "Not In (Same
Row)"),
>  "A_i__substr__B_i" = c("Substring Of (Same Row)",
>                         "Not Substring Of (Same Row)"),
>  "A_i__unique" = c("Is Unique", "Not Unique")
>) %>% unlist
>names(verb_function_map) %<>% gsub("[0-9]+", "",
.)
>analysis_params[, fun := factor(verb)]
>levels(analysis_params$fun) %<>%
>  match(verb_function_map) %>%
>  "["(names(verb_function_map), .)
>analysis_params$fun %<>% as.character
>
>
>
># Read data sources
>
>message("Reading data sources...")
>
>data_names <- avail_params[, name]
>data_list <- replicate(length(data_names), list(), simplify = FALSE)
>names(data_list) <- data_names
>for (i in 1:nrow(avail_params))
>  data_list[[i]] <- with(avail_params[i], readSource(
>    file_path = path
>    , sheet_name = sheet
>    , header_row = header
>    , column_names = columns[[1]]
>    , rownames_name = rn
>  ))
>
>
>
>
># Analysis ####
>
>message("Performing comparisons...")
>
>reports <- analyses <- vector("list", nrow(analysis_params))
>names(reports) <- names(analyses) <- analysis_params[, outname]
>
>rowAnalysis2report <- function(analysis, params = list()) {
>
>  # Create a copy
>  x <- copy(analysis)
>
>  # Subset to logical_val of logical_col
>  setnames(x, setdiff(names(x), c("rn", "value")),
"logical_col")
>  x <- x[logical_col == !grepl("Not", params$verb)]
>  x[, logical_col := NULL]
>
>  # Re-encode
>  Encoding(x$value) <- "UTF-8"
>  if (is.character(x$rn))
>    Encoding(x$rn) <- "UTF-8"
>
>  # Unique results only
>  setkey(x, rn, value)
>  setcolorder(x, key(x))
>  x <- unique(x)
>
>  # Flatten if desired
>  if (params$outflat == "Yes") {
>    dlm <- Rdelim(params$dlm1)
>    if (!is.null(dlm)) {
>      if (dlm == "\\n") dlm <- "\n"
>      x <- x[, list(value = paste(value, collapse = dlm)), by = rn]
>    }
>  }
>
>  # Retrieve all columns if desired
>  setkey(x, rn)
>  if (params$outcols == "Yes") {
>    full_source <- copy(data_list[[params$name1]])
>    setkey(full_source, rn)
>    x <- x[full_source, nomatch = 0, allow.cartesian = TRUE]
>  }
>
>  # Rename results columns
>  if (is.null(params$rn1)) setnames(x, 1, "Row") else {
>    if (is.na(params$rn1) | params$rn1 == params$col1) setnames(x, 1,
>"Row")
>    else setnames(x, 1, params$rn1)
>  }
>  setnames(x, 2, params$col1)
>
>  return(x)
>
>}
>
>## Do it
>for (i in 1:nrow(analysis_params)) {
>  r <- analysis_params[i]
>  args <- list(
>    A = extractColumn(data_list[[r$name1]], r$col1, r$dlm1, r$rows1),
>    B = if (!is.na(r$name2))
>      extractColumn(data_list[[r$name2]], r$col2, r$dlm2, r$rows2),
>    case = (r$case == "Yes")
>  )
>  analyses[[i]] <- do.call(r$fun, args)
>  reports[[i]] <- rowAnalysis2report(analyses[[i]], r)
>  rm(r, args)
>}
>
>
>
>
>
>
># Output ####
>
>message("Writing results to output file...")
>
>detach("package:openxlsx")
>suppressPackageStartupMessages(library(xlsx))
>
># Output file
>exists <- TRUE
>i <- 0
>while (exists) {
>  out_file <- if (i > 0) {
>file.path(cd, sprintf("Comparison_Reports_%s_(%s).xlsx",
Sys.Date(),
>i))
>} else file.path(cd, sprintf("Comparison_Reports_%s.xlsx",
Sys.Date()))
>  exists <- file.exists(out_file)
>  if (!exists)
>    file.copy(parameterization_file, out_file)
>  i <- i + 1
>}
>
># Headers
>headers <- analysis_params[, lapply(.SD, as.character), .SDcols = c(
>  "outname", "col1", "verb", "col2",
"case", "name1", "name2",
>  "rows1", "rows2", "dlm1", "dlm2")]
>headers[, case := factor(case, c("Yes", "No"),
>                        c("(Case Sensitive)", "(Not Case
Sensitive)"))]
>headers[!is.na(col2), header_title := paste(col1, verb, col2, case)]
>headers[is.na(col2), header_title := paste(col1, verb, case)]
>headers[, header_time := Sys.time()]
>headers$header_col1 <- headers[, list(col1, name1, rows1, dlm1)] %>%
>  t %>%
>  as.data.table %>%
>  lapply(as.list) %>%
>  lapply(as.data.table) %>%
>  lapply(setnames, c("Column", "Source",
"Rows", "Delimiter")) %>%
>  lapply(as.list)
>headers$header_col2 <- headers[, list(col2, name2, rows2, dlm2)] %>%
>  t %>%
>  as.data.table %>%
>  lapply(as.list) %>%
>  lapply(as.data.table) %>%
>  lapply(setnames, c("Column", "Source",
"Rows", "Delimiter")) %>%
>  lapply(as.list)
>
>
># Write
>keep <- c(ls(), "i", "keep")
>
>## Loop through reports and write
>for (i in names(reports)) {
>
>  message(i, "...")
>
>  # Load workbook
>  wb <- loadWorkbook(out_file)
>
>  # Workbook styles
>
>  ## Header title
>  hd <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
>    font = Font(wb, heightInPoints = 16, isBold = TRUE)
>  )
>
>  ## Date
>  dt <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
>    dataFormat = DataFormat("m/d/yyyy h:mm:ss;@")
>  )
>
>  ## Parameters header
>  ph <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
>    font = Font(wb, isItalic = TRUE)
>  )
>
>  ## Column names header
>  cn <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
>    border = Border(position = c("BOTTOM", "TOP"),
>                    pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
>    font = Font(wb, isBold = TRUE)
>  )
>
>  ## Column names header for reproduced data
>  cnr <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP"),
>    border = Border(position = c("BOTTOM", "TOP"),
>                    pen = c("BORDER_THIN",
"BORDER_MEDIUM")),
>    font = Font(wb, isBold = TRUE, isItalic = TRUE)
>  )
>
>  ## Values
>  vl <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP",
>                          wrapText = TRUE)
>  )
>
>  ## Values for reproduced data
>  vlr <- CellStyle(
>    wb,
>    alignment = Alignment(horizontal = "ALIGN_LEFT", vertical
>"VERTICAL_TOP",
>                          wrapText = TRUE),
>    font = Font(wb, isItalic = TRUE)
>  )
>
>
>  # Create sheet
>  sh <- createSheet(wb, i)
>
>  # Add header rows
>  h <- headers[outname == i]
>  addMergedRegion(sh, 1, 1, 1, 10)
>  addMergedRegion(sh, 2, 2, 1, 10)
>  rw <- createRow(sh, 1:2)
>  cl <- createCell(rw, 1)
>
>  ## Title
>  addDataFrame(h[, header_title], sh, FALSE, FALSE, 1, 1)
>  rw <- getRows(sh, 1)
>  cl <- getCells(rw)
>  lapply(cl, setCellStyle, hd)
>
>  ## Date
>  addDataFrame(h[, header_time], sh, FALSE, FALSE, 2, 1)
>  rw <- getRows(sh, 2)
>  cl <- getCells(rw)
>  lapply(cl, setCellStyle, dt)
>
>  ## Parameters
>addDataFrame(h[, header_col1] %>% as.data.frame, sh, TRUE, FALSE, 4, 1)
>  if (h[, !is.na(col2)])
>addDataFrame(h[, header_col2] %>% as.data.frame, sh, FALSE, FALSE, 6,
>1)
>  rw <- getRows(sh, 4)
>  cl <- getCells(rw)
>  lapply(cl, setCellStyle, ph)
>  rw <- getRows(sh, 5:6)
>  cl <- getCells(rw)
>  lapply(cl, setCellStyle, vl)
>
>  # Add report
>  addDataFrame(reports[[i]], sh, TRUE, FALSE, 8, 1)
>  nc <- ncol(reports[[i]])
>
>  ## Format column names
>  rw <- getRows(sh, 8)
>  cl <- getCells(rw, 1:2)
>  lapply(cl, setCellStyle, cn)
>  if (nc > 2)  {
>    cl <- getCells(rw, 3:nc)
>    lapply(cl, setCellStyle, cnr)
>  }
>
>  ## Format values
>  rw <- getRows(sh, 9:(nrow(reports[[i]]) + 9))
>  cl <- getCells(rw, 1:2)
>  lapply(cl, setCellStyle, vl)
>  if (nc > 2)  {
>    cl <- getCells(rw, 3:nc)
>    lapply(cl, setCellStyle, vlr)
>  }
>
>  ## Add autofilters
>  if (ncol(reports[[i]]) > 26) {
>    addAutoFilter(sh, sprintf("A8:%s%s%s",
>                              LETTERS[floor(ncol(reports[[i]]) / 26)],
>                              LETTERS[ncol(reports[[i]]) %% 26],
>                              nrow(reports[[i]]) + 9))
>  } else {
>    addAutoFilter(sh, sprintf("A8:%s%s",
LETTERS[ncol(reports[[i]])],
>                              nrow(reports[[i]]) + 9))
>  }
>
>  # Autofit columns
>  autoSizeColumn(sh, 1:ncol(reports[[i]]))
>
>  # Create freeze on report column names and results columns
>  if (nc > 2) createFreezePane(sh, rowSplit = 9, colSplit = 3) else
>    createFreezePane(sh, rowSplit = 9, colSplit = 1)
>
>  # Save
>  saveWorkbook(wb, out_file)
>  rm(list = setdiff(ls(), keep))
>
>}
>b
>
>	[[alternative HTML version deleted]]
>
>______________________________________________
>R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>https://stat.ethz.ch/mailman/listinfo/r-help
>PLEASE do read the posting guide
>http://www.R-project.org/posting-guide.html
>and provide commented, minimal, self-contained, reproducible code.

R help - Aug 2015 - Advanced Level Script for Traceability Between Worksheets

[R] Advanced Level Script for Traceability Between Worksheets

[R] Advanced Level Script for Traceability Between Worksheets