thr3ads.net - R devel - [Rd] returning information from functions via attributes rather than return list [Jan 2012]

If this information is useful, please help other people find it:
Share via:

Paul Johnson

2012-Jan-03 20:08 UTC

[Rd] returning information from functions via attributes rather than return list

I would like to ask for advice from R experts about the benefits or
dangers of using attr to return information with an object that is
returned from a function. I have a feeling as though I have cheated by
using attributes, and wonder if I've done something fishy.

Maybe I mean to ask, where is the dividing line between attributes and
instance variables?  The separation is not clear in my mind anymore.

Background: I paste below a function that takes in a regression object
and make changes to the data and/or call and then run a
revised regression.  In my earlier effort, I was building a return
list, including the new fitted regression object plus some
variables that have information about the changes that a were made.

That creates some inconvenience, however.  When the regression is in a
list object, then methods for lm objects don't apply to that result
object. The return is not an lm anymore.  I either need to write
custom methods for every function or remember to extract the object
from the list before sending to the generic function.

I *guessed* it would work to write the new information as object
attributes, and it seems to work. There is a generic function
"meanCenter" and a method "meanCenter.default". At the end
of
meanCenter.default, here's my use (or abuse) of attributes.

  res <- eval(mc)
  class(res) <- c("mcreg", class(model))
  attr(res, "centeredVars") <- nc
  attr(res, "centerCall") <-  match.call()
  res

I wrote print and summary methods, but other methods that work for lm
objects like plot will also work for these new ones.



meanCenter <- function(model, centerOnlyInteractors=TRUE,
centerDV=FALSE, standardize=FALSE, centerContrasts = F){
  UseMethod("meanCenter")
}

meanCenter.default <- function(model, centerOnlyInteractors=TRUE,
centerDV=FALSE, standardize=FALSE, centerContrasts = F){

  std <- function(x) {
    if( !is.numeric(x) ){
      stop("center.lm tried to center a factor variable. No Can Do!")
    } else {
      scale(x, center = TRUE, scale = standardize)
    }
  }

  rdf <- get_all_vars(formula(model), model$model) #raw data frame
  t <- terms(model)
  tl <- attr(t, "term.labels")
  tmdc <- attr(t, "dataClasses") ##term model data classes

  isNumeric <- names(tmdc)[ which(tmdc %in% c("numeric"))]
  isFac <-  names(tmdc)[ which(tmdc %in% c("factor"))]
  if (tmdc[1] != "numeric") stop("Sorry, DV not a single numeric
column")

  ##Build "nc", a vector of variable names that "need
centering"
  ##
  if (!centerDV) {
    if (centerOnlyInteractors == FALSE){
      nc <- isNumeric[-1] #-1 excludes response
      unique(nc)
    }else{
      interactTerms <- tl[grep(":", tl)]
      nc <- unique(unlist(strsplit( interactTerms, ":")))
      nc <-  nc[which(nc %in% isNumeric)]
    }
  }else{
    if (centerOnlyInteractors == FALSE){
      nc <- isNumeric
    }else{
      interactTerms <- tl[grep(":", tl)]
      nc <- unique(unlist(strsplit( interactTerms, ":")))
      nc <- nc[which(nc %in% isNumeric)]
      nc <- c( names(tmdc)[1] , nc)
    }
  }


  mc <- model$call
  # run same model call, replacing non centered data with centered data.
  ## if no need to center factor contrasts:
  if (!centerContrasts)
    {
      stddat <- rdf
      for (i in nc) stddat[ , i] <- std( stddat[, i])
      mc$data <- quote(stddat)
    }else{
      ##dm: design matrix, only includes intercept and predictors
      dm <- model.matrix(model, data=rdf, contrasts.arg model$contrasts, xlev
= model$xlevels)
      ##contrastIdx: indexes of contrast variables in dm
      contrastIdx <- which(attr(dm, "assign")== match(isFac, tl))
      contrastVars <- colnames(dm)[contrastIdx]
      nc <- c(nc, contrastVars)

      dm <- as.data.frame(dm)

      hasIntercept <- attr(t, "intercept")
      if (hasIntercept) dm <- dm[ , -1] # removes intercept, column 1

      dv <- rdf[ ,names(tmdc)[1]] #tmdc[1] is response variable name
      dm <- cbind(dv, dm)
      colnames(dm)[1] <- names(tmdc)[1] #put colname for dv

      dmnames <- colnames(dm)
      hasColon <- dmnames[grep(":", dmnames)]
      dm <- dm[ , -match(hasColon, dmnames)] ##remove vars with colons
(lm will recreate)

      ##Now, standardise the variables that need standardizing
      for (i in nc) dm[ , i] <- std( dm[, i])


      fmla <- formula(paste(dmnames[1], " ~ ",  paste(dmnames[-1],
collapse=" + ")))
      cat("This fitted model will use those centered variables\n")
      cat("Model-constructed interactions such as \"x1:x3\" are
built
from centered variables\n")
      mc$formula <- formula(fmla)
      mc$data <-  quote(dm)
    }

  cat("These variables", nc, "Are centered in the design matrix
\n")

  res <- eval(mc)
  class(res) <- c("mcreg", class(model))
  attr(res, "centeredVars") <- nc
  attr(res, "centerCall") <-  match.call()
  res
}

summary.mcreg <- function(object, ...){
  nc <- attr(object, "centeredVars")
  cat("The centered variables were: \n")
  print(nc)
  cat("Even though the variables here have the same names as their
non-centered counterparts, I assure you these are centered.\n")
  mc <- attr(object, "centerCall")
  cat("These results were produced from: \n")
  print(mc)
  NextMethod(generic = "summary", object = object, ...)
}


print.mcreg <- function(x, ...){
  nc <- attr(x, "centeredVars")
  cat("The centered variables were: \n")
  print(nc)
  cat("Even though the variables here have the same names as their
non-centered counterparts, I assure you these are centered.\n")
  mc <- attr(x, "centerCall")
  cat("These results were produced from: \n")
  print(mc)
  NextMethod(generic = "print", object = x, ...)
}


-- 
Paul E. Johnson
Professor, Political Science
1541 Lilac Lane, Room 504
University of Kansas

Simon Urbanek

2012-Jan-03 21:59 UTC

head link

[Rd] returning information from functions via attributes rather than return list

Paul,

On Jan 3, 2012, at 3:08 PM, Paul Johnson wrote:
> I would like to ask for advice from R experts about the benefits or
> dangers of using attr to return information with an object that is
> returned from a function. I have a feeling as though I have cheated by
> using attributes, and wonder if I've done something fishy.
> 
> Maybe I mean to ask, where is the dividing line between attributes and
> instance variables?  The separation is not clear in my mind anymore.
> 
> Background: I paste below a function that takes in a regression object
> and make changes to the data and/or call and then run a
> revised regression.  In my earlier effort, I was building a return
> list, including the new fitted regression object plus some
> variables that have information about the changes that a were made.
> 
> That creates some inconvenience, however.  When the regression is in a
> list object, then methods for lm objects don't apply to that result
> object. The return is not an lm anymore.
Why don't you just subclass it? That's the "normal" way of
doing things - you simply add additional entries for your subclass (e.g.
m$myItem1, m$myItem2, ...), prepend your new subclass name and you're done.
You can still dispatch on your subclass before the superclass while superclass
methods just work as well..

Cheers,
Simon

>  I either need to write
> custom methods for every function or remember to extract the object
> from the list before sending to the generic function.
> 
> I *guessed* it would work to write the new information as object
> attributes, and it seems to work. There is a generic function
> "meanCenter" and a method "meanCenter.default". At the
end of
> meanCenter.default, here's my use (or abuse) of attributes.
> 
>  res <- eval(mc)
>  class(res) <- c("mcreg", class(model))
>  attr(res, "centeredVars") <- nc
>  attr(res, "centerCall") <-  match.call()
>  res
> 
> I wrote print and summary methods, but other methods that work for lm
> objects like plot will also work for these new ones.
> 
> 
> 
> meanCenter <- function(model, centerOnlyInteractors=TRUE,
> centerDV=FALSE, standardize=FALSE, centerContrasts = F){
>  UseMethod("meanCenter")
> }
> 
> meanCenter.default <- function(model, centerOnlyInteractors=TRUE,
> centerDV=FALSE, standardize=FALSE, centerContrasts = F){
> 
>  std <- function(x) {
>    if( !is.numeric(x) ){
>      stop("center.lm tried to center a factor variable. No Can
Do!")
>    } else {
>      scale(x, center = TRUE, scale = standardize)
>    }
>  }
> 
>  rdf <- get_all_vars(formula(model), model$model) #raw data frame
>  t <- terms(model)
>  tl <- attr(t, "term.labels")
>  tmdc <- attr(t, "dataClasses") ##term model data classes
> 
>  isNumeric <- names(tmdc)[ which(tmdc %in% c("numeric"))]
>  isFac <-  names(tmdc)[ which(tmdc %in% c("factor"))]
>  if (tmdc[1] != "numeric") stop("Sorry, DV not a single
numeric column")
> 
>  ##Build "nc", a vector of variable names that "need
centering"
>  ##
>  if (!centerDV) {
>    if (centerOnlyInteractors == FALSE){
>      nc <- isNumeric[-1] #-1 excludes response
>      unique(nc)
>    }else{
>      interactTerms <- tl[grep(":", tl)]
>      nc <- unique(unlist(strsplit( interactTerms, ":")))
>      nc <-  nc[which(nc %in% isNumeric)]
>    }
>  }else{
>    if (centerOnlyInteractors == FALSE){
>      nc <- isNumeric
>    }else{
>      interactTerms <- tl[grep(":", tl)]
>      nc <- unique(unlist(strsplit( interactTerms, ":")))
>      nc <- nc[which(nc %in% isNumeric)]
>      nc <- c( names(tmdc)[1] , nc)
>    }
>  }
> 
> 
>  mc <- model$call
>  # run same model call, replacing non centered data with centered data.
>  ## if no need to center factor contrasts:
>  if (!centerContrasts)
>    {
>      stddat <- rdf
>      for (i in nc) stddat[ , i] <- std( stddat[, i])
>      mc$data <- quote(stddat)
>    }else{
>      ##dm: design matrix, only includes intercept and predictors
>      dm <- model.matrix(model, data=rdf, contrasts.arg >
model$contrasts, xlev = model$xlevels)
>      ##contrastIdx: indexes of contrast variables in dm
>      contrastIdx <- which(attr(dm, "assign")== match(isFac,
tl))
>      contrastVars <- colnames(dm)[contrastIdx]
>      nc <- c(nc, contrastVars)
> 
>      dm <- as.data.frame(dm)
> 
>      hasIntercept <- attr(t, "intercept")
>      if (hasIntercept) dm <- dm[ , -1] # removes intercept, column 1
> 
>      dv <- rdf[ ,names(tmdc)[1]] #tmdc[1] is response variable name
>      dm <- cbind(dv, dm)
>      colnames(dm)[1] <- names(tmdc)[1] #put colname for dv
> 
>      dmnames <- colnames(dm)
>      hasColon <- dmnames[grep(":", dmnames)]
>      dm <- dm[ , -match(hasColon, dmnames)] ##remove vars with colons
> (lm will recreate)
> 
>      ##Now, standardise the variables that need standardizing
>      for (i in nc) dm[ , i] <- std( dm[, i])
> 
> 
>      fmla <- formula(paste(dmnames[1], " ~ ", 
paste(dmnames[-1],
> collapse=" + ")))
>      cat("This fitted model will use those centered variables\n")
>      cat("Model-constructed interactions such as \"x1:x3\"
are built
> from centered variables\n")
>      mc$formula <- formula(fmla)
>      mc$data <-  quote(dm)
>    }
> 
>  cat("These variables", nc, "Are centered in the design
matrix \n")
> 
>  res <- eval(mc)
>  class(res) <- c("mcreg", class(model))
>  attr(res, "centeredVars") <- nc
>  attr(res, "centerCall") <-  match.call()
>  res
> }
> 
> summary.mcreg <- function(object, ...){
>  nc <- attr(object, "centeredVars")
>  cat("The centered variables were: \n")
>  print(nc)
>  cat("Even though the variables here have the same names as their
> non-centered counterparts, I assure you these are centered.\n")
>  mc <- attr(object, "centerCall")
>  cat("These results were produced from: \n")
>  print(mc)
>  NextMethod(generic = "summary", object = object, ...)
> }
> 
> 
> print.mcreg <- function(x, ...){
>  nc <- attr(x, "centeredVars")
>  cat("The centered variables were: \n")
>  print(nc)
>  cat("Even though the variables here have the same names as their
> non-centered counterparts, I assure you these are centered.\n")
>  mc <- attr(x, "centerCall")
>  cat("These results were produced from: \n")
>  print(mc)
>  NextMethod(generic = "print", object = x, ...)
> }
> 
> 
> -- 
> Paul E. Johnson
> Professor, Political Science
> 1541 Lilac Lane, Room 504
> University of Kansas
> 
> ______________________________________________
> R-devel at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-devel
> 
>

Possibly Parallel Threads

Search for more seemingly similar threads

R devel - Jan 2012 - returning information from functions via attributes rather than return list

[Rd] returning information from functions via attributes rather than return list

[Rd] returning information from functions via attributes rather than return list

Possibly Parallel Threads