Hi all,
I was looking at the data frame subscript operator (attached at the end
of this e-mail) and was puzzled by the following line:
class(x) <- attr(x, "row.names") <- NULL
This appears to set the class and row.names attributes of the incoming data
frame to NULL. So far I have not been able to figure out why this is necessary -
could anyone help?
The reason I am looking at it is that changing attributes forces duplication
of the data frame and this is the largest cause of slowness of data.frames in
general.
Thank you very much!
Vladimir Dergachev
> `[.data.frame`
function (x, i, j, drop = if (missing(i)) TRUE else length(cols) == 1)
{
    # Extract elements, columns and/or rows from the data frame `x`.
    #
    # Arguments:
    #   x     the data frame being indexed.
    #   i     one-index form x[i]: selects columns list-style, or, when `i`
    #         is a matrix, indexes as.matrix(x).
    #         two-index form x[i, j]: selects rows; character values are
    #         partially matched (pmatch) against the row names.
    #   j     column selector in the two-index form.
    #   drop  whether the result may be simplified. The default is only
    #         evaluated when `drop` is first used (at `if (drop)` below), by
    #         which time the local `cols` holds the selected column names:
    #         TRUE when `i` is missing, otherwise TRUE only if exactly one
    #         column is left.
    #
    # Value: a data frame carrying the original class vector; or, when
    #   dropping, the single remaining column, or a plain list for a one-row
    #   result if `drop` was supplied explicitly.
    # Errors: "undefined columns selected" when a selected column name is NA.

    mdrop <- missing(drop)
    # Number of arguments supplied apart from `drop`: 2 for x[i], 3 for
    # x[i, j], x[i, ] and x[, j] (nargs() counts empty positions too).
    Narg <- nargs() - (!mdrop)

    if (Narg < 3) {
        # One-index form: list-like or matrix indexing; `drop` has no effect.
        if (!mdrop)
            warning("drop argument will be ignored")
        if (missing(i))
            return(x)
        if (is.matrix(i))
            return(as.matrix(x)[i])
        # Let the next `[` method pick the columns, then validate the names.
        y <- NextMethod("[")
        nm <- names(y)
        if (!is.null(nm) && any(is.na(nm)))
            stop("undefined columns selected")
        if (any(duplicated(nm)))
            names(y) <- make.unique(nm)
        # Re-attach the class and row names taken from the input.
        return(structure(y, class = oldClass(x), row.names = attr(x,
            "row.names")))
    }

    # Save the attributes that are needed to rebuild the result later.
    rows <- attr(x, "row.names")
    cols <- names(x)
    cl <- oldClass(x)
    # Strip class and row names: from here on `x` is treated as a plain list,
    # so the `[` / `[[` calls below do not dispatch back to data frame methods.
    class(x) <- attr(x, "row.names") <- NULL

    if (missing(i)) {
        # x[, j] or x[, ]: column-only selection.
        if (!missing(j))
            x <- x[j]
        cols <- names(x)
        if (any(is.na(cols)))
            stop("undefined columns selected")
    }
    else {
        # x[i, j] or x[i, ]: translate character row selectors to positions.
        if (is.character(i))
            i <- pmatch(i, as.character(rows), duplicates.ok = TRUE)
        rows <- rows[i]
        if (!missing(j)) {
            x <- x[j]
            cols <- names(x)
            if (any(is.na(cols)))
                stop("undefined columns selected")
        }
        # Subset every remaining column by row; `j` is reused as the loop
        # index. Two-dimensional columns keep their second dimension.
        for (j in seq_along(x)) {
            xj <- x[[j]]
            x[[j]] <- if (length(dim(xj)) != 2)
                xj[i]
            else xj[i, , drop = FALSE]
        }
    }

    if (drop) {
        # Decide whether dropping actually applies; `drop` is reset to FALSE
        # unless one of the cases below re-enables it.
        drop <- FALSE
        n <- length(x)
        if (n == 1) {
            # A single column is left: return it directly.
            x <- x[[1]]
            drop <- TRUE
        }
        else if (n > 1) {
            xj <- x[[1]]
            nrow <- if (length(dim(xj)) == 2)
                dim(xj)[1]
            else length(xj)
            # A one-row result becomes a plain list only if `drop` was given
            # explicitly by the caller.
            if (!mdrop && nrow == 1) {
                drop <- TRUE
                names(x) <- cols
                attr(x, "row.names") <- NULL
            }
        }
    }

    # Deliberately not an `else`: the section above may have reset `drop`.
    if (!drop) {
        # Rebuild a data frame: restore names, repair row names, re-class.
        names(x) <- cols
        if (any(is.na(rows) | duplicated(rows))) {
            rows[is.na(rows)] <- "NA"
            rows <- make.unique(rows)
        }
        if (any(duplicated(nm <- names(x))))
            names(x) <- make.unique(nm)
        attr(x, "row.names") <- rows
        class(x) <- cl
    }
    x
}
<environment: namespace:base>