Displaying 2 results from an estimated 2 matches for "impurity_l".
Did you mean:
impurity_r
2011 Jun 13
1
In rpart, how is "improve" calculated? (in the "class" case)
...for the case when y has only 0 and 1
categories...
-sum(p*log(p,2))
}
# Gini impurity of a vector of class proportions `p`:
# the sum over classes of p * (1 - p).
gini <- function(p) {
  complement <- 1 - p
  sum(p * complement)
}
# Manually reproduce the first split on x: observations with x > .5 (obs_1)
# versus observations with x < .5 (obs_0).
obs_1 <- y[x > .5]
obs_0 <- y[x < .5]
# Each count must describe the same child node as the impurity it weights:
# n_l pairs with impurity_l (computed from obs_0, x < .5) and
# n_R pairs with impurity_R (computed from obs_1, x > .5).
n_l <- sum(x < .5)
n_R <- sum(x > .5)
n <- length(x)
# for entropy (information)
impurity_root <- entropy(prop.table(table(y)))
impurity_l <- entropy(prop.table(table(obs_0)))
impurity_R <- entropy(prop.table(table(obs_1)))
# shouldn't this have been "improve" ??
# Impurity decrease: root impurity minus the size-weighted child impurities.
# NOTE(review): 0.7272 was the value recorded here when the two child counts
# were paired with the opposite child's impurity -- re-run to confirm.
impurity_root - ((n_l / n) * impurity_l + (n_R / n) * impurity_R)
# for "gini"
impurity_root <- gini(prop.table(table(y)))
impurity_l <-...
2011 Jun 21
0
How does rpart compute "improve" for split="information"? (which seems to be different from the "gini" case)
...(1324)
# Simulated data: 20 binary responses drawn with replacement; x is a copy of y
# except that its first five values are set to 0.
y <- sample(c(0, 1), 20, replace = TRUE)
x <- y
x[1:5] <- 0
# manually making the first split
# Left child: x < .5; right child: x > .5. Counts are taken from the same
# conditions as the observation subsets so they always agree.
obs_L <- y[x < .5]
obs_R <- y[x > .5]
n_L <- sum(x < .5)
n_R <- sum(x > .5)
n <- length(x)
# Compute the improvement of the manual first split, scaled by sample size.
#
# Args:
#   func: impurity function applied to a vector of class proportions
#         (defaults to `gini`).
#
# Reads y, obs_L, obs_R, n_L, n_R and n from the enclosing environment.
#
# Returns:
#   n * (root impurity - size-weighted mean of the two child impurities).
calc.impurity <- function(func = gini) {
  impurity_root <- func(prop.table(table(y)))
  impurity_L <- func(prop.table(table(obs_L)))
  impurity_R <- func(prop.table(table(obs_R)))
  # Must use the local impurity_L (capital L): a lower-case impurity_l is not
  # defined in this function and would either fail or pick up a stale global
  # that ignores `func`.
  imp <- impurity_root -
    ((n_L / n) * impurity_L + (n_R / n) * impurity_R) # 0.3757
  imp * n
}
# for "gini"
# library() stops with an error when rpart is not installed, whereas require()
# only warns and returns FALSE, deferring the failure to the rpart() call.
library(rpart)
# Classification tree of y on x using the Gini splitting criterion.
fit <- rpart(y ~ x, method = "class", parms = list(split = "gini"))
fit$sp...