Dear R-helpers,
Can you help me see why "code 1" gives an error
while "code 2" runs fine? The only difference in
the data is the distribution of the age categories.
I have included my session output after the code.
Many thanks.
XL
library(survival)
library(rpart)
# code 1
# Unbalanced age groups (2, 3 and 15 subjects). This is the variant that
# fails with the error shown in the session transcript further down.
# NOTE(review): "data has 1" in that error suggests the fitted tree consists
# of the root node only. The two candidate cuts on age leave 2 or 5 rows on
# one side; presumably that is below rpart's default minimum leaf size
# (minbucket) -- confirm against ?rpart.control.
n <- 20
age <- rep(1:3, times = c(2, 3, 15))
# Simulated exponential survival times and Bernoulli(0.3) event indicators;
# columns are named at construction rather than renamed afterwards.
eg <- data.frame(
  surv = rexp(n),
  status = rbinom(n, 1, prob = 0.3),
  age = age
)
# Survival tree of (surv, status) on age; the top-level call auto-prints.
rpart(Surv(surv, status) ~ age, data = eg)
# code 2
# Same simulation as code 1 but with age groups of 5, 5 and 10 subjects.
# According to the session transcript below, this variant fits without error
# and yields a single split at age 2.5 with 10 observations on each side.
n <- 20
age <- rep(1:3, times = c(5, 5, 10))
# Simulated exponential survival times and Bernoulli(0.3) event indicators;
# columns are named at construction rather than renamed afterwards.
eg <- data.frame(
  surv = rexp(n),
  status = rbinom(n, 1, prob = 0.3),
  age = age
)
# Survival tree of (surv, status) on age; the top-level call auto-prints.
rpart(Surv(surv, status) ~ age, data = eg)
# my session:
> library(rpart)
> # code 1
> n <- 20
> age <- rep(1:3, c(2, 3, 15))
> eg<- data.frame(rexp(n), rbinom(n,1,prob=.3), age=age)
> names(eg) <- c("surv", "status", "age")
> rpart(Surv(surv, status)~age, data=eg)
Error in "$<-.data.frame"(`*tmp*`, "yval2", value = c(1, 7)) :
        replacement has 2 rows, data has 1
> # code 2
> n <- 20
> age <- rep(1:3, c(5, 5, 10))
> eg<- data.frame(rexp(n), rbinom(n,1,prob=.3), age=age)
> names(eg) <- c("surv", "status", "age")
> rpart(Surv(surv, status)~age, data=eg)
n= 20
node), split, n, deviance, yval
      * denotes terminal node
1) root 20 19.007310 1.0000000
  2) age>=2.5 10  9.673372 0.8230355 *
  3) age< 2.5 10  9.027225 1.1922660 *