Zahyah Alharbi (CMP)
2016-Nov-25 14:47 UTC
[R] error with caretEnsmble of different training datasets of SVM - need help
Hi, The following is a reproducible example , what basically I am trying to do , is creating five imputed datasets then apply SVM to each imputed dataset using the train function in caret, then ensemble the resulted training model using caretEnsemble. Lastly, I am predicting each test set using the ensemble model. However, I have this error (Error: { .... is not TRUE) occur with caretEnsemble although I converted the list of the resulted model to caretlist. Any help is truly appreciated. library(mice) library(e1071) library(caret) library("caretEnsemble") data <- iris #Generate 10% missing values at Random iris.mis <- prodNA(iris, noNA = 0.1) #remove categorical variables iris.mis <- subset(iris.mis, select = -c(Species)) # 5 Imputation using mice pmm imp <- mice(iris.mis, m=5, maxit = 10, method = 'pmm', seed = 500) # save 5 imputed dataset. x1 <- complete(imp, action = 1, include = FALSE) x2 <- complete(imp, action = 2, include = FALSE) x3 <- complete(imp, action = 3, include = FALSE) x4 <- complete(imp, action = 4, include = FALSE) x5 <- complete(imp, action = 5, include = FALSE) ## Apply the following method with 10 fold across validation for each imputed set and Compute rmse for each imputed set avg.rmse <- NULL avg.foldrmse <- matrix(data = NA,nrow=5, ncol=1) SDofMean.rmse <- NULL form <- iris$Sepal.Width # target coloumn fold <- 10 # number of fold for cross validation n <- nrow(x1) # since all data sample are the same length prop <- n%/%fold set.seed(7) newseq <- rank(runif(n)) k <- as.factor((newseq - 1)%/%prop + 1) y <- unlist(strsplit(as.character(form), " "))[2] vec.error <- vector(length = fold) for (i in seq(fold)) { avg.foldrmse <- NULL # Perfrom SVM method on each imputed dataset fit1 <- train(Sepal.Width ~., data = x1[k != i, ],method='svmLinear2') fit2 <- train(Sepal.Width ~., data = x2[k != i, ],method='svmLinear2') fit3 <- train(Sepal.Width ~., data = x3[k != i, ],method='svmLinear2') fit4 <- train(Sepal.Width ~., data = x4[k != i, ],method='svmLinear2') fit5 <- train(Sepal.Width ~., data = x5[k != i, ],method='svmLinear2') #combine in the created model to a list svm.fit <- list(svmLinear1 = fit1, svmLinear2 = fit2, svmLinear3 = fit3, svmLinear4 = fit4, svmLinear5 = fit5) # convert the list to cartlist class(svm.fit) <- "caretList" #create the ensemble where the error occur. svm.all <- caretEnsemble(svm.fit,method='svmLinear2') # predict the 5 test set using the ensemble model and compute the RMSE fcast1 <- predict(svm.all, newdata = x1[k == i, ]) rmse1 <- sqrt(mean((x1[k == i, ]$Sepal.Width - fcast1)^2)) avg.foldrmse[1] <- rmse1 # predict using test set of the Second imputed dataset fcast2 <- predict(svm.all, newdata = x2[k == i, ]) rmse2 <- sqrt(mean((x2[k == i, ]$Sepal.Width- fcast2)^2)) avg.foldrmse[2] <- rmse2 # predict using test set of the Third imputed dataset fcast3 <- predict(svm.all, newdata = x3[k == i, ]) rmse3 <- sqrt(mean((x3[k == i, ]$Sepal.Width- fcast3)^2)) avg.foldrmse[3] <- rmse3 # predict using test set of the fourth imputed dataset fcast4 <- predict(svm.all, newdata = x4[k == i, ]) rmse4 <- sqrt(mean((x4[k == i, ]$Sepal.Width - fcast4)^2)) avg.foldrmse[4] <- rmse4 # predict using test set of the fifth imputed dataset fcast5 <- predict(svm.all, newdata = x5[k == i, ]) rmse5 <- sqrt(mean((x5[k == i, ]$Sepal.Width - fcast5)^2)) avg.foldrmse[5] <- rmse5 }# end loop Regards, Zawahy [[alternative HTML version deleted]]