Neha gupta
2019-Dec-25 16:50 UTC
[R] Something is wrong; all the MAE metric values are missing
Hi, I am using simulated annealing to tune the parameters of xgbTree for a regression dataset. When I run the code to tune the parameters of SVM and RF it works, but when I run the same code for xgbTree it stops and gives the error:

Something is wrong; all the MAE metric values are missing:
      RMSE         Rsquared         MAE
 Min.   : NA    Min.   : NA    Min.   : NA
 1st Qu.: NA    1st Qu.: NA    1st Qu.: NA
 Median : NA    Median : NA    Median : NA
 Mean   :NaN    Mean   :NaN    Mean   :NaN
 3rd Qu.: NA    3rd Qu.: NA    3rd Qu.: NA
 Max.   : NA    Max.   : NA    Max.   : NA
 NA's   :1      NA's   :1      NA's   :1

The code is given below:

library(caret)    # createDataPartition(), createFolds(), train(), getTrainPerf()
library(xgboost)
library(farff)    # readARFF()
library(tictoc)   # tic()

d <- readARFF("ant.arff")
dput(head(d, 50))

## output of dput(), so the example is reproducible:
d <- structure(list(
  version = c(1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7,
              1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7, 1.7),
  wmc = c(3, 5, 1, 8, 9, 3, 20, 13, 9, 7, 9, 3, 1, 9, 19, 10, 3, 20, 6, 3, 5, 1, 11, 3, 3, 16, 4, 15, 11, 2, 15, 14, 27, 5, 3, 4, 6, 55, 3, 8, 11, 10, 1, 3, 9, 7, 9, 63, 6, 2),
  dit = c(1, 2, 2, 1, 3, 2, 1, 1, 1, 5, 6, 2, 1, 1, 4, 4, 2, 1, 1, 4, 2, 1, 2, 4, 2, 3, 4, 1, 3, 3, 3, 4, 3, 4, 1, 5, 1, 3, 2, 1, 2, 1, 3, 2, 1, 1, 1, 1, 3, 1),
  noc = c(0, 0, 0, 9, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 2, 0),
  cbo = c(10, 4, 1, 13, 5, 7, 4, 7, 5, 9, 5, 19, 10, 3, 7, 13, 2, 7, 8, 5, 7, 1, 7, 5, 5, 15, 12, 11, 3, 1, 9, 11, 8, 8, 2, 5, 2, 20, 4, 4, 1, 10, 1, 12, 32, 24, 4, 61, 20, 6),
  rfc = c(18, 13, 3, 20, 26, 4, 40, 28, 19, 25, 17, 10, 1, 12, 40, 26, 5, 79, 6, 11, 14, 1, 26, 15, 7, 65, 5, 65, 24, 3, 41, 29, 101, 23, 3, 19, 11, 106, 19, 22, 23, 29, 2, 22, 36, 8, 19, 144, 13, 2),
  lcom = c(3, 0, 0, 12, 16, 1, 130, 20, 8, 0, 26, 3, 0, 20, 129, 0, 0, 136, 15, 1, 6, 0, 0, 3, 1, 76, 0, 75, 17, 1, 0, 85, 157, 4, 3, 6, 3, 1313, 3, 0, 39, 43, 0, 3, 36, 3, 30, 1603, 7, 1),
  ca = c(1, 1, 0, 9, 0, 6, 0, 2, 4, 6, 0, 14, 8, 1, 0, 7, 1, 5, 7, 1, 0, 0, 5, 0, 1, 0, 8, 0, 0, 1, 0, 3, 5, 4, 2, 0, 1, 8, 0, 0, 1, 7, 0, 2, 32, 24, 1, 51, 17, 6),
  ce = c(9, 4, 1, 4, 5, 1, 4, 5, 1, 3, 5, 5, 2, 2, 7, 7, 1, 5, 1, 4, 7, 1, 2, 5, 4, 15, 4, 11, 3, 0, 9, 8, 3, 8, 0, 5, 1, 12, 4, 4, 0, 3, 1, 10, 0, 0, 3, 10, 3, 0),
  npm = c(1, 4, 1, 8, 7, 2, 18, 12, 9, 7, 6, 3, 1, 8, 16, 9, 3, 10, 6, 1, 4, 1, 11, 2, 3, 13, 3, 11, 10, 2, 15, 11, 15, 5, 3, 4, 4, 9, 2, 6, 10, 8, 1, 2, 7, 7, 7, 31, 3, 2),
  loc = c(106, 76, 7, 101, 185, 16, 345, 183, 119, 255, 71, 38, 1, 54, 252, 110, 34, 835, 6, 41, 70, 1, 181, 51, 29, 483, 18, 443, 309, 7, 204, 108, 1286, 249, 4, 57, 53, 1133, 136, 169, 136, 91, 4, 72, 281, 43, 130, 2303, 56, 2),
  moa = c(0, 1, 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 3, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0),
  ic = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0),
  cbm = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 0, 0, 0, 3, 1, 2, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 2, 0, 7, 2, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0),
  max_cc = c(1, 1, 0, 1, 2, 1, 3, 7, 3, 9, 3, 1, 1, 1, 6, 3, 1, 10, 1, 3, 4, 1, 1, 2, 1, 9, 1, 5, 11, 1, 2, 4, 4, 1, 1, 2, 1, 10, 2, 7, 1, 3, 0, 1, 11, 1, 4, 35, 1, 1),
  bug = c(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0)),
  row.names = c(NA, 50L), class = "data.frame")

index <- createDataPartition(log10(d$bug), p = .70, list = FALSE)
tr <- d[index, ]
ts <- d[-index, ]

index_2 <- createFolds(tr$bug, returnTrain = TRUE, list = TRUE)
ctrl <- trainControl(method = "repeatedcv", index = index_2)

obj <- function(param, maximize = FALSE) {
  mod <- train(bug ~ ., data = tr,
               method = "xgbTree",
               preProc = c("center", "scale", "zv"),
               metric = "MAE",
               trControl = ctrl,
               tuneGrid = data.frame(nrounds = param[1],
                                     max_depth = param[2],
                                     eta = param[3],
                                     gamma = param[4],
                                     colsample_bytree = param[5],
                                     min_child_weight = param[6],
                                     subsample = param[7]))
  if (maximize) -getTrainPerf(mod)[, "TrainMAE"] else getTrainPerf(mod)[, "TrainMAE"]
}

num_mods <- 50

## Simulated annealing from base R
set.seed(30218)
tic()
san_res <- optim(par = c(20, 1, 0.1, 0, 0.1, 1, 0.1), fn = obj,
                 method = "SANN", control = list(maxit = num_mods))
san_res
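For reference, the starting vector handed to optim() maps to this single-row tuneGrid inside obj() (values taken from the code above; evaluating obj(start_par) once shows whether train() succeeds at that point in isolation):

start_par <- c(20, 1, 0.1, 0, 0.1, 1, 0.1)
data.frame(nrounds          = start_par[1],   # 20
           max_depth        = start_par[2],   # 1
           eta              = start_par[3],   # 0.1
           gamma            = start_par[4],   # 0
           colsample_bytree = start_par[5],   # 0.1
           min_child_weight = start_par[6],   # 1
           subsample        = start_par[7])   # 0.1
obj(start_par)   # a single call, outside optim()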
Ivan Krylov
2019-Dec-25 17:06 UTC
[R] Something is wrong; all the MAE metric values are missing
Try printing the value of `param` in your `obj` function before calling train() or getTrainPerf(). Optimizers are prone to giving unexpected values [*] in trying to lower the loss function. It might be the case of an unconstrained optimizer leaving the realm of the feasible because no-one told it not to.

--
Best regards,
Ivan

[*] See e.g. https://arxiv.org/abs/1803.03453 for examples of very unorthodox ways of gaming the fitness function
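For example, a minimal sketch of that diagnostic, reusing tr and ctrl from the original post; the print() call is the point, and the rounding and clipping bounds below are only illustrative guesses at a feasible region, not a recommendation for this data set:

obj <- function(param, maximize = FALSE) {
  print(param)                                   # what is SANN actually proposing?
  ## Map the unconstrained proposal back into values xgbTree can use:
  grid <- data.frame(
    nrounds          = max(1, round(param[1])),  # positive integer
    max_depth        = max(1, round(param[2])),  # positive integer
    eta              = min(max(param[3], 1e-4), 1),
    gamma            = max(param[4], 0),
    colsample_bytree = min(max(param[5], 0.1), 1),
    min_child_weight = max(param[6], 0),
    subsample        = min(max(param[7], 0.1), 1))
  mod <- train(bug ~ ., data = tr, method = "xgbTree",
               preProc = c("center", "scale", "zv"),
               metric = "MAE", trControl = ctrl, tuneGrid = grid)
  if (maximize) -getTrainPerf(mod)[, "TrainMAE"] else getTrainPerf(mod)[, "TrainMAE"]
}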