Tree Methods

# Fix the RNG state so the random parts of this run can be reproduced.
set.seed(31415L)

# Learners to compare, all predicting probabilities:
#   * three rpart trees that differ only in `maxdepth` (1, 5, 20)
#   * three ranger forests that differ only in `mtry.ratio` (0.2, 0.5, 0.8)
# NOTE(review): the last forest id is spelled "rf_mtry0.8" (no "r" before the
# ratio, unlike its two siblings). It is kept verbatim here because the
# benchmark log further down prints exactly this id.
lrns = c(
  lapply(c(1, 5, 20), function(depth) {
    lrn("classif.rpart", id = paste0("rpart_md", depth), maxdepth = depth,
      predict_type = "prob")
  }),
  # unname(): Map() would otherwise name the elements after the id vector,
  # whereas the list is meant to be unnamed.
  unname(Map(
    function(id, ratio) {
      lrn("classif.ranger", id = id, mtry.ratio = ratio, predict_type = "prob")
    },
    c("rf_mtryr0.2", "rf_mtryr0.5", "rf_mtry0.8"),
    c(0.2, 0.5, 0.8)
  ))
)

# 5-fold cross-validation, instantiated once on the task so that every learner
# is evaluated on the same train/test splits.
cv5 = rsmp("cv", folds = 5)$instantiate(task)

# Cross all learners with the task and the fixed resampling, then run the
# resulting benchmark design.
bmr = benchmark(
  design = benchmark_grid(tasks = task, learners = lrns, resamplings = cv5)
)

INFO  [19:03:34.711] [mlr3] Running benchmark with 30 resampling iterations
INFO  [19:03:34.784] [mlr3] Applying learner 'rpart_md1' on task 'german_credit' (iter 1/5)
INFO  [19:03:34.816] [mlr3] Applying learner 'rpart_md1' on task 'german_credit' (iter 2/5)
INFO  [19:03:34.847] [mlr3] Applying learner 'rpart_md1' on task 'german_credit' (iter 3/5)
INFO  [19:03:34.878] [mlr3] Applying learner 'rpart_md1' on task 'german_credit' (iter 4/5)
INFO  [19:03:34.910] [mlr3] Applying learner 'rpart_md1' on task 'german_credit' (iter 5/5)
INFO  [19:03:34.940] [mlr3] Applying learner 'rpart_md5' on task 'german_credit' (iter 1/5)
INFO  [19:03:34.971] [mlr3] Applying learner 'rpart_md5' on task 'german_credit' (iter 2/5)
INFO  [19:03:35.009] [mlr3] Applying learner 'rpart_md5' on task 'german_credit' (iter 3/5)
INFO  [19:03:35.041] [mlr3] Applying learner 'rpart_md5' on task 'german_credit' (iter 4/5)
INFO  [19:03:35.072] [mlr3] Applying learner 'rpart_md5' on task 'german_credit' (iter 5/5)
INFO  [19:03:35.073] [mlr3] Applying learner 'rpart_md20' on task 'german_credit' (iter 1/5)
INFO  [19:03:35.119] [mlr3] Applying learner 'rpart_md20' on task 'german_credit' (iter 2/5)
INFO  [19:03:35.160] [mlr3] Applying learner 'rpart_md20' on task 'german_credit' (iter 3/5)
INFO  [19:03:35.202] [mlr3] Applying learner 'rpart_md20' on task 'german_credit' (iter 4/5)
INFO  [19:03:35.244] [mlr3] Applying learner 'rpart_md20' on task 'german_credit' (iter 5/5)
INFO  [19:03:35.286] [mlr3] Applying learner 'rf_mtryr0.2' on task 'german_credit' (iter 1/5)
INFO  [19:03:35.330] [mlr3] Applying learner 'rf_mtryr0.2' on task 'german_credit' (iter 2/5)
INFO  [19:03:35.373] [mlr3] Applying learner 'rf_mtryr0.2' on task 'german_credit' (iter 3/5)
INFO  [19:03:35.422] [mlr3] Applying learner 'rf_mtryr0.2' on task 'german_credit' (iter 4/5)
INFO  [19:03:35.467] [mlr3] Applying learner 'rf_mtryr0.2' on task 'german_credit' (iter 5/5)
INFO  [19:03:35.518] [mlr3] Applying learner 'rf_mtryr0.5' on task 'german_credit' (iter 1/5)
INFO  [19:03:35.571] [mlr3] Applying learner 'rf_mtryr0.5' on task 'german_credit' (iter 2/5)
INFO  [19:03:35.626] [mlr3] Applying learner 'rf_mtryr0.5' on task 'german_credit' (iter 3/5)
INFO  [19:03:35.683] [mlr3] Applying learner 'rf_mtryr0.5' on task 'german_credit' (iter 4/5)
INFO  [19:03:35.738] [mlr3] Applying learner 'rf_mtryr0.5' on task 'german_credit' (iter 5/5)
INFO  [19:03:35.794] [mlr3] Applying learner 'rf_mtry0.8' on task 'german_credit' (iter 1/5)
INFO  [19:03:35.850] [mlr3] Applying learner 'rf_mtry0.8' on task 'german_credit' (iter 2/5)
INFO  [19:03:35.902] [mlr3] Applying learner 'rf_mtry0.8' on task 'german_credit' (iter 3/5)
INFO  [19:03:35.955] [mlr3] Applying learner 'rf_mtry0.8' on task 'german_credit' (iter 4/5)
INFO  [19:03:36.014] [mlr3] Applying learner 'rf_mtry0.8' on task 'german_credit' (iter 5/5)
INFO  [19:03:36.324] [mlr3] Finished benchmark

# Plot the benchmark result per learner, scored by classification error.
mlr3viz::autoplot(
  object = bmr,
  measure = msr("classif.ce")
)

The boxplots show that the performance of the learners depends strongly on the choice of hyperparameters.

Follow-up question: How should the hyperparameters be set properly? Answer: With hyperparameter optimization (see the next use case).

Related

Leave a Reply Cancel reply