Merge branch 'master' into weights_ii

mlr-org · Dec 19, 2024 · 5f75f1c · 5f75f1c
2 parents 89202f5 + 2c2c16d
commit 5f75f1c
Show file tree

Hide file tree

Showing 121 changed files with 1,536 additions and 689 deletions.
diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
@@ -44,7 +44,7 @@ jobs:
 
       - name: Deploy
         if: github.event_name != 'pull_request'
-        uses: JamesIves/github-pages-deploy-action@v4.6.3
+        uses: JamesIves/github-pages-deploy-action@v4.7.2
         with:
           clean: false
           branch: gh-pages

diff --git a/.gitignore b/.gitignore
@@ -24,7 +24,8 @@
 .LSOverride
 
 # Icon must end with two \r
-Icon
+Icon
+
 
 # Thumbnails
 ._*
@@ -180,3 +181,4 @@ revdep/
 
 # misc
 Meta/
+Rplots.pdf
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mlr3
 Title: Machine Learning in R - Next Generation
-Version: 0.20.2.9000
+Version: 0.22.1.9000
 Authors@R:
   c(
     person("Michel", "Lang", , "[email protected]", role = "aut",
@@ -52,7 +52,7 @@ Imports:
     future.apply (>= 1.5.0),
     lgr (>= 0.3.4),
     mlbench,
-    mlr3measures (>= 0.6.0),
+    mlr3measures (>= 1.0.0),
     mlr3misc (>= 0.15.0),
     parallelly,
     palmerpenguins,
@@ -69,9 +69,7 @@ Suggests:
     remotes,
     RhpcBLASctl,
     rpart,
-    testthat (>= 3.1.0)
-Remotes:
-    mlr-org/mlr3measures
+    testthat (>= 3.2.0)
 Encoding: UTF-8
 Config/testthat/edition: 3
 Config/testthat/parallel: false
@@ -158,7 +156,7 @@ Collate:
     'TaskGeneratorSpirals.R'
     'TaskGeneratorXor.R'
     'TaskRegr.R'
-    'TaskRegr_boston_housing.R'
+    'TaskRegr_california_housing.R'
     'TaskRegr_mtcars.R'
     'TaskUnsupervised.R'
     'as_benchmark_result.R'
@@ -181,6 +179,7 @@ Collate:
     'benchmark.R'
     'benchmark_grid.R'
     'bibentries.R'
+    'default_fallback.R'
     'default_measures.R'
     'fix_factor_levels.R'
     'helper.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -74,6 +74,9 @@ S3method(col_info,DataBackend)
 S3method(col_info,data.table)
 S3method(create_empty_prediction_data,TaskClassif)
 S3method(create_empty_prediction_data,TaskRegr)
+S3method(default_fallback,Learner)
+S3method(default_fallback,LearnerClassif)
+S3method(default_fallback,LearnerRegr)
 S3method(default_values,Learner)
 S3method(default_values,LearnerClassifRpart)
 S3method(default_values,LearnerRegrRpart)
@@ -108,6 +111,11 @@ S3method(set_threads,list)
 S3method(set_validate,Learner)
 S3method(summary,Task)
 S3method(tail,Task)
+S3method(task_check_col_roles,Task)
+S3method(task_check_col_roles,TaskClassif)
+S3method(task_check_col_roles,TaskRegr)
+S3method(task_check_col_roles,TaskSupervised)
+S3method(task_check_col_roles,TaskUnsupervised)
 S3method(unmarshal_model,classif.debug_model_marshaled)
 S3method(unmarshal_model,default)
 S3method(unmarshal_model,learner_state_marshaled)
@@ -241,6 +249,7 @@ export(rsmp)
 export(rsmps)
 export(set_threads)
 export(set_validate)
+export(task_check_col_roles)
 export(tgen)
 export(tgens)
 export(tsk)

diff --git a/NEWS.md b/NEWS.md
@@ -5,25 +5,52 @@
   The weights used during training by the Learner are renamed to `weights_learner`, the previous column role `weight` is dysfunctional.
   Additionally, it is now possible to disable the use of weights via the new hyperparameter `use_weights`.
   Note that this is a breaking change, but appears to be the less error-prone solution in the long run.
-* refactor: Deprecated `data_format` and `data_formats` for Learners, Tasks, and DataBackends.
-* feat: The `partition()` function creates training, test and validation sets.
-* refactor: Optimize runtime of fixing factor levels.
-* refactor: Optimize runtime of setting row roles.
-* refactor: Optimize runtime of marshalling.
-* refactor: Optimize runtime of `Task$col_info`.
-* fix: Column info is now checked for compatibility during `Learner$predict` (#943).
+
+# mlr3 0.22.1
+
+* fix: Extend `assert_measure()` with checks for trained models in `assert_scorable()`.
+
+# mlr3 0.22.0
+
+* fix: Quantiles must not ascend with probabilities.
+* refactor: Replace `tsk("boston_housing")` with `tsk("california_housing")`.
+* feat: Require unique learner ids in `benchmark_grid()`.
+* BREAKING CHANGE: Remove ``$loglik()`` method from all learners.
+* fix: Ignore `future.globals.maxSize` when `future::plan("sequential")` is used.
+* feat: Add `$characteristics` field to `Task` to store additional information.
+
+# mlr3 0.21.1
+
+* feat: Throw warning when prediction and measure type do not match.
+* fix: The `mlr_reflections` were broken when an extension package was not loaded on the workers.
+  Extension packages must now register themselves in the `mlr_reflections$loaded_packages` field.
+
+# mlr3 0.21.0
+
+* BREAKING CHANGE: Deprecated `data_format` and `data_formats` for `Learner`, `Task`, and `DataBackend` classes.
+* feat: The `partition()` function creates training, test and validation sets now.
+* perf: Optimize the runtime of fixing factor levels.
+* perf: Optimize the runtime of setting row roles.
+* perf: Optimize the runtime of marshalling.
+* perf: Optimize the runtime of `Task$col_info`.
+* fix: Column info is now checked for compatibility during `Learner$predict` (#943).
 * BREAKING CHANGE: The predict time of the learner now stores the cumulative duration for all predict sets (#992).
 * feat: `$internal_valid_task` can now be set to an `integer` vector.
 * feat: Measures can now have an empty `$predict_sets` (#1094).
   This is relevant for measures that only extract information from the model of a learner (such as internal validation scores or AIC / BIC)
-* refactor: Deprecated the `$divide()` method
-* fix: `Task$cbind()` now works with non-standard primary keys  for `data.frames` (#961).
+* BREAKING CHANGE: Deprecated the `$divide()` method.
+* fix: `Task$cbind()` now works with non-standard primary keys for `data.frames` (#961).
 * fix: Triggering of fallback learner now has log-level `"info"` instead of `"debug"` (#972).
-* feat: Added new measure `pinballs `.
-* feat: Added new measure `mu_auc`.
+* feat: Added new measure `regr.pinball` here and in mlr3measures.
+* feat: Added new measure `mu_auc` here and in mlr3measures.
 * feat: Add option to calculate the mean of the true values on the train set in `msr("regr.rsq")`.
 * feat: Default fallback learner is set when encapsulation is activated.
-* feat: Learners classif.debug and regr.debug have new methods `$importance()` and `$selected_features()` for testing, also in downstream packages
+* feat: Learners `classif.debug` and `regr.debug` have new methods `$importance()` and `$selected_features()` for testing, also in downstream packages.
+* feat: Create default fallback learner with `default_fallback()`.
+* feat: Check column roles when using `$set_col_roles()` and `$col_roles`.
+* fix: Add predict set to learner hash.
+* BREAKING CHANGE: Encapsulation and the fallback learner are now set with the `$encapsulate(method, fallback)` method.
+  The `$fallback` field is read-only now and the encapsulation status can be retrieved from the `$encapsulation` field.
 
 # mlr3 0.20.2
 

diff --git a/R/BenchmarkResult.R b/R/BenchmarkResult.R
@@ -19,7 +19,7 @@
 #' @template param_measures
 #'
 #' @section S3 Methods:
-#' * `as.data.table(rr, ..., reassemble_learners = TRUE, convert_predictions = TRUE, predict_sets = "test")`\cr
+#' * `as.data.table(rr, ..., reassemble_learners = TRUE, convert_predictions = TRUE, predict_sets = "test", task_characteristics = FALSE)`\cr
 #'   [BenchmarkResult] -> [data.table::data.table()]\cr
 #'   Returns a tabular view of the internal data.
 #' * `c(...)`\cr
@@ -545,9 +545,17 @@ BenchmarkResult = R6Class("BenchmarkResult",
 )
 
 #' @export
-as.data.table.BenchmarkResult = function(x, ..., hashes = FALSE, predict_sets = "test") { # nolint
+as.data.table.BenchmarkResult = function(x, ..., hashes = FALSE, predict_sets = "test", task_characteristics = FALSE) { # nolint
+  assert_flag(task_characteristics)
   tab = get_private(x)$.data$as_data_table(view = NULL, predict_sets = predict_sets)
-  tab[, c("uhash", "task", "learner", "resampling", "iteration", "prediction"), with = FALSE]
+  tab = tab[, c("uhash", "task", "learner", "resampling", "iteration", "prediction"), with = FALSE]
+
+  if (task_characteristics) {
+    set(tab, j = "characteristics", value = map(tab$task, "characteristics"))
+    tab = unnest(tab, "characteristics")
+  }
+
+  tab[]
 }
 
 #' @export

diff --git a/R/DataBackend.R b/R/DataBackend.R
@@ -90,7 +90,8 @@ DataBackend = R6Class("DataBackend", cloneable = FALSE,
     #' This is deprecated and will be removed in the future.
     data_formats = deprecated_binding("DataBackend$data_formats", "data.table"),
 
-    #' @template field_hash
+    #' @field hash (`character(1)`)\cr
+    #' Hash (unique identifier) for this object.
     hash = function(rhs) {
       if (missing(rhs)) {
         if (is.na(private$.hash)) {