Merge pull request #407 from mlr-org/pipeop_ipcw

add pipeop IPCW draft
mlr-org · Oct 8, 2024 · 323bfea · 323bfea
2 parents a3318b7 + 69e13df
commit 323bfea
Show file tree

Hide file tree

Showing 81 changed files with 1,871 additions and 479 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -23,3 +23,4 @@ README.html
 ^\.vscode$
 ^\.lintr$
 ^\.pre-commit-config\.yaml$
+^pkgdown/_pkgdown\.yml$
diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
@@ -42,7 +42,7 @@ jobs:
         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
         shell: Rscript {0}
 
-      - name: Deploy
+      - name: Deploy to GitHub pages 🚀
         if: github.event_name != 'pull_request'
         uses: JamesIves/[email protected]
         with:

diff --git a/.gitignore b/.gitignore
@@ -104,4 +104,5 @@ src/*.so
 src/*.dll
 CRAN-RELEASE
 .vscode
-check/*
+check/*
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mlr3proba
 Title: Probabilistic Supervised Learning for 'mlr3'
-Version: 0.6.8
+Version: 0.6.9
 Authors@R:
     c(person(given = "Raphael",
              family = "Sonabend",
@@ -70,6 +70,7 @@ Suggests:
     GGally,
     knitr,
     lgr,
+    lifecycle,
     mlr3pipelines (>= 0.3.4),
     param6 (>= 0.2.4),
     pracma,
@@ -138,6 +139,7 @@ Collate:
     'PipeOpCrankCompositor.R'
     'PipeOpDistrCompositor.R'
     'PipeOpPredClassifSurvDiscTime.R'
+    'PipeOpPredClassifSurvIPCW.R'
     'PipeOpTransformer.R'
     'PipeOpPredTransformer.R'
     'PipeOpPredRegrSurv.R'
@@ -147,6 +149,7 @@ Collate:
     'PipeOpSurvAvg.R'
     'PipeOpTaskRegrSurv.R'
     'PipeOpTaskSurvClassifDiscTime.R'
+    'PipeOpTaskSurvClassifIPCW.R'
     'PipeOpTaskSurvRegr.R'
     'PipeOpTaskTransformer.R'
     'PredictionDataDens.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -72,6 +72,7 @@ export(PipeOpBreslow)
 export(PipeOpCrankCompositor)
 export(PipeOpDistrCompositor)
 export(PipeOpPredClassifSurvDiscTime)
+export(PipeOpPredClassifSurvIPCW)
 export(PipeOpPredRegrSurv)
 export(PipeOpPredSurvRegr)
 export(PipeOpPredTransformer)
@@ -80,6 +81,7 @@ export(PipeOpResponseCompositor)
 export(PipeOpSurvAvg)
 export(PipeOpTaskRegrSurv)
 export(PipeOpTaskSurvClassifDiscTime)
+export(PipeOpTaskSurvClassifIPCW)
 export(PipeOpTaskSurvRegr)
 export(PipeOpTaskTransformer)
 export(PipeOpTransformer)
@@ -99,6 +101,7 @@ export(assert_surv_matrix)
 export(breslow)
 export(get_mortality)
 export(pecs)
+export(pipeline_survtoclassif_IPCW)
 export(pipeline_survtoclassif_disctime)
 export(pipeline_survtoregr)
 export(plot_probregr)

diff --git a/NEWS.md b/NEWS.md
@@ -1,26 +1,36 @@
+# mlr3proba 0.6.9
+
+* New `PipeOp`s: `PipeOpTaskSurvClassifIPCW`, `PipeOpPredClassifSurvIPCW`
+* New pipeline (**reduction method**): `pipeline_survtoclassif_IPCW`
+* Improved the way Integrated Brier score handles the `times` argument and the `t_max`, especially when the survival matrix has one time point (column)
+* Improved documentation of integrated survival scores
+* Improved documentation of all pipelines
+* Temp fix of math-rendering issue in package website
+* Add experimental `lifecycle` badge for 3 pipelines (`survtoregr`, `distrcompositor` and `probregr`) - these are currently either not supported by literature or tested enough.
+
 # mlr3proba 0.6.8
 
-- `Rcpp` code optimizations
-- Fixed ERV scoring to comply with `mlr3` dev version (no bugs before)
-- Skipping `survtoregr` pipelines due to bugs (to be refactored in the future)
+* `Rcpp` code optimizations
+* Fixed ERV scoring to comply with `mlr3` dev version (no bugs before)
+* Skipping `survtoregr` pipelines due to bugs (to be refactored in the future)
 
 # mlr3proba 0.6.7
 
-- Deprecate `crank` to `distr` composition in `distrcompose` pipeop (only from `lp` => `distr` works now)
-- Add `get_mortality()` function (from `survivalmodels::surv_to_risk()`
-- Add Rcpp function `assert_surv_matrix()`
-- Update and simplify `crankcompose` pipeop and respective pipeline (no `response` is created anymore)
-- Add `responsecompositor` pipeline with `rmst` and `median`
+* Deprecate `crank` to `distr` composition in `distrcompose` pipeop (only from `lp` => `distr` works now)
+* Add `get_mortality()` function (from `survivalmodels::surv_to_risk()`
+* Add Rcpp function `assert_surv_matrix()`
+* Update and simplify `crankcompose` pipeop and respective pipeline (no `response` is created anymore)
+* Add `responsecompositor` pipeline with `rmst` and `median`
 
 # mlr3proba 0.6.6
 
-- Small fixes and refactoring to the discrete-time pipeops
+* Small fixes and refactoring to the discrete-time pipeops
 
 # mlr3proba 0.6.5
 
 * Add support for discrete-time survival analysis
 * New `PipeOp`s: `PipeOpTaskSurvClassifDiscTime`, `PipeOpPredClassifSurvDiscTime`
-* New pipeline: `pipeline_survtoclassif`
+* New pipeline (**reduction method**): `pipeline_survtoclassif_disctime`
 
 # mlr3proba 0.6.4
 

diff --git a/R/MeasureSurvGraf.R b/R/MeasureSurvGraf.R
@@ -19,28 +19,31 @@
 #' or squared survival loss.
 #'
 #' @details
-#' For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the
-#' Graf Score at time \eqn{t^*}{t*} is given by
-#' \deqn{L_{ISBS}(S,t|t^*) = [(S(t^*)^2)I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*))^2)I(t > t^*)(1/G(t^*))]}
+#' This measure has two dimensions: (test set) observations and time points.
+#' For a specific individual \eqn{i} from the test set, with observed survival
+#' outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted
+#' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across
+#' the time dimension up to the time cutoff \eqn{\tau^*}, is:
+#'
+#' \deqn{L_{ISBS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
+#'
 #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution.
 #'
-#' The re-weighted ISBS (RISBS) is given by
-#' \deqn{L_{RISBS}(S,t|t^*) = [(S(t^*)^2)I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*))^2)I(t > t^*)(1/G(t))]}
-#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always
-#' weighted by \eqn{G(t)}.
-#' RISBS is strictly proper when the censoring distribution is independent
-#' of the survival distribution and when G is fit on a sufficiently large dataset.
-#' ISBS is never proper. Use `proper = FALSE` for ISBS and `proper = TRUE` for RISBS.
-#' Results may be very different if many observations are
-#' censored at the last observed time due to division by 1/`eps` in `proper = TRUE`.
+#' The **re-weighted ISBS** (RISBS) is:
+#'
+#' \deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(t_i)} \ d\tau}
 #'
-#' **Note**: If comparing the integrated graf score to other packages, e.g.
-#' \CRANpkg{pec}, then `method = 2` should be used. However the results may
-#' still be very slightly different as this package uses `survfit` to estimate
-#' the censoring distribution, in line with the Graf 1999 paper; whereas some
-#' other packages use `prodlim` with `reverse = TRUE` (meaning Kaplan-Meier is
-#' not used).
+#' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject.
 #'
+#' To get a single score across all \eqn{N} observations of the test set, we
+#' return the average of the time-integrated observation-wise scores:
+#' \deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N}
+#'
+#' @template properness
+#' @templateVar improper_id ISBS
+#' @templateVar proper_id RISBS
+#' @template which_times
+#' @template details_method
 #' @template details_trainG
 #' @template details_tmax
 #'
@@ -95,16 +98,22 @@ MeasureSurvGraf = R6::R6Class("MeasureSurvGraf",
   private = list(
     .score = function(prediction, task, train_set, ...) {
       ps = self$param_set$values
+      # times must be unique, sorted and positive numbers
+      times = assert_numeric(ps$times, lower = 0, any.missing = FALSE,
+                             unique = TRUE, sorted = TRUE, null.ok = TRUE)
+      # ERV score
       if (ps$ERV) return(.scoring_rule_erv(self, prediction, task, train_set))
-      nok = sum(!is.null(ps$times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1
+
+      nok = sum(!is.null(times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1
       if (nok) {
         stop("Only one of `times`, `t_max`, and `p_max` should be provided")
       }
+
       if (!ps$integrated) {
         msg = "If `integrated=FALSE` then `times` should be a scalar numeric."
-        assert_numeric(ps$times, len = 1L, .var.name = msg)
+        assert_numeric(times, len = 1L, .var.name = msg)
       } else {
-        if (!is.null(ps$times) && length(ps$times) == 1L) {
+        if (!is.null(times) && length(times) == 1L) {
           ps$integrated = FALSE
         }
       }
@@ -118,9 +127,10 @@ MeasureSurvGraf = R6::R6Class("MeasureSurvGraf",
         train = NULL
       }
 
+      # `score` is a matrix, IBS(i,j) => n_test_obs x times
       score = weighted_survival_score("graf",
         truth = prediction$truth,
-        distribution = prediction$data$distr, times = ps$times,
+        distribution = prediction$data$distr, times = times,
         t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train,
         eps = ps$eps
       )

diff --git a/R/MeasureSurvIntLogloss.R b/R/MeasureSurvIntLogloss.R
@@ -17,22 +17,31 @@
 #' Logarithmic (log) Loss, aka integrated cross entropy.
 #'
 #' @details
-#' For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the
-#' probabilistic log loss at time \eqn{t^*}{t*} is given by
-#' \deqn{L_{ISLL}(S,t|t^*) = - [log(1 - S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] - [log(S(t^*))I(t > t^*)(1/G(t^*))]}
+#' This measure has two dimensions: (test set) observations and time points.
+#' For a specific individual \eqn{i} from the test set, with observed survival
+#' outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted
+#' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across
+#' the time dimension up to the time cutoff \eqn{\tau^*}, is:
+#'
+#' \deqn{L_{ISLL}(S_i, t_i, \delta_i) = -\text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{log[1-S_i(\tau)] \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{\log[S_i(\tau)] \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
+#'
 #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution.
 #'
-#' The re-weighted ISLL, RISLL is given by
-#' \deqn{L_{RISLL}(S,t|t^*) = - [log(1 - S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] - [log(S(t^*))I(t > t^*)(1/G(t))]}
-#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always
-#' weighted by \eqn{G(t)}.
-#' RISLL is strictly proper when the censoring distribution is independent
-#' of the survival distribution and when G is fit on a sufficiently large dataset.
-#' ISLL is never proper.
-#' Use `proper = FALSE` for ISLL and `proper = TRUE` for RISLL.
-#' Results may be very different if many observations are censored at the last
-#' observed time due to division by 1/`eps` in `proper = TRUE`.
+#' The **re-weighted ISLL** (RISLL) is:
+#'
+#' \deqn{L_{RISLL}(S_i, t_i, \delta_i) = -\delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{\log[1-S_i(\tau)]) \text{I}(t_i \leq \tau) + \log[S_i(\tau)] \text{I}(t_i > \tau)}{G(t_i)} \ d\tau}
+#'
+#' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject.
+#'
+#' To get a single score across all \eqn{N} observations of the test set, we
+#' return the average of the time-integrated observation-wise scores:
+#' \deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N}
 #'
+#' @template properness
+#' @templateVar improper_id ISLL
+#' @templateVar proper_id RISLL
+#' @template which_times
+#' @template details_method
 #' @template details_trainG
 #' @template details_tmax
 #'
@@ -87,17 +96,22 @@ MeasureSurvIntLogloss = R6::R6Class("MeasureSurvIntLogloss",
   private = list(
     .score = function(prediction, task, train_set, ...) {
       ps = self$param_set$values
-
+      # times must be unique, sorted and positive numbers
+      times = assert_numeric(ps$times, lower = 0, any.missing = FALSE,
+                             unique = TRUE, sorted = TRUE, null.ok = TRUE)
+      # ERV score
       if (ps$ERV) return(.scoring_rule_erv(self, prediction, task, train_set))
-      nok = sum(!is.null(ps$times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1
+
+      nok = sum(!is.null(times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1
       if (nok) {
         stop("Only one of `times`, `t_max`, and `p_max` should be provided")
       }
+
       if (!ps$integrated) {
         msg = "If `integrated=FALSE` then `times` should be a scalar numeric."
-        assert_numeric(ps$times, len = 1L, .var.name = msg)
+        assert_numeric(times, len = 1L, .var.name = msg)
       } else {
-        if (!is.null(ps$times) && length(ps$times) == 1L) {
+        if (!is.null(times) && length(times) == 1L) {
           ps$integrated = FALSE
         }
       }
@@ -111,9 +125,13 @@ MeasureSurvIntLogloss = R6::R6Class("MeasureSurvIntLogloss",
         train = NULL
       }
 
-      score = weighted_survival_score("intslogloss", truth = prediction$truth,
-        distribution = prediction$data$distr, times = ps$times, t_max = ps$t_max,
-        p_max = ps$p_max, proper = ps$proper, train = train, eps = ps$eps)
+      # `score` is a matrix, IBS(i,j) => n_test_obs x times
+      score = weighted_survival_score("intslogloss",
+        truth = prediction$truth,
+        distribution = prediction$data$distr, times = times,
+        t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train,
+        eps = ps$eps
+      )
 
       if (ps$se) {
         integrated_se(score, ps$integrated)

diff --git a/R/MeasureSurvLogloss.R b/R/MeasureSurvLogloss.R
@@ -16,19 +16,19 @@
 #' The Log Loss, in the context of probabilistic predictions, is defined as the
 #' negative log probability density function, \eqn{f}, evaluated at the
 #' observation time (event or censoring), \eqn{t},
-#' \deqn{L_{NLL}(f, t) = -log(f(t))}
+#' \deqn{L_{NLL}(f, t) = -\log[f(t)]}
 #'
 #' The standard error of the Log Loss, L, is approximated via,
 #' \deqn{se(L) = sd(L)/\sqrt{N}}{se(L) = sd(L)/\sqrt N}
 #' where \eqn{N} are the number of observations in the test set, and \eqn{sd} is the standard
 #' deviation.
 #'
-#' The **Re-weighted Negative Log-Likelihood** (RNLL) or IPCW Log Loss is defined by
-#' \deqn{L_{RNLL}(f, t, \Delta) = -\Delta log(f(t))/G(t)}
-#' where \eqn{\Delta} is the censoring indicator and G is the Kaplan-Meier estimator of the
+#' The **Re-weighted Negative Log-Likelihood** (RNLL) or IPCW (Inverse Probability Censoring Weighted) Log Loss is defined by
+#' \deqn{L_{RNLL}(f, t, \delta) = - \frac{\delta \log[f(t)]}{G(t)}}
+#' where \eqn{\delta} is the censoring indicator and \eqn{G(t)} is the Kaplan-Meier estimator of the
 #' censoring distribution.
 #' So only observations that have experienced the event are taking into account
-#' for RNLL and both \eqn{f(t), G(t)} are calculated only at the event times.
+#' for RNLL (i.e. \eqn{\delta = 1}) and both \eqn{f(t), G(t)} are calculated only at the event times.
 #' If only censored observations exist in the test set, `NaN` is returned.
 #'
 #' @template details_trainG
-Original file line number
+Diff line change
@@ Expand Up / @@ -104,4 +104,5 @@ src/*.so @@
     src/*.dll
     CRAN-RELEASE
     .vscode
-    check/*
+    check/*
+    docs