mlr-org
diff --git a/‎DESCRIPTION
Lines changed: 3 additions & 5 deletions b/‎DESCRIPTION
Lines changed: 3 additions & 5 deletions
diff --git a/‎NAMESPACE
Lines changed: 0 additions & 1 deletion b/‎NAMESPACE
Lines changed: 0 additions & 1 deletion
diff --git a/‎NEWS.md
Lines changed: 13 additions & 0 deletions b/‎NEWS.md
Lines changed: 13 additions & 0 deletions
diff --git a/‎R/MeasureCompRisksAUC.R
Lines changed: 4 additions & 4 deletions b/‎R/MeasureCompRisksAUC.R
Lines changed: 4 additions & 4 deletions
diff --git a/‎R/MeasureSurvAUC.R
Lines changed: 1 addition & 2 deletions b/‎R/MeasureSurvAUC.R
Lines changed: 1 addition & 2 deletions
diff --git a/‎R/MeasureSurvCalibrationAlpha.R
Lines changed: 8 additions & 8 deletions b/‎R/MeasureSurvCalibrationAlpha.R
Lines changed: 8 additions & 8 deletions
diff --git a/‎R/MeasureSurvCalibrationBeta.R
Lines changed: 14 additions & 12 deletions b/‎R/MeasureSurvCalibrationBeta.R
Lines changed: 14 additions & 12 deletions
diff --git a/‎R/MeasureSurvChamblessAUC.R
Lines changed: 2 additions & 1 deletion b/‎R/MeasureSurvChamblessAUC.R
Lines changed: 2 additions & 1 deletion
diff --git a/‎R/MeasureSurvCindex.R
Lines changed: 29 additions & 34 deletions b/‎R/MeasureSurvCindex.R
Lines changed: 29 additions & 34 deletions
diff --git a/‎R/MeasureSurvDCalibration.R
Lines changed: 6 additions & 19 deletions b/‎R/MeasureSurvDCalibration.R
Lines changed: 6 additions & 19 deletions
@@ -1,6 +1,6 @@
 Package: mlr3proba
 Title: Probabilistic Supervised Learning for 'mlr3'
-Version: 0.8.0
+Version: 0.8.1
 Authors@R: c(
     person("Raphael", "Sonabend", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0001-9225-4654")),
@@ -153,16 +153,14 @@ Collate:
     'autoplot.R'
     'bibentries.R'
     'breslow.R'
-    'cindex.R'
     'data.R'
+    'helper_measures.R'
     'helpers.R'
     'histogram.R'
-    'integrated_scores.R'
     'mlr3proba-package.R'
     'pecs.R'
     'pipelines.R'
     'plot_probregr.R'
-    'scoring_rule_erv.R'
-    'surv_measures.R'
     'surv_return.R'
+    'weighted_survival_score.R'
     'zzz.R'
@@ -39,7 +39,6 @@ S3method(pecs,PredictionSurv)
 S3method(pecs,list)
 S3method(plot,TaskDens)
 S3method(plot,TaskSurv)
-export(.c_weight_survival_score)
 export(.surv_return)
 export(LearnerCompRisks)
 export(LearnerCompRisksAalenJohansen)
 
@@ -1,3 +1,16 @@
+# mlr3proba 0.8.1
+
+* feat: `surv.logloss` and `surv.rcll` now use linear interpolation of S(t) to calculate the density f(t)
+* fix: `surv.mae`/`surv.mse`/`surv.rmse` scores return `NA` when test set has only censored observations
+* fix: fix bug in msr(`surv.brier`) that resulted in division by 0 instead of division by `eps` (`Inf` values are filtered out, so this was partially masking the inflation of ISBS)
+* refactor: remove `se` argument from most of the scores (not practically used)
+* refactor: remove `method` argument from integrated survival scores (the previous default, `method = 2`, time-weighted integration, is now always used)
+* **BREAKING CHANGE**: we removed all experimental `proper` scoring rules (and `remove_obs` argument).
+Scores yield the same results as before with the default option `proper = FALSE`
+* refactor: all private functions start with `.` now and are adequately (privately) documented. Code was refactored for clarity
+* refactor: all internal `Rcpp` measure functions
+* refine doc in lots of measures
+
 # mlr3proba 0.8.0
 
 * Compatibility with `mlr3` v1.0.0 (`weights_learner`) and `mlr3pipelines` v0.8.0
 
@@ -97,10 +97,10 @@ MeasureCompRisksAUC = R6Class(
         cif_mat = cif[[as.character(cause)]]
 
         # get CIF on the time horizon
-        mat = interpolate_cif(cif_mat, new_times = time_horizon)
+        mat = .interp_cif(cif_mat, eval_times = time_horizon)
 
         # calculate AUC(t) score
-        res = riskRegression_score(
+        res = .riskRegr_score(
           mat_list = list(mat),
           metric = "auc",
           data = data,
@@ -118,10 +118,10 @@ MeasureCompRisksAUC = R6Class(
           cif_mat = cif[[cause]]
 
           # get CIF on the time horizon
-          mat = interpolate_cif(cif_mat, new_times = time_horizon)
+          mat = .interp_cif(cif_mat, eval_times = time_horizon)
 
           # calculate AUC(t) score
-          res = riskRegression_score(
+          res = .riskRegr_score(
             mat_list = list(mat),
             metric = "auc",
             data = data,
 
@@ -22,7 +22,7 @@ MeasureSurvAUC = R6Class("MeasureSurvAUC",
 
       super$initialize(
         id = id,
-        range = 0:1,
+        range = c(0, 1),
         minimize = FALSE,
         packages = "survAUC",
         predict_type = "lp",
@@ -36,7 +36,6 @@ MeasureSurvAUC = R6Class("MeasureSurvAUC",
 
   private = list(
     .score = function(prediction, learner, task, train_set, FUN, ...) {
-
       args = list()
       ps = self$param_set$values
 
 
@@ -6,13 +6,13 @@
 #'
 #' @description
 #' This calibration method is defined by estimating
-#' \deqn{\hat{\alpha} = \sum \delta_i / \sum H_i(T_i)}
-#' where \eqn{\delta} is the observed censoring indicator from the test data,
-#' \eqn{H_i} is the predicted cumulative hazard, and \eqn{T_i} is the observed
-#' survival time (event or censoring).
+#' \deqn{\hat{\alpha} = \frac{\sum_{i=1}^n \delta_i}{\sum_{i=1}^n H_i(T_i)}}
+#' where \eqn{\delta} is the observed censoring indicator from the test data
+#' (\eqn{n} observations), \eqn{H_i} is the predicted cumulative hazard, and \eqn{T_i}
+#' is the observed survival time (event or censoring).
 #'
 #' The standard error is given by
-#' \deqn{\hat{\alpha_{se}} = exp(1/\sqrt{\sum \delta_i})}
+#' \deqn{\hat{\alpha_{se}} = e^{1/\sqrt{\sum \delta_i}}}
 #'
 #' The model is well calibrated if the estimated \eqn{\hat{\alpha}} coefficient
 #' (returned score) is equal to 1.
@@ -75,11 +75,11 @@ MeasureSurvCalibrationAlpha = R6Class("MeasureSurvCalibrationAlpha",
       truth = prediction$truth
       all_times = truth[, 1L] # both event times and censoring times
       status = truth[, 2L]
-      deaths = sum(status)
+      n_events = sum(status)
 
       ps = self$param_set$values
       if (ps$se) {
-        return(exp(1 / sqrt(deaths)))
+        return(exp(1 / sqrt(n_events)))
       } else {
         distr = prediction$data$distr
 
@@ -113,7 +113,7 @@ MeasureSurvCalibrationAlpha = R6Class("MeasureSurvCalibrationAlpha",
         # Inf => case where censoring occurs at last time point
         # 0   => case where survival probabilities are all 1
         cumhaz[cumhaz == Inf | cumhaz == 0] = ps$eps
-        out = deaths / sum(cumhaz)
+        out = n_events / sum(cumhaz)
 
         if (ps$method == "diff") {
           out = abs(1 - out)
 
@@ -6,7 +6,7 @@
 #' This calibration method fits the predicted linear predictor from a Cox PH
 #' model as the only predictor in a new Cox PH model with the test data as
 #' the response.
-#' \deqn{h(t|x) = h_0(t)exp(\beta \times lp)}
+#' \deqn{h(t|x) = h_0(t)e^{\beta \times lp}}
 #' where \eqn{lp} is the predicted linear predictor on the test data.
 #'
 #' The model is well calibrated if the estimated \eqn{\hat{\beta}} coefficient
@@ -56,7 +56,8 @@ MeasureSurvCalibrationBeta = R6Class("MeasureSurvCalibrationBeta",
         predict_type = "lp",
         label = "Van Houwelingen's Beta",
         man = "mlr3proba::mlr_measures_surv.calib_beta",
-        param_set = ps
+        param_set = ps,
+        properties = "na_score"
       )
     }
   ),
@@ -68,21 +69,22 @@ MeasureSurvCalibrationBeta = R6Class("MeasureSurvCalibrationBeta",
 
       if (inherits(fit, "try-error")) {
         return(NA)
-      } else {
-        ps = self$param_set$values
+      }
 
-        if (ps$se) {
-          return(fit$coefficients[, "se(coef)"])
-        } else {
-          out = fit$coefficients[, "coef"]
+      ps = self$param_set$values
 
-          if (ps$method == "diff") {
-            out = abs(1 - out)
-          }
+      if (ps$se) {
+        return(fit$coefficients[, "se(coef)"])
+      } else {
+        out = fit$coefficients[, "coef"]
 
-          return(out)
+        if (ps$method == "diff") {
+          out = abs(1 - out)
         }
+
+        return(out)
       }
+
     }
   )
 )
 
@@ -42,8 +42,9 @@ MeasureSurvChamblessAUC = R6Class("MeasureSurvChamblessAUC",
   private = list(
     .score = function(prediction, learner, task, train_set, ...) {
       if (!inherits(learner, "LearnerSurvCoxPH")) {
-        stop("surv.chambless_auc only compatible with Cox PH models")
+        stop("Only compatible with Cox PH models")
       }
+
       ps = self$param_set$values
       if (!ps$integrated) {
         msg = "If `integrated=FALSE` then `times` should be a scalar numeric."
 
@@ -44,7 +44,7 @@
 #' Weighting applied to tied rankings, default is to give them half (0.5) weighting.
 #'
 #' @references
-#' `r format_bib("peto_1972", "harrell_1982", "goenen_2005", "schemper_2009", "uno_2011")`
+#' `r format_bib("peto_1972", "harrell_1982", "gonen_2005", "schemper_2009", "uno_2011")`
 #'
 #' @template param_range
 #' @template param_minimize
@@ -90,11 +90,10 @@ MeasureSurvCindex = R6Class("MeasureSurvCindex",
 
       super$initialize(
         id = "surv.cindex",
-        range = 0:1,
+        range = c(0, 1),
         minimize = FALSE,
-        packages = character(),
         predict_type = "crank",
-        properties = character(),
+        properties = "na_score",
         label = "Concordance Index",
         man = "mlr3proba::mlr_measures_surv.cindex",
         param_set = ps
@@ -108,43 +107,39 @@ MeasureSurvCindex = R6Class("MeasureSurvCindex",
     .score = function(prediction, task, train_set, ...) {
       ps = self$param_set$values
 
-      # calculate t_max (cutoff time horizon)
-      if (is.null(ps$t_max) && !is.null(ps$p_max)) {
+      # Determine cutoff time horizon (t_max)
+      t_max = ps$t_max
+      if (is.null(t_max) && !is.null(ps$p_max)) {
         truth = prediction$truth
-        unique_times = unique(sort(truth[, "time"]))
+        unique_times = unique(sort(truth[, 1L]))
         surv = survival::survfit(truth ~ 1)
-        indx = which(1 - (surv$n.risk / surv$n) > ps$p_max)
-        if (length(indx) == 0L) {
-          t_max = NULL # t_max calculated in `cindex()`
-        } else {
-          # first time point that surpasses the specified
-          # `p_max` proportion of censoring
-          t_max = surv$time[indx[1L]]
-        }
-      } else {
-        t_max = ps$t_max
+        censored_proportion = 1 - (surv$n.risk / surv$n)
+        indx = which(censored_proportion > ps$p_max)
+
+        # First time point that surpasses `p_max` censoring
+        t_max = if (length(indx) > 0L) surv$time[indx[1L]] else NULL
       }
 
-      if (ps$weight_meth == "GH") {
-        return(gonen(prediction$crank, ps$tiex))
-      } else if (ps$weight_meth == "I") {
-        return(cindex(prediction$truth, prediction$crank, t_max, ps$weight_meth, ps$tiex))
-      } else {
-        if (is.null(task) | is.null(train_set)) {
-          stop("'task' and 'train_set' required for all weighted C-indexes (except GH).")
-        }
-        return(cindex(prediction$truth, prediction$crank, t_max, ps$weight_meth,
-                      ps$tiex, task$truth(train_set), ps$eps))
+      # Select weighting method
+      weight_meth = ps$weight_meth
+
+      if (weight_meth == "I") {
+        return(.cindex(prediction$truth, prediction$crank, t_max, weight_meth, ps$tiex))
       }
+
+      if (weight_meth == "GH") {
+        return(.gonen(prediction$crank, ps$tiex))
+      }
+
+      # All other methods require task and train_set
+      if (is.null(task) || is.null(train_set)) {
+        stopf("'task' and 'train_set' are required for weighted C-index method '%s'", weight_meth)
+      }
+
+      train_truth = task$truth(train_set)
+      .cindex(prediction$truth, prediction$crank, t_max, weight_meth, ps$tiex, train_truth, ps$eps)
     }
   )
 )
 
-gonen = function(crank, tiex) {
-  assert_numeric(crank, any.missing = FALSE)
-  assert_number(tiex)
-
-  c_gonen(sort(crank), tiex)
-}
-
 register_measure("surv.cindex", MeasureSurvCindex)
@@ -89,26 +89,13 @@ MeasureSurvDCalibration = R6Class("MeasureSurvDCalibration",
       true_times = prediction$truth[, 1L]
 
       # predict individual probability of death at observed event time
-      # bypass distr6 construction if possible
-      if (inherits(prediction$data$distr, "array")) {
-        surv = prediction$data$distr
-        if (length(dim(surv)) == 3) {
-          # survival 3d array, extract median
-          surv = .ext_surv_mat(arr = surv, which.curve = 0.5)
-        }
-        times = as.numeric(colnames(surv))
+      surv = .get_surv_matrix(prediction)
+      times = as.numeric(colnames(surv))
 
-        extend_times = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6")
-        si = diag(extend_times(true_times, times, cdf = t(1 - surv), FALSE, FALSE))
-      } else {
-        distr = prediction$distr
-        if (inherits(distr, c("Matdist", "Arrdist"))) {
-          si = diag(distr$survival(true_times))
-        } else { # VectorDistribution or single Distribution, e.g. WeightDisc()
-          si = as.numeric(distr$survival(data = matrix(true_times, nrow = 1L)))
-        }
-      }
-      # remove zeros
+      extend_times = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6")
+      si = diag(extend_times(true_times, times, cdf = t(1 - surv), FALSE, FALSE))
+
+      # replace zeros
       si = map_dbl(si, function(.x) max(.x, 1e-5))
       # index of associated bucket
       js = ceiling(B * si)