% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MCBoostSurv.R
\name{MCBoostSurv}
\alias{MCBoostSurv}
\title{Multi-Calibration Boosting}
\description{
Implements Multi-Calibration Boosting by Hebert-Johnson et al. (2018) and
Multi-Accuracy Boosting by Kim et al. (2019) for the multi-calibration of a
machine learning model's prediction for survival models.
Multi-Calibration works best in scenarios where the underlying data & labels are unbiased
but a bias is introduced within the algorithm's fitting procedure. This is often the case,
e.g. when an algorithm fits a majority population while ignoring or under-fitting minority
populations.\cr
Expects initial models that predict probabilities (between 0 and 1) for different time points.
The method defaults to \verb{Multi-Accuracy Boosting} as described in Kim et al. (2019).
In order to obtain behaviour as described in Hebert-Johnson et al. (2018) set
\code{multiplicative=FALSE} and \code{num_buckets} to 10.
For additional details, please refer to the relevant publications:
\itemize{
\item{Hebert-Johnson et al., 2018. Multicalibration: Calibration for the (Computationally-Identifiable) Masses.
Proceedings of the 35th International Conference on Machine Learning, PMLR 80:1939-1948.
https://proceedings.mlr.press/v80/hebert-johnson18a.html.}{}
\item{Kim et al., 2019. Multiaccuracy: Black-Box Post-Processing for Fairness in Classification.
Proceedings of the 2019 AAAI/ACM Conference on AI, Ethics, and Society (AIES '19).
Association for Computing Machinery, New York, NY, USA, 247–254.
https://dl.acm.org/doi/10.1145/3306618.3314287}{}
}
}
\section{Super class}{
\code{\link[mcboost:MCBoost]{mcboost::MCBoost}} -> \code{MCBoostSurv}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{max_iter}}{\code{\link{integer}} \cr
The maximum number of iterations of the multi-calibration/multi-accuracy method.}

\item{\code{alpha}}{\code{\link{numeric}} \cr
Accuracy parameter that determines the stopping condition.}

\item{\code{eta}}{\code{\link{numeric}} \cr
Parameter for multiplicative weight update (step size).}

\item{\code{num_buckets}}{\code{\link{integer}} \cr
The number of buckets to split into in addition to using the whole sample.}

\item{\code{bucket_strategy}}{\code{\link{character}} \cr
Currently only supports "simple", even split along probabilities.
Only relevant for \code{num_buckets} > 1.}

\item{\code{rebucket}}{\code{\link{logical}} \cr
Should buckets be re-calculated at each iteration?}

\item{\code{eval_fulldata}}{\code{\link{logical}} \cr
Should auditor be evaluated on the full data?}

\item{\code{partition}}{\code{\link{logical}} \cr
True/False flag for whether to split up predictions by their "partition"
(e.g., predictions less than 0.5 and predictions greater than 0.5).}

\item{\code{multiplicative}}{\code{\link{logical}} \cr
Specifies the strategy for updating the weights (multiplicative weight vs additive).}

\item{\code{iter_sampling}}{\code{\link{character}} \cr
Specifies the strategy to sample the validation data for each iteration.}

\item{\code{auditor_fitter}}{\code{\link{AuditorFitter}} \cr
Specifies the type of model used to fit the residuals.}

\item{\code{predictor}}{\code{\link{function}} \cr
Initial predictor function.}

\item{\code{iter_models}}{\code{\link{list}} \cr
Cumulative list of fitted models.}

\item{\code{iter_partitions}}{\code{\link{list}} \cr
Cumulative list of data partitions for models.}

\item{\code{iter_corr}}{\code{\link{list}} \cr
Auditor correlation in each iteration.}

\item{\code{auditor_effects}}{\code{\link{list}} \cr
Auditor effect in each iteration.}

\item{\code{time_points}}{\code{\link{integer}} \cr
Times included in the prediction (column names).}

\item{\code{time_buckets}}{\code{\link{integer}} \cr
The number of buckets to split the time points (columns) of the prediction.}

\item{\code{bucket_strategies}}{\code{\link{character}} \cr
Possible bucket_strategies in MCBoostSurv.
Only relevant for \code{time_buckets} > 1.\cr
\code{even_splits}: split buckets evenly.\cr
\code{quantiles}: split buckets by quantiles.}

\item{\code{bucket_aggregation}}{\code{\link{function}} \cr
If not NULL, predictions are not selected by time/probability,
but by time/individual. Individuals are selected by aggregated value per
individual (e.g. mean).
Only relevant for \code{time_buckets} > 1.}

\item{\code{max_time_quantile}}{\code{\link{double}} \cr
Time quantile which should be evaluated and multicalibrated.
Similar to a 75\%-Integrated Brier Score.}

\item{\code{time_points_eval}}{\code{\link{integer}} \cr
Vector of time_points that should be evaluated.}

\item{\code{loss}}{\code{\link{character}} \cr
Loss function which is optimized during boosting.\cr
\code{censored_brier}: censored version of the integrated Brier score.\cr
\code{brier}: uncensored version of the integrated Brier score.\cr
\code{censored_brier_proper}: proper version of the censored integrated Brier score.\cr
For more details, please refer to \url{https://mlr3proba.mlr-org.com/reference/mlr_measures_surv.graf.html}.}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-new}{\code{MCBoostSurv$new()}}
\item \href{#method-clone}{\code{MCBoostSurv$clone()}}
}
}
\if{html}{
\out{<details open ><summary>Inherited methods</summary>}
\itemize{
\item \out{<span class="pkg-link" data-pkg="mcboost" data-topic="MCBoost" data-id="auditor_effect">}\href{../../mcboost/html/MCBoost.html#method-auditor_effect}{\code{mcboost::MCBoost$auditor_effect()}}\out{</span>}
\item \out{<span class="pkg-link" data-pkg="mcboost" data-topic="MCBoost" data-id="multicalibrate">}\href{../../mcboost/html/MCBoost.html#method-multicalibrate}{\code{mcboost::MCBoost$multicalibrate()}}\out{</span>}
\item \out{<span class="pkg-link" data-pkg="mcboost" data-topic="MCBoost" data-id="predict_probs">}\href{../../mcboost/html/MCBoost.html#method-predict_probs}{\code{mcboost::MCBoost$predict_probs()}}\out{</span>}
\item \out{<span class="pkg-link" data-pkg="mcboost" data-topic="MCBoost" data-id="print">}\href{../../mcboost/html/MCBoost.html#method-print}{\code{mcboost::MCBoost$print()}}\out{</span>}
}
\out{</details>}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-new"></a>}}
\if{latex}{\out{\hypertarget{method-new}{}}}
\subsection{Method \code{new()}}{
Initialize a multi-calibration instance.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoostSurv$new(
  max_iter = 25,
  alpha = 1e-04,
  eta = 0.1,
  num_buckets = 1,
  partition = ifelse(num_buckets > 1, TRUE, FALSE),
  time_buckets = 2L,
  max_time_quantile = 1,
  bucket_strategy = "even_splits",
  bucket_aggregation = NULL,
  rebucket = FALSE,
  eval_fulldata = FALSE,
  multiplicative = TRUE,
  auditor_fitter = "RidgeAuditorFitter",
  subpops = NULL,
  default_model_class = LearnerSurvKaplan,
  init_predictor = NULL,
  loss = "censored_brier",
  iter_sampling = "none"
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{max_iter}}{\code{\link{integer}} \cr
The maximum number of iterations of the multi-calibration/multi-accuracy method.
Default \code{25}.}

\item{\code{alpha}}{\code{\link{numeric}} \cr
Accuracy parameter that determines the stopping condition. Default \code{1e-4}.}

\item{\code{eta}}{\code{\link{numeric}} \cr
Parameter for multiplicative weight update (step size). Default \code{0.1}.}

\item{\code{num_buckets}}{\code{\link{integer}} \cr
The number of buckets to split into in addition to using the whole sample. Default \code{1}.}

\item{\code{partition}}{\code{\link{logical}} \cr
True/False flag for whether to split up predictions by their "partition"
(e.g., predictions less than 0.5 and predictions greater than 0.5).
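Defaults to \code{TRUE} if \code{num_buckets} > 1 and to \code{FALSE} otherwise
(i.e. \code{FALSE} for the default \code{num_buckets = 1}, multi-accuracy boosting).}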

\item{\code{time_buckets}}{\code{\link{integer}} \cr
The number of buckets to split the time points (columns) of the prediction. Default \code{2L}.}

\item{\code{max_time_quantile}}{\code{\link{double}} \cr
Time quantile which should be evaluated and multicalibrated.
Can be used to perform multi-calibration only up to the \code{max_time_quantile} percent of timepoints.
Initialized to \code{1}.}

\item{\code{bucket_strategy}}{\code{\link{character}} \cr
Bucket strategy used for bucketing. Default \code{"even_splits"}.\cr
\code{even_splits}: split buckets evenly.\cr
\code{quantiles}: split buckets by quantiles.}

\item{\code{bucket_aggregation}}{\code{\link{function}} \cr
If not NULL, predictions are not selected by time/probability,
but by time/individual. Individuals are selected by aggregated value per
individual (e.g. mean).
Only relevant for \code{time_buckets} > 1.}

\item{\code{rebucket}}{\code{\link{logical}} \cr
Should buckets be re-done at each iteration? Default \code{FALSE}.}

\item{\code{eval_fulldata}}{\code{\link{logical}} \cr
Should the auditor be evaluated on the full data or on the respective bucket for determining
the stopping criterion? Default \code{FALSE}, i.e. the auditor is only evaluated on the bucket.
Setting \code{eval_fulldata = TRUE} keeps the implementation closer to the algorithm proposed
in the corresponding multi-accuracy paper (Kim et al., 2019), where auditor effects are
computed across the full sample.}

\item{\code{multiplicative}}{\code{\link{logical}} \cr
Specifies the strategy for updating the weights (multiplicative weight vs additive).
Defaults to \code{TRUE} (multi-accuracy boosting). Set to \code{FALSE} for multi-calibration.}

\item{\code{auditor_fitter}}{\code{\link{AuditorFitter}}|\code{\link{character}}|\code{\link[mlr3:Learner]{mlr3::Learner}} \cr
Specifies the type of model used to fit the
residuals. The default is \code{\link{RidgeAuditorFitter}}.
Can be a \code{character}, the name of an \code{\link{AuditorFitter}}, a \code{\link[mlr3:Learner]{mlr3::Learner}} that is then
auto-converted into a \code{\link{LearnerAuditorFitter}} or a custom \code{\link{AuditorFitter}}.}

\item{\code{subpops}}{\code{\link{list}} \cr
Specifies a collection of characteristic attributes
and the values they take to define subpopulations
e.g. \code{list(age = c('20-29','30-39','40+'), nJobs = c(0,1,2,'3+'), ...)}.}

\item{\code{default_model_class}}{\code{Predictor} \cr
The class of the model that should be used as the init predictor model if
\code{init_predictor} is not specified. Defaults to \code{LearnerSurvKaplan}, a Kaplan-Meier
estimator that predicts the same survival curve for every observation.}

\item{\code{init_predictor}}{\code{\link{function}}|\code{\link[mlr3:Learner]{mlr3::Learner}} \cr
The initial predictor function to use (i.e., if the user has a pretrained model).
If a \code{mlr3} \code{Learner} is passed, it will be autoconverted using \code{mlr3_init_predictor}.
This requires the \code{\link[mlr3:Learner]{mlr3::Learner}} to be trained.}

\item{\code{loss}}{\code{\link{character}} \cr
Loss function which is optimized during boosting.\cr
\code{censored_brier}: censored version of the integrated Brier score.\cr
\code{brier}: uncensored version of the integrated Brier score.\cr
\code{censored_brier_proper}: proper version of the censored integrated Brier score.\cr
For more details, please refer to \url{https://mlr3proba.mlr-org.com/reference/mlr_measures_surv.graf.html}.}

\item{\code{iter_sampling}}{\code{\link{character}} \cr
How to sample the validation data for each iteration?
Can be \code{bootstrap}, \code{split} or \code{none}.\cr
"split" splits the data into \code{max_iter} parts and validates on each sample in each iteration.\cr
"bootstrap" uses a new bootstrap sample in each iteration.\cr
"none" uses the same dataset in each iteration.}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-clone"></a>}}
\if{latex}{\out{\hypertarget{method-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MCBoostSurv$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
