% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_imp.R
\name{model_imp}
\alias{model_imp}
\alias{lm_imp}
\alias{glm_imp}
\alias{clm_imp}
\alias{lognorm_imp}
\alias{betareg_imp}
\alias{mlogit_imp}
\alias{lme_imp}
\alias{lmer_imp}
\alias{glme_imp}
\alias{glmer_imp}
\alias{betamm_imp}
\alias{lognormmm_imp}
\alias{clmm_imp}
\alias{mlogitmm_imp}
\alias{survreg_imp}
\alias{coxph_imp}
\alias{JM_imp}
\title{Joint Analysis and Imputation of incomplete data}
\usage{
lm_imp(formula, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, models = NULL, no_model = NULL, shrinkage = FALSE,
  ppc = TRUE, seed = NULL, inits = NULL, warn = TRUE, mess = TRUE,
  ...)

glm_imp(formula, family, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, models = NULL, no_model = NULL, shrinkage = FALSE,
  ppc = TRUE, seed = NULL, inits = NULL, warn = TRUE, mess = TRUE,
  ...)

clm_imp(formula, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, nonprop = NULL, rev = NULL, models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

lognorm_imp(formula, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, models = NULL, no_model = NULL, shrinkage = FALSE,
  ppc = TRUE, seed = NULL, inits = NULL, warn = TRUE, mess = TRUE,
  ...)

betareg_imp(formula, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, models = NULL, no_model = NULL, shrinkage = FALSE,
  ppc = TRUE, seed = NULL, inits = NULL, warn = TRUE, mess = TRUE,
  ...)

mlogit_imp(formula, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, models = NULL, no_model = NULL, shrinkage = FALSE,
  ppc = TRUE, seed = NULL, inits = NULL, warn = TRUE, mess = TRUE,
  ...)

lme_imp(fixed, data, random, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

lmer_imp(fixed, data, random, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

glme_imp(fixed, data, random, family, n.chains = 3, n.adapt = 100,
  n.iter = 0, thin = 1, monitor_params = c(analysis_main = TRUE),
  auxvars = NULL, refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

glmer_imp(fixed, data, random, family, n.chains = 3, n.adapt = 100,
  n.iter = 0, thin = 1, monitor_params = c(analysis_main = TRUE),
  auxvars = NULL, refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

betamm_imp(fixed, random, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

lognormmm_imp(fixed, random, data, n.chains = 3, n.adapt = 100,
  n.iter = 0, thin = 1, monitor_params = c(analysis_main = TRUE),
  auxvars = NULL, refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

clmm_imp(fixed, data, random, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, nonprop = NULL, rev = NULL, rd_vcov = "blockdiag",
  models = NULL, no_model = NULL, shrinkage = FALSE, ppc = TRUE,
  seed = NULL, inits = NULL, warn = TRUE, mess = TRUE, ...)

mlogitmm_imp(fixed, data, random, n.chains = 3, n.adapt = 100,
  n.iter = 0, thin = 1, monitor_params = c(analysis_main = TRUE),
  auxvars = NULL, refcats = NULL, rd_vcov = "blockdiag", models = NULL,
  no_model = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)

survreg_imp(formula, data, n.chains = 3, n.adapt = 100, n.iter = 0,
  thin = 1, monitor_params = c(analysis_main = TRUE), auxvars = NULL,
  refcats = NULL, models = NULL, no_model = NULL, shrinkage = FALSE,
  ppc = TRUE, seed = NULL, inits = NULL, warn = TRUE, mess = TRUE,
  ...)

coxph_imp(formula, data, df_basehaz = 6, n.chains = 3, n.adapt = 100,
  n.iter = 0, thin = 1, monitor_params = c(analysis_main = TRUE),
  auxvars = NULL, refcats = NULL, models = NULL, no_model = NULL,
  shrinkage = FALSE, ppc = TRUE, seed = NULL, inits = NULL,
  warn = TRUE, mess = TRUE, ...)

JM_imp(formula, data, df_basehaz = 6, n.chains = 3, n.adapt = 100,
  n.iter = 0, thin = 1, monitor_params = c(analysis_main = TRUE),
  auxvars = NULL, timevar = NULL, refcats = NULL,
  rd_vcov = "blockdiag", models = NULL, no_model = NULL,
  assoc_type = NULL, shrinkage = FALSE, ppc = TRUE, seed = NULL,
  inits = NULL, warn = TRUE, mess = TRUE, ...)
}
\arguments{
\item{formula}{a two-sided model formula (see \code{\link[stats]{formula}})
or a list of such formulas; (more details below).}

\item{data}{a \code{data.frame} containing the original data
(more details below)}

\item{n.chains}{number of MCMC chains}

\item{n.adapt}{number of iterations for adaptation of the MCMC samplers
(see \code{\link[rjags]{adapt}})}

\item{n.iter}{number of iterations of the MCMC chain (after adaptation;
see \code{\link[rjags]{coda.samples}})}

\item{thin}{thinning interval (integer; see \code{\link[coda]{window.mcmc}}).
For example, \code{thin = 1} (default) will keep the MCMC samples
from all iterations; \code{thin = 5} would only keep every 5th
iteration.}

\item{monitor_params}{named list or vector specifying which parameters
should be monitored (more details below)}

\item{auxvars}{optional; one-sided formula of variables that should be used
as predictors in the imputation procedure (and will be imputed
if necessary) but are not part of the analysis model(s).
For more details with regards to the behaviour with
non-linear effects see the vignette on
\href{https://nerler.github.io/JointAI/articles/ModelSpecification.html#auxvars}{Model Specification}}

\item{refcats}{optional; either one of \code{"first"}, \code{"last"},
\code{"largest"} (which sets the category for all categorical
variables) or a named list specifying which category should
be used as reference category per categorical variable.
Options are the category label, the category number,
or one of "first" (the first category),
"last" (the last category) or "largest" (chooses the category
with the most observations).
Default is "first". If reference categories are specified for
a subset of the categorical variables the default will be
used for the remaining variables.
(See also \code{\link{set_refcat}})}

\item{models}{optional; named vector specifying the types of models for
(incomplete) covariates.
This argument replaces the argument \code{meth} used in
earlier versions.
If \code{NULL} (default) models will be determined
automatically based on the class of the respective columns of
\code{data}.}

\item{no_model}{optional; vector of names of variables for which no model
should be specified.
Note that this is only possible for completely observed
variables and implies the assumption of independence between
the excluded variable and the incomplete variables.}

\item{shrinkage}{optional; either a character string naming the shrinkage
method to be used for regression coefficients in all models
or a named vector specifying the type of shrinkage to be
used in the models given as names.}

\item{ppc}{logical: should monitors for posterior predictive checks be
set? (not yet used)}

\item{seed}{optional; seed value (for reproducibility)}

\item{inits}{optional; specification of initial values in the form of a list
or a function (see \code{\link[rjags]{jags.model}}).
If omitted, starting values for the random number generator are
created by \strong{JointAI}; initial values are then generated
by JAGS.
It is an error to supply an initial value for an observed node.}

\item{warn}{logical; should warnings be given? Default is
\code{TRUE}.}

\item{mess}{logical; should messages be given? Default is
\code{TRUE}.}

\item{...}{additional, optional arguments
\describe{
\item{\code{trunc}}{named list specifying limits of truncation for the
distribution of the named incomplete variables (see the
vignette
\href{https://nerler.github.io/JointAI/articles/ModelSpecification.html#functions-with-restricted-support}{ModelSpecification})}
\item{\code{hyperpars}}{list of hyper-parameters, as obtained by
\code{\link{default_hyperpars}()}}
\item{\code{scale_vars}}{named vector of (continuous) variables that
will be centred and scaled (such that mean = 0 and sd = 1)
when they enter a linear predictor to improve
convergence of the MCMC sampling. Default is that all
numeric variables and integer variables with >20 different
values will be scaled.
If set to \code{FALSE} no scaling will be done.}
\item{\code{custom}}{named list of JAGS model chunks (character strings)
that replace the model for the given variable.}
\item{\code{append_data_list}}{list that will be appended to the list
containing the data that is passed to \strong{rjags}
(\code{data_list}). This may be necessary if additional data /
variables are needed for custom (covariate) models.}
\item{\code{progress.bar}}{character string specifying the type of
progress bar. Possible values are "text" (default), "gui",
and "none" (see \code{\link[rjags]{update}}). Note: when
sampling is performed in parallel it is not possible to
display a progress bar.}
\item{\code{quiet}}{logical; if \code{TRUE} then messages generated by
\strong{rjags} during compilation as well as the progress bar
for the adaptive phase will be suppressed,
(see \code{\link[rjags]{jags.model}})}
\item{\code{keep_scaled_mcmc}}{should the "original" MCMC sample (i.e.,
the scaled version returned by \code{coda.samples()}) be
kept? (The MCMC sample that is re-scaled to the scale of the
data is always kept.)}
\item{\code{modelname}}{character string specifying the name of the
model file (including the ending, either .R or .txt). If
unspecified a random name will be generated.}
\item{\code{modeldir}}{directory containing the model file or directory
in which the model file should be written. If unspecified a
temporary directory will be created.}
\item{\code{overwrite}}{logical; whether an existing model file with
the specified \code{<modeldir>/<modelname>} should be
overwritten. If set to \code{FALSE} and a model already
exists, that model will be used. If unspecified (\code{NULL})
and a file exists, the user is asked for input on how to
proceed.}
\item{\code{keep_model}}{logical; whether the created JAGS model file
should be saved (\code{TRUE}) or removed (\code{FALSE}; default) when
the sampling has finished.}
}}

\item{family}{only for \code{glm_imp} and \code{glme_imp}/\code{glmer_imp}:
a description of the distribution and link function to
be used in the model. This can be a character string naming a
family function, a family function or the result of a call to
a family function. (For more details see below and
\code{\link[stats]{family}}.)}

\item{nonprop}{optional named list of one-sided formulas specifying
covariates that have non-proportional effects in cumulative
logit models. These covariates should also be part of the
regular model formula, and the names of the list should be
the names of the ordinal response variables.}

\item{rev}{optional character vector; vector of ordinal outcome variable
names for which the odds should be reversed, i.e.,
\eqn{logit(y\le k)} instead of \eqn{logit(y > k)}.}

\item{fixed}{a two-sided formula describing the fixed-effects part of the
model (see \code{\link[stats]{formula}})}

\item{random}{only for multi-level models:
a one-sided formula of the form \code{~x1 + ... + xn | g},
where \code{x1 + ... + xn} specifies the model for the random
effects and \code{g} the grouping variable}

\item{rd_vcov}{character string or list specifying the structure of the
random effects variance covariance matrix, see details below.}

\item{df_basehaz}{degrees of freedom for the B-spline used to model the
baseline hazard in proportional hazards models
(\code{coxph_imp} and \code{JM_imp})}

\item{timevar}{name of the variable indicating the time of the measurement of
a time-varying covariate in a proportional hazards survival
model (also in a joint model).
The variable specified in
"timevar" will automatically be added to "no_model".}

\item{assoc_type}{named vector specifying the type of the association used
for a time-varying covariate in the linear predictor of the
survival model when using a "JM" model.
Implemented options are "underl.value"
(linear predictor; default for covariates modelled using a
Gaussian, Gamma, beta or log-normal distribution)
and "obs.value" (the observed/imputed value;
default for covariates modelled using other distributions).}
}
\value{
An object of class \link[=JointAIObject]{JointAI}.
}
\description{
Main analysis functions to estimate different types of models using MCMC
sampling, while imputing missing values.
}
\section{Model formulas}{
\subsection{Random effects}{

It is possible to specify multi-level models as it is done in the package
\href{https://CRAN.R-project.org/package=nlme}{\pkg{nlme}},
using \code{fixed} and \code{random}, or as it is done in the package
\href{https://CRAN.R-project.org/package=lme4}{\pkg{lme4}},
using \code{formula} and specifying the random effects in brackets:\if{html}{\out{<div class="sourceCode r">}}\preformatted{formula = y ~ x1 + x2 + x3 + (1 | id)
}\if{html}{\out{</div>}}

is equivalent to\if{html}{\out{<div class="sourceCode r">}}\preformatted{fixed = y ~ x1 + x2 + x3, random = ~ 1|id
}\if{html}{\out{</div>}}
}

\subsection{Multiple levels of grouping}{

For multiple levels of grouping the specification using \code{formula}
should be used. There is no distinction between nested and crossed random
effects, i.e., \code{... + (1 | id) + (1 | center)} is treated the same as
\code{... + (1 | center/id)}.
}

\subsection{Nested vs crossed random effects}{

The distinction between nested and crossed random effects should come from
the levels of the grouping variables, i.e., if \code{id} is nested in
\code{center}, then there cannot be observations with the same \code{id}
but different values for \code{center}.
}

\subsection{Modelling multiple models simultaneously & joint models}{

To fit multiple main models at the same time, a \code{list} of \code{formula}
objects can be passed to the argument \code{formula}.
Outcomes of one model may be contained as covariates in another model and
it is possible to combine models for variables on different levels,
for example:\if{html}{\out{<div class="sourceCode r">}}\preformatted{formula = list(y ~ x1 + x2 + x3 + x4 + time + (time | id),
                     x2 ~ x3 + x4 + x5)
}\if{html}{\out{</div>}}

This principle is also used for the specification of a joint model for
longitudinal and survival data.

Note that it is not possible to specify multiple models for the same outcome
variable.
\subsection{Random effects variance-covariance structure}{

(Note: This feature is new and has not been fully tested yet.)

By default, a block-diagonal structure is assumed for the variance-covariance
matrices of the random effects in models with random effects. This means that
per outcome and level random effects are assumed to be correlated, but
random effects of different outcomes are modelled as independent.
The argument \code{rd_vcov} allows the user to specify different assumptions about
these variance-covariance matrices. Implemented structures are \code{full},
\code{blockdiag} and \code{indep} (all off-diagonal elements are zero).

If \code{rd_vcov} is set to one of these options, the structure is assumed for
all random effects variance-covariance matrices.
Alternatively, it is possible to specify a named list of vectors, where
the names are the structures and the vectors contain the names of the
response variables which are included in this structure.

For example, for a multivariate mixed model with five outcomes
\code{y1}, ..., \code{y5}, the specification could be:\if{html}{\out{<div class="sourceCode r">}}\preformatted{rd_vcov = list(blockdiag = c("y1", "y2"),
               full = c("y3", "y4"),
               indep = "y5")
}\if{html}{\out{</div>}}

This would entail that the random effects for \code{y3} and \code{y4} are assumed to
be correlated (within and across outcomes),
random effects for \code{y1} and \code{y2} are assumed to be correlated within each
outcome, and the random effects for \code{y5} are assumed to be independent.

It is possible to have multiple sets of response variables for which separate
full variance-covariance matrices are used, for example:\if{html}{\out{<div class="sourceCode r">}}\preformatted{rd_vcov = list(full = c("y1", "y2", "y5"),
               full = c("y3", "y4"))
}\if{html}{\out{</div>}}

In models with multiple levels of nesting, separate structures can be
specified per level:\if{html}{\out{<div class="sourceCode r">}}\preformatted{rd_vcov = list(id = list(blockdiag = c("y1", "y2"),
                         full = c("y3", "y4"),
                         indep = "y5"),
              center = "indep")
}\if{html}{\out{</div>}}
}

}

\subsection{Survival models with frailties or time-varying covariates}{

Random effects specified in brackets can also be used to indicate a
multi-level structure in survival models, as would, for instance, be needed
in a multi-centre setting, where patients are from multiple hospitals.

This also makes it possible to model time-dependent covariates in a proportional
hazards survival model (using \code{coxph_imp}), also in combination with
additional grouping levels.

In time-dependent proportional hazards models,
last-observation-carried-forward is used to fill in missing values in the
time-varying covariates, and to determine the value of the covariate at the
event time. Preferably, all time-varying covariates should be measured at
baseline (\code{timevar = 0}). If a value for a time-varying covariate needs to be
filled in and there is no previous observation, the next observation will be
carried backward.
}

\subsection{Differences to basic regression models}{

It is not possible to specify transformations of outcome variables, i.e.,
it is not possible to use a model formula like\if{html}{\out{<div class="sourceCode r">}}\preformatted{log(y) ~ x1 + x2 + ...
}\if{html}{\out{</div>}}

In the specific case of a transformation with the natural logarithm,
a log-normal model can be used instead of a normal model.

Moreover, it is not possible to use \code{.} to indicate that all variables in a
\code{data.frame} other than the outcome variable should be used as covariates.
I.e., a formula \code{y ~ .} is not valid in \strong{JointAI}.
}
}

\section{Data structure}{
For multi-level settings, the data must be in long format, so that repeated
measurements are recorded in separate rows.

For survival data with time-varying covariates (\code{coxph_imp} and
\code{JM_imp}) the data should also be in long format. The
survival/censoring times and event indicator variables must be stored in
separate variables in the same data and should be constant across all rows
referring to the same subject.

During the pre-processing of the data the survival/censoring times will
automatically be merged with the observation times of the time-varying
covariates (which must be supplied via the argument \code{timevar}).

It is possible to have multiple time-varying covariates, which do not
have to be measured at the same time points, but there can only be one
\code{timevar}.
}

\section{Distribution families and link functions}{
\tabular{ll}{
\code{gaussian} \tab with links: \code{identity}, \code{log}\cr
\code{binomial} \tab with links: \code{logit}, \code{probit}, \code{log},
\code{cloglog}\cr
\code{Gamma}    \tab with links: \code{inverse}, \code{identity},
\code{log}\cr
\code{poisson}  \tab with links: \code{log}, \code{identity}
}
}

\section{Imputation methods / model types}{
Implemented model types that can be chosen in the argument \code{models}
for baseline covariates (not repeatedly measured) are:
\tabular{ll}{
\code{lm} \tab linear (normal) model with identity link
(alternatively: \code{glm_gaussian_identity}); default for
continuous variables\cr
\code{glm_gaussian_log} \tab linear (normal) model with log link\cr
\code{glm_gaussian_inverse} \tab linear (normal) model with inverse link\cr
\code{glm_logit} \tab logistic model for binary data
(alternatively: \code{glm_binomial_logit});
default for binary variables\cr
\code{glm_probit} \tab probit model for binary data
(alternatively: \code{glm_binomial_probit})\cr
\code{glm_binomial_log} \tab binomial model with log link\cr
\code{glm_binomial_cloglog} \tab binomial model with complementary
log-log link\cr
\code{glm_gamma_inverse} \tab gamma model with inverse link for skewed
continuous data\cr
\code{glm_gamma_identity} \tab gamma model with identity link for skewed
continuous data\cr
\code{glm_gamma_log} \tab gamma model with log link for skewed continuous
data\cr
\code{glm_poisson_log} \tab Poisson model with log link for count data\cr
\code{glm_poisson_identity} \tab Poisson model with identity link for count
data\cr
\code{lognorm} \tab log-normal model for skewed continuous data\cr
\code{beta} \tab beta model (with logit link) for skewed continuous
data in (0, 1)\cr
\code{mlogit} \tab multinomial logit model for unordered categorical
variables;
default for unordered factors with >2 levels\cr
\code{clm} \tab cumulative logit model for ordered categorical variables;
default for ordered factors\cr
}

For repeatedly measured variables the following model types are available:
\tabular{ll}{
\code{lmm} \tab linear (normal) mixed model with identity link
(alternatively: \code{glmm_gaussian_identity});
default for continuous variables\cr
\code{glmm_gaussian_log} \tab linear (normal) mixed model with log link\cr
\code{glmm_gaussian_inverse} \tab linear (normal) mixed model with
inverse link\cr
\code{glmm_logit} \tab logistic mixed model for binary data
(alternatively: \code{glmm_binomial_logit});
default for binary variables\cr
\code{glmm_probit} \tab probit mixed model for binary data
(alternatively: \code{glmm_binomial_probit})\cr
\code{glmm_binomial_log} \tab binomial mixed model with log link\cr
\code{glmm_binomial_cloglog} \tab binomial mixed model with complementary
log-log link\cr
\code{glmm_gamma_inverse} \tab gamma mixed model with inverse link for
skewed continuous data\cr
\code{glmm_gamma_identity} \tab gamma mixed model with identity link for
skewed continuous data\cr
\code{glmm_gamma_log} \tab gamma mixed model with log link for skewed
continuous data\cr
\code{glmm_poisson_log} \tab Poisson mixed model with log link for
count data\cr
\code{glmm_poisson_identity} \tab Poisson mixed model with identity link for
count data\cr
\code{glmm_lognorm} \tab log-normal mixed model for skewed covariates\cr
\code{glmm_beta} \tab beta mixed model for continuous data in (0, 1)\cr
\code{mlogitmm} \tab multinomial logit mixed model for unordered categorical
variables;
default for unordered factors with >2 levels\cr
\code{clmm} \tab cumulative logit mixed model for ordered factors;
default for ordered factors
}

When models are specified for only a subset of the variables for which a
model is needed, the default model choices (as indicated in the tables)
are used for the unspecified variables.
}

\section{Parameters to follow (\code{monitor_params})}{
See also the vignette:
\href{https://nerler.github.io/JointAI/articles/SelectingParameters.html}{Parameter Selection}\cr

Named vector specifying which parameters should be monitored. This can be
done either directly by specifying the name of the parameter or indirectly
by one of the key words selecting a set of parameters.
Except for \code{other}, in which parameter names are specified directly,
parameter (groups) are just set as \code{TRUE} or \code{FALSE}.

Models are divided into two groups, the main models, which are the models
for which the user has explicitly specified a formula (via \code{formula}
or \code{fixed}), and all other models, for which models were specified
automatically.

If left unspecified, \code{monitor_params = c("analysis_main" = TRUE)}
will be used.

\tabular{ll}{
\strong{name/key word} \tab \strong{what is monitored}\cr
\code{analysis_main} \tab \code{betas} and \code{sigma_main}, \code{tau_main}
(for beta regression) or \code{shape_main}
(for parametric survival models), \code{gamma_main}
(for cumulative logit models),
\code{D_main} (for multi-level models) and
\code{basehaz} (in proportional hazards models)\cr
\code{analysis_random} \tab \code{ranef_main}, \code{D_main},
\code{invD_main}, \code{RinvD_main}\cr
\code{other_models} \tab \code{alphas}, \code{tau_other}, \code{gamma_other},
\code{delta_other}\cr
\code{imps} \tab imputed values\cr
\code{betas} \tab regression coefficients of the main analysis model\cr
\code{tau_main} \tab precision of the residuals from the main analysis
model(s)\cr
\code{sigma_main} \tab standard deviation of the residuals from the main
analysis model(s)\cr
\code{gamma_main} \tab intercepts in ordinal main model(s)\cr
\code{delta_main} \tab increments of ordinal main model(s)\cr
\code{ranef_main} \tab random effects from the main analysis model(s)
\code{b}\cr
\code{D_main} \tab covariance matrix of the random effects from the
main model(s)\cr
\code{invD_main} \tab inverse(s) of \code{D_main}\cr
\code{RinvD_main} \tab matrices in the priors for \code{invD_main}\cr
\code{alphas} \tab regression coefficients in the covariate models\cr
\code{tau_other} \tab precision parameters of the residuals from
covariate models\cr
\code{gamma_other} \tab intercepts in ordinal covariate models\cr
\code{delta_other} \tab increments of ordinal intercepts\cr
\code{ranef_other} \tab random effects from the other models \code{b}\cr
\code{D_other} \tab covariance matrix of the random effects from the
other models\cr
\code{invD_other} \tab inverses of \code{D_other}\cr
\code{RinvD_other} \tab matrices in the priors for \code{invD_other}\cr
\code{other} \tab additional parameters
}

\strong{For example:}\cr
\code{monitor_params = c(analysis_main = TRUE, tau_main = TRUE,
sigma_main = FALSE)}
would monitor the regression parameters \code{betas} and the
residual precision \code{tau_main} instead of the residual standard
deviation \code{sigma_main}.

For a linear model, \code{monitor_params = c(imps = TRUE)} would monitor
\code{betas}, and \code{sigma_main} (because \code{analysis_main = TRUE} by
default) as well as the imputed values.

\loadmathjax
}

\section{Cumulative logit (mixed) models}{

In the default setting for cumulative logit models, i.e., \code{rev = NULL}, the
odds for a variable \mjeqn{y}{ascii} with \mjeqn{K}{ascii} ordered categories
are defined as \mjdeqn{\log\left(\frac{P(y_i > k)}{P(y_i \leq k)}\right) =
\gamma_k + \eta_i, \quad k = 1, \ldots, K-1,}{ascii} where
\mjeqn{\gamma_k}{ascii} is a category specific intercept and
\mjeqn{\eta_i}{ascii} the subject specific linear predictor.

To reverse the odds to \mjdeqn{\log\left(\frac{P(y_i \leq k)}{P(y_i >
k)}\right) = \gamma_k + \eta_i, \quad k = 1, \ldots, K-1,}{ascii} the name of
the response variable has to be specified in the argument \code{rev}, e.g., \code{rev = c("y")}.

By default, proportional odds are assumed and only the intercepts differ
per category of the ordinal response. To allow for non-proportional odds,
i.e.,
\mjdeqn{\log\left(\frac{P(y_i > k)}{P(y_i \leq k)}\right) =
\gamma_k + \eta_i + \eta_{ki}, \quad k = 1, \ldots, K-1,}{ascii}
the argument \code{nonprop} can be specified. It takes a one-sided formula or
a list of one-sided formulas. When a single formula is supplied, or an
unnamed list with just one element, it is assumed that the formula
corresponds to the main model.
To specify non-proportional effects for linear predictors in models for
ordinal covariates, the list has to be named with the names of the
ordinal response variables.

For example, the following three specifications are equivalent and assume a
non-proportional effect of \code{C1} on \code{O1}, but \code{C1} is assumed to have a
proportional effect on the incomplete ordinal covariate \code{O2}:\if{html}{\out{<div class="sourceCode r">}}\preformatted{clm_imp(O1 ~ C1 + C2 + B2 + O2, data = wideDF, nonprop = ~ C1)
clm_imp(O1 ~ C1 + C2 + B2 + O2, data = wideDF, nonprop = list(~ C1))
clm_imp(O1 ~ C1 + C2 + B2 + O2, data = wideDF, nonprop = list(O1 = ~ C1))
}\if{html}{\out{</div>}}

To specify non-proportional effects on \code{O2}, a named list has to be provided:\if{html}{\out{<div class="sourceCode r">}}\preformatted{clm_imp(O1 ~ C1 + C2 + B2 + O2 + B1, data = wideDF,
        nonprop = list(O1 = ~ C1,
                       O2 = ~ C1 + B1))
}\if{html}{\out{</div>}}

The variables for which a non-proportional effect is assumed also have to be
part of the regular model formula.
}

\section{Custom model parts}{

(Note: This feature is experimental and has not been fully tested yet.)

Via the argument \code{custom} it is possible to provide custom sub-models that
replace the sub-models that are automatically generated by \strong{JointAI}.

Using this feature it is, for instance, possible to use the value of
a repeatedly measured variable at a specific time point as covariate in
another model. An example would be the use of "baseline" cholesterol
(\code{chol} at \code{day = 0}) as covariate in a survival model.

First, the variable \code{chol0} is added to the \code{PBC} data.
For most patients the value of cholesterol at baseline is observed, but not
for all. It is important that the data has a row with \code{day = 0} for each
patient.\if{html}{\out{<div class="sourceCode r">}}\preformatted{PBC <- merge(PBC,
             subset(PBC, day == 0, select = c("id", "chol")),
             by = "id", suffixes = c("", "0"))
}\if{html}{\out{</div>}}

Next, the custom piece of JAGS model syntax needs to be specified.
We loop here only over the patients for which the baseline cholesterol
is missing.\if{html}{\out{<div class="sourceCode r">}}\preformatted{calc_chol0 <- "
for (ii in 1:28) \{
  M_id[row_chol0_id[ii], 3] <- M_lvlone[row_chol0_lvlone[ii], 1]
  \}"
}\if{html}{\out{</div>}}

To be able to run the model with the custom imputation "model" for baseline
cholesterol we need to provide the numbers of the rows in the data matrices
that contain the missing values of baseline cholesterol and the rows that
contain the imputed cholesterol at \code{day = 0}:\if{html}{\out{<div class="sourceCode r">}}\preformatted{row_chol0_lvlone <- which(PBC$day == 0 & is.na(PBC$chol0))
row_chol0_id <- match(PBC$id, unique(PBC$id))[row_chol0_lvlone]
}\if{html}{\out{</div>}}

Then we pass both the custom sub-model and the additional data to the
analysis function \code{coxph_imp()}. Note that we explicitly need to specify
the model for \code{chol}.\if{html}{\out{<div class="sourceCode r">}}\preformatted{coxph_imp(list(Surv(futime, status != "censored") ~ age + sex + chol0,
               chol ~ age + sex + day + (day | id)),
          no_model = "day", data = PBC,
          append_data_list = list(row_chol0_lvlone = row_chol0_lvlone,
                                  row_chol0_id = row_chol0_id),
          custom = list(chol0 = calc_chol0))
}\if{html}{\out{</div>}}
}

\section{Note}{

\subsection{Coding of variables:}{

The default covariate (imputation) models are chosen based on the
\code{class} of each of the variables, distinguishing between \code{numeric},
\code{factor} with two levels, unordered \code{factor} with >2 levels and
ordered \code{factor} with >2 levels.\cr

When a continuous variable has only two different values it is
assumed to be binary and its coding and default (imputation) model will be
changed accordingly. This behaviour can be overridden by specifying a model
type via the argument \code{models}.\cr

Variables of type \code{logical} are automatically converted to unordered
factors.\cr
\subsection{Contrasts}{

\strong{JointAI} version \mjeqn{\geq}{>=} 1.0.0 uses the globally (via
\code{options("contrasts")}) specified contrasts. However, for incomplete
categorical variables, for which the contrasts need to be re-calculated
within the JAGS model, currently only \code{contr.treatment} and \code{contr.sum} are
possible. Therefore, when an incomplete ordinal covariate is used and the
default contrasts (\code{contr.poly()}) are set to be used for ordered factors, a
warning message is printed and dummy coding (\code{contr.treatment()}) is used for
that variable instead.
}

}

\subsection{Non-linear effects and transformation of variables:}{

\strong{JointAI} handles non-linear effects, transformation of covariates
and interactions in the following way:\cr
When, for instance, a model formula contains the function \code{log(x)} and
\code{x} has missing values, \code{x} will be imputed and used in the linear
predictor of models for which no formula was specified,
i.e., it is assumed that the other variables have a linear association with
\code{x}. The \code{log()} of the observed and imputed values of
\code{x} is calculated and used in the linear predictor of the main
analysis model.\cr

If, instead of using \code{log(x)} in the model formula, a pre-calculated
variable \code{logx} is used, this variable is imputed directly
and used in the linear predictors of all models, implying that
variables that have \code{logx} in their linear predictors have a linear
association with \code{logx} but not with \code{x}.\cr

When different transformations of the same incomplete variable are used in
one model it is strongly discouraged to calculate these transformations
beforehand and supply them as different variables.
If, for example, a model formula contains both \code{x} and \code{x2} (where
\code{x2} = \code{x^2}), they are treated as separate variables and imputed
with separate models. Imputed values of \code{x2} are thus not equal to the
square of imputed values of \code{x}.
Instead, \code{x} and \code{I(x^2)} should be used in the model formula.
Then only \code{x} is imputed and \code{x^2} is calculated from the imputed
values of \code{x} internally.

The same applies to interactions involving incomplete variables.
}

\subsection{Sequence of models:}{

Models generated automatically (i.e., not mentioned in \code{formula} or \code{fixed})
are specified in a sequence based on the level of the outcome of the
respective model in the multi-level hierarchy and within each level
according to the number of missing values.
This means that level-1 variables have all level-2, level-3, ... variables
in their linear predictor, and variables on the highest level only have
variables from the same level in their linear predictor.
Within each level, the variable with the most missing values has the most
variables in its linear predictor.
}

\subsection{Not (yet) possible:}{

\itemize{
\item prediction (using \code{predict}) conditional on random effects
\item the use of splines for incomplete variables
\item the use of (or equivalents for) \code{\link[survival]{pspline}},
or \code{\link[survival]{strata}} in survival models
\item left censored or interval censored data
}
}
}

\examples{
# Example 1: Linear regression with incomplete covariates
mod1 <- lm_imp(y ~ C1 + C2 + M1 + B1, data = wideDF, n.iter = 100)


# Example 2: Logistic regression with incomplete covariates
mod2 <- glm_imp(B1 ~ C1 + C2 + M1, data = wideDF,
                family = binomial(link = "logit"), n.iter = 100)

\dontrun{

# Example 3: Linear mixed model with incomplete covariates
mod3 <- lme_imp(y ~ C1 + B2 + c1 + time, random = ~ time|id,
                data = longDF, n.iter = 300)


# Example 4: Parametric Weibull survival model
mod4 <- survreg_imp(Surv(time, status) ~ age + sex + meal.cal + wt.loss,
                    data = survival::lung, n.iter = 100)


# Example 5: Proportional hazards survival model
mod5 <- coxph_imp(Surv(time, status) ~ age + sex + meal.cal + wt.loss,
                    data = survival::lung, n.iter = 200)

# Example 6: Joint model for longitudinal and survival data
mod6 <- JM_imp(list(Surv(futime, status != 'censored') ~ age + sex +
                    albumin + copper + trig + (1 | id),
                    albumin ~ day + age + sex + (day | id)),
                    timevar = 'day', data = PBC, n.iter = 100)

# Example 7: Proportional hazards model with a time-dependent covariate
mod7 <- coxph_imp(Surv(futime, status != 'censored') ~ age + sex + copper +
                  trig + stage + (1 | id),
                  timevar = 'day', data = PBC, n.iter = 100)



# Example 8: Parallel computation
# If no strategy for handling the "future" is specified, the
# MCMC chains are run sequentially.
# To run MCMC chains in parallel, a strategy can be specified using the
# package 'future' (see ?future::plan), for example:
doFuture::registerDoFuture()
future::plan(future::multisession, workers = 4)
mod8 <- lm_imp(y ~ C1 + C2 + B2, data = wideDF, n.iter = 500, n.chains = 8)
mod8$comp_info$future
# To re-set the strategy to sequential computation, the sequential strategy
# can be specified:
future::plan(future::sequential)

}

}
\seealso{
\code{\link{set_refcat}},
\code{\link{traceplot}}, \code{\link{densplot}},
\code{\link{summary.JointAI}}, \code{\link{MC_error}},
\code{\link{GR_crit}},
\code{\link{predict.JointAI}}, \code{\link{add_samples}},
\code{\link{JointAIObject}},
\code{\link{parameters}}, \code{\link{list_models}}

Vignettes
\itemize{
\item \href{https://nerler.github.io/JointAI/articles/MinimalExample.html}{Minimal Example}
\item \href{https://nerler.github.io/JointAI/articles/ModelSpecification.html}{Model Specification}
\item \href{https://nerler.github.io/JointAI/articles/SelectingParameters.html}{Parameter Selection}
\item \href{https://nerler.github.io/JointAI/articles/MCMCsettings.html}{MCMC Settings}
\item \href{https://nerler.github.io/JointAI/articles/AfterFitting.html}{After Fitting}
\item \href{https://nerler.github.io/JointAI/articles/TheoreticalBackground.html}{Theoretical Background}
}
}
