% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/loo-kfold.R
\name{kfold.stanreg}
\alias{kfold.stanreg}
\alias{kfold}
\title{K-fold cross-validation}
\usage{
\method{kfold}{stanreg}(x, K = 10, ..., folds = NULL,
  save_fits = FALSE, cores = getOption("mc.cores", 1))
}
\arguments{
\item{x}{A fitted model object returned by one of the rstanarm modeling
functions. See \link{stanreg-objects}.}

\item{K}{For \code{kfold}, the number of subsets (folds) into which the data
will be partitioned for performing \eqn{K}-fold cross-validation. The model
is refit \code{K} times, each time leaving out one of the \code{K} folds.
If the \code{folds} argument is specified then \code{K} will automatically
be set to \code{length(unique(folds))}, otherwise the specified value of
\code{K} is passed to \code{loo::\link[loo]{kfold_split_random}} to
randomly partition the data into \code{K} subsets of equal (or as close to
equal as possible) size.}

\item{...}{Currently ignored.}

\item{folds}{For \code{kfold}, an optional integer vector with one element
per observation in the data used to fit the model. Each element of the
vector is an integer in \code{1:K} indicating to which of the \code{K}
folds the corresponding observation belongs. There are some convenience
functions available in the \pkg{loo} package that create integer vectors to
use for this purpose (see the \strong{Examples} section below and also the
\link[loo]{kfold-helpers} page).}

\item{save_fits}{For \code{kfold}, if \code{TRUE}, a component \code{'fits'}
is added to the returned object to store the cross-validated
\link[=stanreg-objects]{stanreg} objects and the indices of the omitted
observations for each fold. Defaults to \code{FALSE}.}

\item{cores}{The number of cores to use for parallelization. Instead of fitting
separate Markov chains for the same model on different cores, by default
\code{kfold} will distribute the \code{K} models to be fit across the cores
(using \code{\link[parallel]{parLapply}} on Windows and
\code{\link[parallel]{mclapply}} otherwise). The Markov chains for each
model will be run sequentially. This will often be the most efficient
option, especially if many cores are available, but in some cases it may be
preferable to fit the \code{K} models sequentially and instead use the
cores for the Markov chains. This can be accomplished by setting
\code{options(mc.cores)} to be the desired number of cores to use
for the Markov chains \emph{and} also manually specifying \code{cores=1}
when calling the \code{kfold} function. See the end of the
\strong{Examples} section for a demonstration.}
}
\value{
An object with classes 'kfold' and 'loo' that has a structure similar to
  that of the objects returned by the \code{\link{loo}} and
  \code{\link{waic}} methods and is compatible with the
  \code{\link{loo_compare}} function for comparing models.
}
\description{
The \code{kfold} method performs exact \eqn{K}-fold cross-validation. First
the data are randomly partitioned into \eqn{K} subsets of equal size (or as close
to equal as possible), or the user can specify the \code{folds} argument
to determine the partitioning. Then the model is refit \eqn{K} times, each time
leaving out one of the \eqn{K} subsets. If \eqn{K} is equal to the total
number of observations in the data then \eqn{K}-fold cross-validation is
equivalent to exact leave-one-out cross-validation (to which
\code{\link[=loo.stanreg]{loo}} is an efficient approximation).
}
\examples{
\donttest{
fit1 <- stan_glm(mpg ~ wt, data = mtcars)
fit2 <- stan_glm(mpg ~ wt + cyl, data = mtcars)
fit3 <- stan_glm(mpg ~ disp * as.factor(cyl), data = mtcars)

# 10-fold cross-validation
# (if possible also specify the 'cores' argument to use multiple cores)
(kfold1 <- kfold(fit1, K = 10))
kfold2 <- kfold(fit2, K = 10)
kfold3 <- kfold(fit3, K = 10) 
loo_compare(kfold1, kfold2, kfold3)

# stratifying by a grouping variable
# (note: might get some divergence warnings with this model but
# this is just intended as a quick example of how to code this)
fit4 <- stan_lmer(mpg ~ disp + (1|cyl), data = mtcars)
table(mtcars$cyl)
folds_cyl <- loo::kfold_split_stratified(K = 3, x = mtcars$cyl)
table(cyl = mtcars$cyl, fold = folds_cyl)
kfold4 <- kfold(fit4, folds = folds_cyl, cores = 2)
print(kfold4)
}

# Example code demonstrating the different ways to specify the number 
# of cores and how the cores are used
# 
# options(mc.cores = NULL)
# 
# # spread the K models over N_CORES cores (method 1)
# kfold(fit, K, cores = N_CORES)
# 
# # spread the K models over N_CORES cores (method 2)
# options(mc.cores = N_CORES)
# kfold(fit, K)
#  
# # fit K models sequentially using N_CORES cores for the Markov chains each time
# options(mc.cores = N_CORES)
# kfold(fit, K, cores = 1)

}
\references{
Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical
  Bayesian model evaluation using leave-one-out cross-validation and WAIC.
  \emph{Statistics and Computing}. 27(5), 1413--1432.
  doi:10.1007/s11222-016-9696-4. arXiv preprint:
  \url{https://arxiv.org/abs/1507.04544}

  Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018). Using
  stacking to average Bayesian predictive distributions. \emph{Bayesian
  Analysis}, advance publication, doi:10.1214/17-BA1091.
  (\href{https://projecteuclid.org/euclid.ba/1516093227}{online}).
}
