% From SamplerCompare, (c) 2010-2011 Madeleine Thompson
% $Id: compare.samplers.Rd 3139 2012-01-28 19:24:19Z mthompson $

\name{compare.samplers}

\alias{compare.samplers}

\title{Compare MCMC samplers on distributions}

\description{Simulate a set of distributions with a set of samplers
and tuning parameters.}

\usage{compare.samplers(sample.size, dists, samplers, tuning=1,
                 trace=TRUE, seed=17, burn.in=0.2,
                 cores=1, completed.file=NULL)}

\arguments{
  \item{sample.size}{An integer specifying how long a chain to simulate.}

  \item{dists}{A list of \code{dist} objects (often generated by
    \code{\link{make.dist}}) specifying the probability distributions
    to simulate.}

  \item{samplers}{A list of sampler functions.  See
    the section \dQuote{Sampler calling convention}.}

  \item{tuning}{A numeric vector of tuning parameters.}

  \item{trace}{A logical indicating whether a message should be
    printed when a chain completes (useful for large simulations).}

  \item{seed}{If not \code{NULL}, the random seed is set to this with
    \code{set.seed} before each chain and restored afterwards.
    This makes each chain individually replicable, which is useful when
    debugging.}

  \item{burn.in}{Fraction of chain to discard before computing
    autocorrelation time.}

  \item{cores}{Number of threads to use.}

  \item{completed.file}{If not NULL, the name of a file to log
    partial results to.}
}

\details{
  \code{compare.samplers} runs a single Markov chain simulation of
  length \code{sample.size} for each combination of the
  elements of \code{dists}, \code{samplers}, and \code{tuning}.
  Each chain starts at a point generated by the \code{initial}
  member of the distribution object, or a point uniformly drawn
  from the unit hypercube if \code{initial} is not defined.  It
  returns a data frame with one row per simulation so that performance
  of the methods can be compared on the various distributions.  The
  simplest way to visualize the results is with the
  \code{\link{comparison.plot}} function.

  As the simulations are run, they are logged to a file in the
  format generated by \code{save}.  If \var{completed.file}
  is set, that file is used and retained after the function returns.
  If it is not, a temporary file is used and deleted on successful
  completion.  (It may be leaked if the simulations are aborted.)
  If \var{trace} is set, the filename will be output so that the
  output can be inspected incrementally.

  If \var{cores} is greater than one, the \pkg{multicore} and
  \pkg{synchronicity} packages are used to run multiple simulations
  simultaneously.  These packages are not available for Windows.
  There is currently a bug such that if a simulation calls \code{error},
  R will not print the error message, just a message indicating
  that timing stopped.  The simulation can be re-run with \code{cores=1}
  to see the error.

  For an example of the use of this method, see the \dQuote{Introduction
  to SamplerCompare} vignette.  For discussion of the ideas behind
  it, see Thompson (2010).
}

\value{
  A data frame with columns \code{dist}, \code{dist.expr}, \code{ndim},
  \code{sampler}, \code{sampler.expr}, \code{tuning}, \code{act},
  \code{act.025}, \code{act.975}, \code{act.y}, \code{act.y.025},
  \code{act.y.975}, \code{evals}, \code{grads}, \code{cpu}, \code{err},
  and \code{aborted}.  Each row represents a single simulation.

  \itemize{
    \item \code{sampler} and \code{dist} are the names of the sampler
      and distribution taken from the lists passed to
      \code{compare.samplers}.
    \item \code{sampler.expr} and \code{dist.expr} are
      \code{plotmath} versions of \code{sampler} and \code{dist}.
      If not specified by the distribution object and sampler function,
      they are constructed from \code{dist} and \code{sampler}.
    \item \code{ndim} is the dimension of the state space of the
      target distribution.
    \item \code{tuning} is the tuning parameter for the chain.
    \item \code{act} is the estimated autocorrelation time, taken
      over all parameters of the simulation; see \code{\link{ar.act}}.
      This is more accurate if \code{target.dist$mean} is defined.
    \item \code{act.025} and \code{act.975} bound a nominal 95\%
      confidence interval for \code{act}.  Since the interval is
      asymmetric, a standard error is not sufficient.
    \item \code{act.y}, \code{act.y.025}, and \code{act.y.975} are
      an estimate and endpoints for a nominal 95\% confidence interval
      for the autocorrelation time of the log density.  These are
      more accurate if \code{target.dist$mean.log.dens} is defined.
    \item \code{evals} and \code{grads} are the mean numbers of
      log-density and gradient evaluations per observation.
    \item \code{cpu} is the number of processor seconds used per
      observation.
    \item \code{err} is the two-norm of the difference
      between the estimated mean and the true mean.  Set to \code{NA}
      if the distribution does not specify a true mean.
    \item \code{aborted} is a logical indicating whether the
      simulation returned fewer rows than requested.
  }
}

\section{Sampler calling convention}{
  Sampler functions passed to \code{compare.samplers} should be of
  the form:
  \preformatted{sampler(target.dist, x0, sample.size, tuning)}
  \code{target.dist} is a \code{dist} object representing
  the distribution to sample from; see \code{\link{make.dist}} for
  more information on these.  \code{x0} is the initial state of the
  chain; it must be a numeric vector of length \code{target.dist$ndim}.
  \code{sample.size} is the desired length of the chain, passed
  down from \code{compare.samplers}.  \code{tuning} is a scalar
  tuning parameter from the vector passed to \code{compare.samplers}.

  Sampler functions should return a list with elements \code{X},
  \code{evals}, and (optionally) \code{grads}.  \code{X} should be
  a matrix with \code{target.dist$ndim} columns and \code{sample.size}
  rows.  If for some reason it is necessary to abort the chain,
  returning fewer rows is acceptable.  \code{evals} and \code{grads}
  indicate the number of calls to \code{target.dist$log.density}
  and \code{target.dist$grad.log.density} respectively.

  Sampler functions must have a \code{name} attribute with a human-readable
  name for the MCMC method.  If desired, they may also have a
  \code{name.expression} attribute containing a more nicely-formatted
  version of the name in \code{plotmath} format.

  See the vignette \dQuote{Introduction to SamplerCompare} for an
  example of a function that implements this interface.
}

\references{
  Thompson, M. B. (2010), Graphical comparison of MCMC performance,
  University of Toronto Dept. of Statistics technical report no. 1010.

  Thompson, M. B. (2011), \dQuote{Introduction to SamplerCompare,}
  Journal of Statistical Software 43(12):1-10.
}

\seealso{
  \code{\link{make.dist}},
  \code{\link{comparison.plot}},
  \code{\link{ar.act}},
  \dQuote{Introduction to SamplerCompare} (vignette)
}
