% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/genBinom.R
\name{genBinom}
\alias{genBinom}
\alias{genBinomDf}
\alias{genBinomDt}
\title{Generate data for binomial regression}
\usage{
genBinomDf(
  b = 2L,
  f = 2L,
  c = 1L,
  n = 20L,
  nlf = 3L,
  pb = 0.5,
  rc = 0.8,
  py = 0.5,
  asFactor = TRUE,
  model = FALSE,
  timelim = 5,
  speedglm = FALSE
)

genBinomDt(
  b = 2L,
  f = 2L,
  c = 1L,
  n = 20L,
  nlf = 3L,
  pb = 0.5,
  rc = 0.8,
  py = 0.5,
  asFactor = TRUE,
  model = FALSE,
  timelim = 5,
  speedglm = FALSE
)
}
\arguments{
\item{b}{The number of \bold{b}inomial variables
(the number of predictors
which are binary).
\cr
These are limited to \eqn{0} or \eqn{1}.}

\item{f}{The number of \bold{f}actor predictors.
\cr
The number of predictors
which are \code{factor}s.}

\item{c}{The number of \bold{c}ontinuous predictors.
\cr
The number of predictors which are
continuous.}

\item{n}{The \bold{n}umber of observations (rows) in the
\code{data.frame} or \code{data.table}.}

\item{nlf}{The \bold{n}umber of \bold{l}evels
in a \bold{f}actor.}

\item{pb}{The \bold{p}robability for \bold{b}inomial
predictors:
\cr
the probability of binomial predictors being \eqn{=1}.
\cr
E.g. if \code{pb=0.3}, \eqn{30\%} will be \eqn{1}s,
 \eqn{70\%} will be \eqn{0}s.}

\item{rc}{The \bold{r}atio for \bold{c}ontinuous variables.
\cr
The ratio of levels of
continuous variables to the total number of
observations \code{n}.
\cr
E.g. if \code{rc=0.8} and \code{n=100},
it will be in the range \eqn{1} to \eqn{80}.}

\item{py}{The \bold{p}robability for \bold{y},
the ratio of \eqn{1}s to the total number of observations
for the
binomial response.
\cr
E.g. if \code{py=0.5},
\eqn{50\%} will be \eqn{1}s, \eqn{50\%} will be \eqn{0}s.}

\item{asFactor}{If \code{asFactor=TRUE} (the default),
predictors given as \code{factor}s
will be converted to \code{factor}s
in the data frame before the model
is fit.}

\item{model}{If \code{model=TRUE},
will also return a model fitted with
\code{stats::glm} or \code{speedglm::speedglm}}

\item{timelim}{The function will time out after \code{timelim} seconds.
This is present to prevent duplication of rows.}

\item{speedglm}{If \code{speedglm=TRUE},
return a model fitted with \code{speedglm}
instead of \code{glm}. See:
\code{?speedglm::speedglm}}
}
\value{
If \code{model=TRUE}: a list with the following values:
 \item{df or dt}{A \code{data.frame} (for \code{genBinomDf})
or \code{data.table}
(for \code{genBinomDt}).
\cr
Predictors are labelled \eqn{x1, x2, ..., xn}.
\cr
The response is \eqn{y}.
\cr
Rows represent the \eqn{n} observations.}
 \item{model}{A model fit with \code{stats::glm}
or \code{speedglm::speedglm}}
If \code{model=FALSE}, a \code{data.frame}
or \code{data.table} as above.
}
\description{
Generates a \code{data.frame} or \code{data.table}
with a binary outcome, and a logistic model to
describe it.
}
\note{
\code{genBinomDt} is faster
and more efficient for large datasets.
\cr \cr
Using \code{asFactor=TRUE} with \code{factor}s
which have a large number of \code{levels}
(e.g. \code{nlf > 30})
on large datasets (e.g. \eqn{n > 1000})
can cause fitting to be excessively slow.
}
\examples{
set.seed(1)
genBinomDf(speedglm=TRUE)

genBinomDt(b=0, c=2, n=100L, rc=0.7, model=FALSE)

}
\keyword{datagen}
