% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bootstrap.R
\name{bootstrap_persistence_thresholds}
\alias{bootstrap_persistence_thresholds}
\title{Estimate persistence threshold(s) for topological features in a data set using bootstrapping.}
\usage{
bootstrap_persistence_thresholds(
  X,
  FUN = "calculate_homology",
  maxdim = 0,
  thresh,
  distance_mat = FALSE,
  ripser = NULL,
  ignore_infinite_cluster = TRUE,
  calculate_representatives = FALSE,
  num_samples = 30,
  alpha = 0.05,
  return_subsetted = FALSE,
  return_diag = TRUE,
  num_workers = parallelly::availableCores(omit = 1)
)
}
\arguments{
\item{X}{the input dataset, must either be a matrix or data frame.}

\item{FUN}{a string representing the persistent homology function to use, either
'calculate_homology' (the default), 'PyH' or 'ripsDiag'.}

\item{maxdim}{the integer maximum homological dimension for persistent homology, default 0.}

\item{thresh}{the positive numeric maximum radius of the Vietoris-Rips filtration.}

\item{distance_mat}{a boolean representing if `X` is a distance matrix (TRUE) or not (FALSE, default).}

\item{ripser}{the imported ripser module when `FUN` is `PyH`.}

\item{ignore_infinite_cluster}{a boolean indicating whether or not to ignore the infinitely lived cluster when `FUN` is `PyH`.}

\item{calculate_representatives}{a boolean representing whether to calculate representative (co)cycles, default FALSE. Note that representatives cannot be
calculated when using the 'calculate_homology' function.}

\item{num_samples}{the positive integer number of bootstrap samples, default 30.}

\item{alpha}{the type-1 error threshold, default 0.05.}

\item{return_subsetted}{a boolean representing whether or not to return the subsetted persistence diagram (with or without representatives), default FALSE.}

\item{return_diag}{a boolean representing whether or not to return the calculated persistence diagram, default TRUE.}

\item{num_workers}{the integer number of cores used for parallelizing (over bootstrap samples), default one less than the maximum number of cores on the machine.}
}
\value{
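a numeric vector of threshold values, with one for each dimension 0..`maxdim` (in that order). If `return_diag` or `return_subsetted` is TRUE, the requested diagram(s) are returned along with the thresholds.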
}
\description{
Bootstrapping is used to find a conservative estimate of a "confidence interval" around
each point in the persistence diagram of the data set, and points whose (open) intervals do not
overlap with the diagonal (birth = death) would be considered "significant" or "real".
One threshold is computed for each dimension in the diagram.
}
\details{
The thresholds are determined by calculating the (1 - `alpha`) quantile of the bottleneck
distance values between the real persistence diagram and other diagrams obtained
by bootstrap resampling the data. Note that since \code{\link[TDAstats]{calculate_homology}}
can ignore the longest-lived cluster, fewer "real" clusters may be found. To avoid this possibility,
try setting `FUN` equal to 'ripsDiag'.
}
\examples{

# create a data set by sampling 50 points from the unit circle
df = TDA::circleUnif(n = 50)

# calculate persistence thresholds for alpha = 0.05 (the default)
# and return the calculated diagram along with the thresholds (return_diag defaults to TRUE)
bootstrapped_diagram <- bootstrap_persistence_thresholds(X = df,
FUN = "calculate_homology",maxdim = 1,thresh = 2,num_workers = 2)
}
\references{
Chazal F et al. (2017). "Robust Topological Inference: Distance to a Measure and Kernel Distance." \url{https://www.jmlr.org/papers/volume18/15-484/15-484.pdf}.
}
\author{
Shael Brown - \email{shaelebrown@gmail.com}
}
