% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/machine_learning.R
\name{diagram_kkmeans}
\alias{diagram_kkmeans}
\title{Cluster a group of persistence diagrams using kernel k-means}
\usage{
diagram_kkmeans(
  diagrams,
  K = NULL,
  centers,
  dim = 0,
  t = 1,
  sigma = 1,
  rho = NULL,
  num_workers = parallelly::availableCores(omit = 1),
  ...
)
}
\arguments{
\item{diagrams}{a list of n>=2 persistence diagrams which are either the output of a persistent homology calculation like ripsDiag/\code{\link[TDAstats]{calculate_homology}}/\code{\link{PyH}}, or the \code{\link{diagram_to_df}} function.}

\item{K}{an optional precomputed Gram matrix of persistence diagrams, default NULL.}

\item{centers}{number of clusters to initialize, no more than the number of diagrams although smaller values are recommended.}

\item{dim}{the non-negative integer homological dimension in which the kernel is to be computed, default 0.}

\item{t}{a positive number representing the scale for the persistence Fisher kernel, default 1.}

\item{sigma}{a positive number representing the bandwidth for the Fisher information metric, default 1.}

\item{rho}{an optional positive number representing the heuristic for Fisher information metric approximation, see \code{\link{diagram_distance}}. Default NULL. If supplied, Gram matrix calculation is sequential.}

\item{num_workers}{the number of cores used for parallel computation, default is one less than the number of cores on the machine.}

\item{...}{additional parameters for the \code{\link[kernlab]{kkmeans}} kernlab function.}
}
\value{
a list of class 'diagram_kkmeans' containing the output of \code{\link[kernlab]{kkmeans}} on the Gram matrix, i.e. a list containing the elements

\describe{

\item{clustering}{an S4 object of class specc, the output of a \code{\link[kernlab]{kkmeans}} function call. The \code{.Data} slot of this object contains cluster memberships, \code{withinss} contains the within-cluster sum of squares for each cluster, etc.}

\item{diagrams}{the input \code{diagrams} argument.}

\item{dim}{the input \code{dim} argument.}

\item{t}{the input \code{t} argument.}

\item{sigma}{the input \code{sigma} argument.}

}
}
\description{
Finds latent cluster labels for a group of persistence diagrams, using a kernelized version
of the popular k-means algorithm. An optimal number of clusters may be determined by analyzing
the \code{withinss} field of the \code{clustering} object over several values of \code{centers}.
}
\details{
Returns the output of \code{\link[kernlab]{kkmeans}} on the desired Gram matrix of a group of persistence diagrams
in a particular dimension. The additional list elements stored in the output are needed
to estimate cluster labels for new persistence diagrams in the \code{\link{predict_diagram_kkmeans}}
function.
}
\examples{

if(require("TDAstats"))
{
  # create two diagrams
  D1 <- TDAstats::calculate_homology(TDAstats::circle2d[sample(1:100,20),],
                      dim = 1,threshold = 2)
  D2 <- TDAstats::calculate_homology(TDAstats::circle2d[sample(1:100,20),],
                      dim = 1,threshold = 2)
  g <- list(D1,D1,D2,D2)

  # calculate kernel k-means clusters with centers = 2, and sigma = t = 2 in dimension 0
  clust <- diagram_kkmeans(diagrams = g,centers = 2,dim = 0,t = 2,sigma = 2,num_workers = 2)
  
  # repeat with precomputed Gram matrix, gives the same result just much faster
  K <- gram_matrix(diagrams = g,num_workers = 2,t = 2,sigma = 2)
  cluster <- diagram_kkmeans(diagrams = g,K = K,centers = 2,dim = 0,sigma = 2,t = 2)
  
}
}
\references{
Dhillon, I and Guan, Y and Kulis, B (2004). "A Unified View of Kernel k-means, Spectral Clustering and Graph Cuts." \url{https://people.bu.edu/bkulis/pubs/spectral_techreport.pdf}.
}
\seealso{
\code{\link{predict_diagram_kkmeans}} for predicting cluster labels of new diagrams.
}
\author{
Shael Brown - \email{shaelebrown@gmail.com}
}
