% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CoCA.R
\name{CoCA}
\alias{CoCA}
\title{Performs Concept Class Analysis (CoCA)}
\usage{
CoCA(
  dtm,
  wv = NULL,
  directions = NULL,
  filter_sig = TRUE,
  filter_value = 0.05,
  zero_action = c("drop", "ownclass")
)
}
\arguments{
\item{dtm}{Document-term matrix with words as columns. Works with DTMs
produced by any popular text analysis package, or you can use the
\code{dtm_builder()} function.}

\item{wv}{Matrix of word embedding vectors (a.k.a. embedding model)
with rows as words.}

\item{directions}{Direction vectors output from \code{get_direction()}.}

\item{filter_sig}{logical (default = TRUE), sets 'insignificant'
ties to 0 to decrease noise and increase stability}

\item{filter_value}{Minimum significance cutoff.
Absolute row correlations below
this value will be set to 0}

\item{zero_action}{If 'drop', CCA drops rows with
0 variance from the analyses (default).
If 'ownclass', the correlations between 0-variance
rows and all other rows are set to 0, and the correlations
between all pairs of 0-var rows are set to 1}
}
\value{
Returns a named list object of class \code{CoCA}. List elements include:
\itemize{
\item membership: document memberships
\item modules: schematic classes
\item cormat: correlation matrix
}
}
\description{
CoCA outputs schematic classes derived from documents' engagement
with multiple bi-polar concepts (in a Likert-style fashion).
The function requires (1) a DTM of a corpus, which can be obtained using any
popular text analysis package or from the \code{dtm_builder()} function, and (2)
semantic directions as output from \code{get_direction()}.
\code{CMDist()} works under the hood. Code modified from the \code{corclass} package.
}
\examples{

# load example word embeddings
data(ft_wv_sample)

# load example text
data(jfk_speech)

# minimal preprocessing: lowercase, then replace punctuation with spaces
jfk_speech$sentence <- tolower(jfk_speech$sentence)
jfk_speech$sentence <- gsub("[[:punct:]]+", " ", jfk_speech$sentence)

# create DTM
dtm <- jfk_speech |> dtm_builder(sentence, sentence_id)

# create semantic directions from pairs of anchor terms
gen <- data.frame(
  add = c("woman"),
  subtract = c("man")
)

die <- data.frame(
  add = c("alive"),
  subtract = c("die")
)

gen.dir <- get_direction(anchors = gen, wv = ft_wv_sample)
die.dir <- get_direction(anchors = die, wv = ft_wv_sample)

# combine both directions row-wise
sem_dirs <- rbind(gen.dir, die.dir)

# run CoCA on the DTM with the combined directions
classes <- CoCA(
  dtm = dtm,
  wv = ft_wv_sample,
  directions = sem_dirs,
  filter_sig = TRUE,
  filter_value = 0.05,
  zero_action = 'drop'
)

print(classes)

}
\references{
Taylor, Marshall A., and Dustin S. Stoltz.
(2020) 'Concept Class Analysis: A Method for Identifying Cultural
Schemas in Texts.' \emph{Sociological Science} 7:544-569.
\doi{10.15195/v7.a23}.\cr
Boutyline, Andrei. 'Improving the measurement of shared cultural
schemas with correlational class analysis: Theory and method.'
\emph{Sociological Science} 4.15 (2017): 353-393.
\doi{10.15195/v4.a15}\cr
}
\seealso{
\code{\link[=CMDist]{CMDist()}}, \code{\link[=get_direction]{get_direction()}}
}
\author{
Dustin Stoltz and Marshall Taylor
}
