% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils-embedding.R
\name{get_direction}
\alias{get_direction}
\title{Word embedding semantic direction extractor}
\usage{
get_direction(anchors, wv, method = "paired", missing = "stop", n_dirs = 1L)
}
\arguments{
\item{anchors}{Two column data frame of juxtaposed 'anchor' terms}

\item{wv}{Matrix of word embedding vectors (a.k.a. embedding model)
with rows as terms.}

\item{method}{Indicates the method used to generate vector offset.
Default is 'paired'. See details.}

\item{missing}{What action to take if terms are not in embeddings.
If \code{missing = "stop"} (default), the function is stopped
and an error message states which terms are missing.
If \code{missing = "remove"}, missing terms or rows with missing
terms are removed. Missing terms will be printed as a message.}

\item{n_dirs}{If \code{method = "PCA"}, an integer indicating how many directions
to return. Default = \code{1L}, indicating a single,
bipolar, direction.}
}
\value{
returns a one row matrix
}
\description{
\code{get_direction()} outputs a vector corresponding to one pole of a
"semantic direction" built from sets of antonyms or juxtaposed terms.
The output can be used as an input to \code{\link[=CMDist]{CMDist()}} and \code{\link[=CoCA]{CoCA()}}.
}
\details{
Semantic directions can be estimated using a few methods:
\itemize{
\item 'paired' (default): each individual term is subtracted from exactly one
other paired term. There must be the same number of
terms for each side of the direction (although one
word may be used more than once).
\item 'pooled': terms corresponding to one side of a direction are first
averaged, and then these averaged vectors are subtracted.
A different number of terms can be used for each side of
the direction.
\item 'L2': the vector is calculated the same as with 'pooled'
but is then divided by the L2 (Euclidean) norm.
\item 'PCA': vector offsets are calculated for each pair of terms, as with
'paired'. If \code{n_dirs = 1L} (the default), then the direction is the
first principal component. Users can return more than one direction
by increasing the \code{n_dirs} parameter.
}
}
\examples{

# load example word embeddings
data(ft_wv_sample)

# create the anchor data frame: two columns of juxtaposed terms,
# one column for each side of the direction
gen <- data.frame(
  add = c("woman"),
  subtract = c("man")
)

# default method (paired)
dir <- get_direction(anchors = gen, wv = ft_wv_sample)

# PCA method, returning a single direction; the n_dirs argument is
# spelled out in full instead of relying on partial argument matching
dir <- get_direction(
  anchors = gen, wv = ft_wv_sample,
  method = "PCA", n_dirs = 1L
)
}
\references{
Bolukbasi, T., Chang, K. W., Zou, J., Saligrama, V., and Kalai, A. (2016).
Quantifying and reducing stereotypes in word embeddings. arXiv preprint
\url{https://arxiv.org/abs/1606.06121v1}.\cr
Bolukbasi, Tolga, Kai-Wei Chang, James Zou, Venkatesh Saligrama, and
Adam Kalai. (2016). 'Man Is to Computer Programmer as Woman Is to Homemaker?
Debiasing Word Embeddings.' Proceedings of the 30th International Conference
on Neural Information Processing Systems. 4356-4364.
\url{https://dl.acm.org/doi/10.5555/3157382.3157584}.\cr
Taylor, Marshall A., and Dustin S. Stoltz. (2020)
'Concept Class Analysis: A Method for Identifying Cultural
Schemas in Texts.' \emph{Sociological Science} 7:544-569.
\doi{10.15195/v7.a23}.\cr
Taylor, Marshall A., and Dustin S. Stoltz. (2020) 'Integrating semantic
directions with concept mover's distance to measure binary concept
engagement.' \emph{Journal of Computational Social Science} 1-12.
\doi{10.1007/s42001-020-00075-8}.\cr
Kozlowski, Austin C., Matt Taddy, and James A. Evans. (2019). 'The geometry
of culture: Analyzing the meanings of class through word embeddings.'
\emph{American Sociological Review} 84(5):905-949.
\doi{10.1177/0003122419877135}.\cr
Arseniev-Koehler, Alina, and Jacob G. Foster. (2020). 'Machine learning
as a model for cultural learning: Teaching an algorithm what it means to
be fat.' arXiv preprint \url{https://arxiv.org/abs/2003.12133v2}.\cr
}
\author{
Dustin Stoltz
}
