% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_transcript.R
\name{as_transcript}
\alias{as_transcript}
\title{Coerce Text Into a Transcript}
\usage{
as_transcript(text, person.regex = NULL, col.names = c("Person",
  "Dialogue"), text.var = NULL, merge.broke.tot = TRUE,
  header = FALSE, dash = "", ellipsis = "...",
  quote2bracket = FALSE, rm.empty.rows = TRUE, na = "", sep = NULL,
  skip = 0, comment.char = "", max.person.nchar = 20, ...)
}
\arguments{
\item{text}{Character string: the text to be coerced into a transcript; the
data are read from the value of text. Notice that a literal string can be used
to include (small) data sets within R code.}

\item{person.regex}{A capturing regex describing what is a person portion of
a string.}

\item{col.names}{A character vector specifying the column names of the
transcript columns.}

\item{text.var}{A character string specifying the name of the text variable;
supplying it ensures that the variable is classed as character.  If \code{NULL},
\code{\link[textreadr]{read_transcript}} attempts to guess the text variable
(dialogue).}

\item{merge.broke.tot}{logical.  If \code{TRUE} and if the file being read in
is .docx with broken space between a single turn of talk,
\code{\link[textreadr]{read_transcript}} will attempt to merge these into a
single turn of talk.}

\item{header}{logical.  If \code{TRUE} the file contains the names of the
variables as its first line.}

\item{dash}{A character string to replace the en and em dash special
characters (default is to remove).}

\item{ellipsis}{A character string to replace the ellipsis special characters.}

\item{quote2bracket}{logical. If \code{TRUE} replaces curly quotes with curly
braces (default is \code{FALSE}).  If \code{FALSE} curly quotes are removed.}

\item{rm.empty.rows}{logical.  If \code{TRUE}
\code{\link[textreadr]{read_transcript}}  attempts to remove empty rows.}

\item{na}{A character string to be interpreted as an \code{NA} value.}

\item{sep}{The field separator character. Values on each line of the file are
separated by this character.  The default of \code{NULL} instructs
\code{\link[textreadr]{read_transcript}} to use a separator suitable for the file
type being read in.}

\item{skip}{Integer; the number of lines of the data file to skip before
beginning to read data.}

\item{comment.char}{A character vector of length one containing a single
character or an empty string. Use \code{""} to turn off the interpretation of
comments altogether.}

\item{max.person.nchar}{The maximum number of characters that person names are
expected to contain.  This information is used to warn the user if a separator
appears beyond this length in the text.}

\item{\ldots}{Further arguments to be passed to \code{\link[utils]{read.table}},
\code{\link[readxl]{read_excel}}, or \code{\link[textreadr]{read_doc}}.}
}
\value{
Returns a dataframe of dialogue and people.
}
\description{
Coerce text into a transcript.
}
\examples{
## EXAMPLE 1
as_transcript("34    The New York Times reports a lot of words here.
12    Greenwire reports a lot of words.
31    Only three words.
 2    The Financial Times reports a lot of words.
 9    Greenwire short.
13    The New York Times reports a lot of words again.",
    col.names = c("NO", "ARTICLE"), sep = "   ")

## EXAMPLE 2
as_transcript("34..    The New York Times reports a lot of words here.
12..    Greenwire reports a lot of words.
31..    Only three words.
 2..    The Financial Times reports a lot of words.
 9..    Greenwire short.
13..    The New York Times reports a lot of words again.",
    col.names = c("NO", "ARTICLE"), sep = "\\\\.\\\\.")

## EXAMPLE 3
as_transcript("JAKE The New York Times reports a lot of words here.
JIM Greenwire reports a lot of words.
JILL Only three words.
GRACE The Financial Times reports a lot of words.
JIM Greenwire short.
JILL The New York Times reports a lot of words again.",
   person.regex = '(^[A-Z]{3,})'
)
}
