% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_dir_transcript.R
\name{read_dir_transcript}
\alias{read_dir_transcript}
\title{Read In Multiple Transcript Files From a Directory}
\usage{
read_dir_transcript(path, col.names = c("Document", "Person", "Dialogue"),
  pattern = NULL, all.files = FALSE, recursive = FALSE, skip = 0,
  merge.broke.tot = TRUE, header = FALSE, dash = "", ellipsis = "...",
  quote2bracket = FALSE, rm.empty.rows = TRUE, na = "", sep = NULL,
  comment.char = "", max.person.nchar = 20, ignore.case = FALSE, ...)
}
\arguments{
\item{path}{Path to the directory.}

\item{col.names}{A character vector specifying the column names of the
transcript columns (document, person, dialogue).}

\item{pattern}{An optional regular expression. Only file names which match
the regular expression will be returned.}

\item{all.files}{Logical.  If \code{FALSE}, only the names of visible files
are returned. If \code{TRUE}, all file names will be returned.}

\item{recursive}{Logical. Should the listing recurse into directories?}

\item{skip}{Integer; the number of lines of the data file to skip before
beginning to read data.}

\item{merge.broke.tot}{Logical.  If \code{TRUE} and the file being read in
is a .docx in which a single turn of talk is split by broken space,
\code{\link[textreadr]{read_transcript}} will attempt to merge the pieces
into a single turn of talk.}

\item{header}{logical.  If \code{TRUE} the file contains the names of the
variables as its first line.}

\item{dash}{A character string to replace en and em dash special
characters (the default, \code{""}, removes them).}

\item{ellipsis}{A character string to replace the ellipsis special characters.}

\item{quote2bracket}{logical. If \code{TRUE} replaces curly quotes with curly
braces (default is \code{FALSE}).  If \code{FALSE} curly quotes are removed.}

\item{rm.empty.rows}{logical.  If \code{TRUE}
\code{\link[textreadr]{read_transcript}}  attempts to remove empty rows.}

\item{na}{A character string to be interpreted as an \code{NA} value.}

\item{sep}{The field separator character. Values on each line of the file are
separated by this character.  The default of \code{NULL} instructs
\code{\link[textreadr]{read_transcript}} to use a separator suitable for the file
type being read in.}

\item{comment.char}{A character vector of length one containing a single
character or an empty string. Use \code{""} to turn off the interpretation of
comments altogether.}

\item{max.person.nchar}{The maximum number of characters a person's name is
expected to have.  This information is used to warn the user if a separator
appears beyond this length in the text.}

\item{ignore.case}{logical.  If \code{TRUE} case in the \code{pattern} argument
will be ignored.}

\item{\ldots}{ignored.}
}
\value{
Returns a data frame of documents, people, and dialogue.
}
\description{
Read in multiple transcript files from a directory and create a
\code{\link[base]{data.frame}}.
}
\examples{
## Read every transcript in the package's example directory
skips <- c(0, 1, 1, 0, 0, 1)
path <- system.file("docs/transcripts", package = 'textreadr')
textreadr::peek(read_dir_transcript(path, skip = skips), Inf)

\dontrun{
## with additional cleaning
## library() attaches a single package per call, so each one is loaded
## separately
library(tidyverse)
library(textshape)
library(textclean)

path \%>\%
    read_dir_transcript(skip = skips) \%>\%
    textclean::filter_row("Person", "^\\\\[") \%>\%
    mutate(
        Person = stringi::stri_replace_all_regex(Person, "(^/\\\\s*)|(:\\\\s*$)", "") \%>\%
            trimws(),
        Dialogue = stringi::stri_replace_all_regex(Dialogue, "(^/\\\\s*)", "")
    ) \%>\%
    peek(Inf)
}
}
\seealso{
\code{\link[textreadr]{read_transcript}}
}
