% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/string-cleaning.R
\name{diagnose_strings}
\alias{diagnose_strings}
\title{Diagnose String Column Quality}
\usage{
diagnose_strings(x, name = NULL)
}
\arguments{
\item{x}{Character vector to diagnose.}

\item{name}{Optional name for the variable (used in output). If \code{NULL},
attempts to capture the variable name from the call.}
}
\value{
An S3 object of class \code{diagnose_strings} containing:
\describe{
\item{name}{Name of the variable}
\item{n_total}{Total number of elements}
\item{n_na}{Count of \code{NA} values}
\item{n_empty}{Count of empty strings (\code{""})}
\item{n_whitespace_only}{Count of strings containing only whitespace}
\item{n_leading_ws}{Count of non-empty strings with leading whitespace}
\item{n_trailing_ws}{Count of non-empty strings with trailing whitespace}
\item{n_non_ascii}{Count of strings containing non-ASCII characters}
\item{n_case_variants}{Number of unique values that have case variants}
\item{case_variant_groups}{Number of groups of case-insensitive duplicates}
\item{case_variant_examples}{A \code{data.table} with examples of case variants}
}
}
\description{
Audits a character vector for common data quality issues including missing
values, empty strings, whitespace problems, non-ASCII characters, and case
inconsistencies. Useful for understanding string data before cleaning.
}
\examples{
library(data.table)
firms <- c("Apple", "APPLE", "apple", "  Microsoft ", "Google", NA, "")
diagnose_strings(firms)

}
\seealso{
\code{\link[=audit_clean]{audit_clean()}} for auditing the effect of cleaning functions, and
\code{\link[=diagnose_nas]{diagnose_nas()}} for missing value diagnostics.
}
