% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/duplicate-count-colpair.R
\name{duplicate_count_colpair}
\alias{duplicate_count_colpair}
\title{Count duplicate values by column pair}
\usage{
duplicate_count_colpair(data, na.rm = TRUE, show_rates = TRUE)
}
\arguments{
\item{data}{Data frame.}

\item{na.rm}{Boolean. If \code{TRUE} (the default), any \code{NA} values in \code{data}'s
columns will be removed before checking for duplicates. This makes sure
that \code{NA} values in different columns will not be counted as duplicates of
each other.}

\item{show_rates}{Boolean. If \code{TRUE} (the default), adds columns \code{rate_x} and
\code{rate_y}. See the Value section. Set \code{show_rates} to \code{FALSE} for higher
performance.}
}
\value{
A tibble (data frame) with these columns:
\itemize{
\item \code{x} and \code{y}: Each row contains a unique combination of \code{data}'s columns,
stored in the \code{x} and \code{y} output columns.
\item \code{count}: Number of "duplicates", i.e., values that are present in both \code{x}
and \code{y}.
\item \code{rate_x} and \code{rate_y} (added by default): \code{rate_x} is the proportion of \code{x}
values that are duplicated in \code{y}. Likewise, \code{rate_y} is the proportion of
\code{y} values that are duplicated in \code{x}. These two \verb{rate_*} columns will be
equal unless \code{NA} values are present.
}
}
\description{
\code{duplicate_count_colpair()} takes a data frame and checks each combination of
columns for duplicates. Results are presented in a tibble, ordered by the
number of duplicates.
}
\section{Summaries with \code{audit()}}{
 There is an S3 method for \code{audit()}, so
you can call \code{audit()} following \code{duplicate_count_colpair()} to get a
summary of \code{duplicate_count_colpair()}'s results. It is a tibble with a
single row and the columns below. If the tibble is too wide, call
\code{audit_list()} instead.
\itemize{
\item \code{n}: number of column pairs tested (index 1).
\item \code{count_min}, \code{count_max}, \code{count_mean}, \code{count_sd}, \code{count_median}:
Summary statistics of the duplicate \code{count} column (index 2 to 6).
\item \code{rate_x_min}, \code{rate_x_max}, \code{rate_x_mean}, \code{rate_x_sd}, \code{rate_x_median}:
Summary statistics of the \code{rate_x} column (index 7 to 11).
\item \code{rate_y_min}, \code{rate_y_max}, \code{rate_y_mean}, \code{rate_y_sd}, \code{rate_y_median}:
Summary statistics of the \code{rate_y} column (index 12 to 16).
}
}

\examples{
# Basic usage:
mtcars \%>\%
  duplicate_count_colpair()

# Summaries with `audit()`:
mtcars \%>\%
  duplicate_count_colpair() \%>\%
  audit()
}
\seealso{
\code{corrr::colpair_map()}, a versatile tool for pairwise column
analysis, which the present function wraps.
}
