% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_cleaning.R
\name{clean_data}
\alias{clean_data}
\title{Clean and Optionally Aggregate Environmental Data}
\usage{
clean_data(env_data, station, aggregate_daily = FALSE)
}
\arguments{
\item{env_data}{A data table in long format.
Must include columns:
\describe{
\item{Station}{Station identifier for the data.}
\item{Komponente}{Measured environmental component, e.g., temperature or NO2.}
\item{Wert}{Measured value.}
\item{date}{Timestamp as a date-time object, e.g., \code{POSIXct} (\verb{YYYY-MM-DD HH:MM:SS} format).}
\item{Komponente_txt}{Textual description of the component.}
}}

\item{station}{Character. Name of the station to filter by.}

\item{aggregate_daily}{Logical. If \code{TRUE}, aggregates data to daily mean values. Default is \code{FALSE}.}
}
\value{
A \code{data.table}:
\itemize{
\item If \code{aggregate_daily = TRUE}: Contains columns for station, component, day, year,
and the daily mean value of the measurements.
\item If \code{aggregate_daily = FALSE}: Contains the cleaned data for the selected station with duplicates removed.
}
}
\description{
Cleans a data table of environmental measurements by filtering for a specific
station, removing duplicates, and optionally aggregating the data on a daily
basis using the mean.
}
\details{
Rows that share the same \code{date}, \code{Komponente}, and \code{Station} are treated as
duplicates and removed. A warning is issued if any duplicates are found.
}
\examples{
# Example data
env_data <- data.table::data.table(
  Station = c("DENW094", "DENW094", "DENW006", "DENW094"),
  Komponente = c("NO2", "O3", "NO2", "NO2"),
  Wert = c(45, 30, 50, 40),
  date = as.POSIXct(c(
    "2023-01-01 08:00:00", "2023-01-01 09:00:00",
    "2023-01-01 08:00:00", "2023-01-02 08:00:00"
  )),
  Komponente_txt = c(
    "Nitrogen Dioxide", "Ozone", "Nitrogen Dioxide", "Nitrogen Dioxide"
  )
)

# Clean data for station "DENW094" without aggregation
cleaned_data <- clean_data(env_data, station = "DENW094", aggregate_daily = FALSE)
print(cleaned_data)
}
