% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model_aggregate.R
\name{model_aggregate}
\alias{model_aggregate}
\title{Hierarchical aggregation via model specification}
\usage{
model_aggregate(
  data,
  sum_vars = NULL,
  fun_vars = NULL,
  fun = NULL,
  hierarchies = NULL,
  formula = NULL,
  dim_var = NULL,
  remove_empty = NULL,
  preagg_var = NULL,
  dummy = TRUE,
  pre_aggregate = dummy,
  list_return = FALSE,
  pre_return = FALSE,
  verbose = TRUE,
  mm_args = NULL,
  ...
)
}
\arguments{
\item{data}{A data frame containing data to be aggregated}

\item{sum_vars}{Variables to be summed. This will be done via matrix multiplication.}

\item{fun_vars}{Variables to be aggregated by supplied functions.
This will be done via \code{\link{aggregate_multiple_fun}} and \code{\link{dummy_aggregate}} and
\code{fun_vars} is specified as the parameter \code{vars}.}

\item{fun}{The \code{fun} parameter to \code{\link{aggregate_multiple_fun}}}

\item{hierarchies}{The \code{hierarchies} parameter to \code{\link{ModelMatrix}}}

\item{formula}{The \code{formula} parameter to \code{\link{ModelMatrix}}}

\item{dim_var}{The \code{dimVar} parameter to \code{\link{ModelMatrix}}}

\item{remove_empty}{When non-NULL, passed on as the \code{removeEmpty} parameter to \code{\link{ModelMatrix}}.
When \code{NULL} (the default), the default of \code{\link{ModelMatrix}} applies:
\code{TRUE} with formula input without hierarchy and \code{FALSE} otherwise.}

\item{preagg_var}{Extra variables to be used as grouping elements in the pre-aggregate step}

\item{dummy}{The \code{dummy} parameter to \code{\link{dummy_aggregate}}.
When \code{TRUE}, only 0s and 1s are assumed in the generated model matrix.
When \code{FALSE}, non-0s in this matrix are passed as an additional first input parameter to the \code{fun} functions.}

\item{pre_aggregate}{Whether to pre-aggregate data to reduce the dimension of the model matrix.
Note that all original \code{fun_vars} observations are retained in the aggregated dataset and \code{pre_aggregate} does not affect the final result.
However, \code{pre_aggregate} must be \code{FALSE} when the \code{dummy_aggregate} parameter \code{dummy} is \code{FALSE},
since then \code{\link{unlist}} will not be run.
The default, \code{pre_aggregate = dummy}, takes care of this.
An exception to this is if the \code{fun} functions are written to handle list data.}

\item{list_return}{Whether to return a list of separate components including the model matrix \code{x}.}

\item{pre_return}{Whether to return the pre-aggregate data as a two-component list. Can also be combined with \code{list_return} (see examples).}

\item{verbose}{Whether to print information during calculations.}

\item{mm_args}{List of further arguments passed to \code{\link{ModelMatrix}}.}

\item{...}{Further arguments passed to \code{\link{dummy_aggregate}}.}
}
\value{
A data frame, or a list when \code{list_return} or \code{pre_return} is \code{TRUE}.
}
\description{
Internally a dummy/model matrix is created according to the model specification.
This model matrix is used in the aggregation process via matrix multiplication and/or the function \code{\link{aggregate_multiple_fun}}.
}
\details{
With formula input, limited output can be achieved by \code{\link{formula_selection}} (see examples).
An attribute called \code{startCol} is added to the output data frame to make this functionality work.
}
\examples{
# Example data loaded via SSBtoolsData; two age labels are recoded
# and the column ths_per is renamed to ths
z <- SSBtoolsData("sprt_emp_withEU")
z$age[z$age == "Y15-29"] <- "young"
z$age[z$age == "Y30-64"] <- "old"
names(z)[names(z) == "ths_per"] <- "ths"
# Extra numeric variable (assumes the data has 18 rows - TODO confirm)
z$y <- 1:18

# Helper returning two named values, min and max, of its input
my_range <- function(x) c(min = min(x), max = max(x))

# Formula input: y and ths are summed (sum_vars), and in the same call
# ths and y are aggregated by the functions named in fun_vars
agg <- model_aggregate(
  z,
  formula = ~age:year + geo,
  sum_vars = c("y", "ths"),
  fun_vars = c(sum = "ths", mean = "y", med = "y", ra = "ths"),
  fun = c(sum = sum, mean = mean, med = median, ra = my_range)
)

agg

# Limited output can be achieved by formula_selection
formula_selection(agg, ~geo)


# The single unnamed variable feature:
# fun_vars is one unnamed variable while fun holds several named functions
model_aggregate(
  z,
  formula = ~age,
  fun_vars = "y",
  fun = c(sum = sum, mean = mean, med = median, n = length)
)


# To illustrate list_return and pre_return: all four combinations are run
for (pre_return in c(FALSE, TRUE)) {
  for (list_return in c(FALSE, TRUE)) {
    cat("\n=======================================\n")
    cat("list_return =", list_return, ", pre_return =", pre_return, "\n\n")
    out <- model_aggregate(
      z,
      formula = ~age:year,
      sum_vars = c("ths", "y"),
      fun_vars = c(mean = "y", ra = "y"),
      fun = c(mean = mean, ra = my_range),
      list_return = list_return,
      pre_return = pre_return
    )
    cat("\n")
    print(out)
  }
}


# To illustrate preagg_var: eu is used as an extra grouping variable in the
# pre-aggregate step, and only the pre_data component of the result is shown
pre_out <- model_aggregate(
  z,
  formula = ~age:year,
  sum_vars = c("ths", "y"),
  fun_vars = c(mean = "y", ra = "y"),
  fun = c(mean = mean, ra = my_range),
  preagg_var = "eu",
  pre_return = TRUE
)
pre_out[["pre_data"]]


# To illustrate hierarchies: the model is specified by a hierarchies list
# (age given as "All", geo given by a hierarchy loaded via SSBtoolsData)
geo_hier <- SSBtoolsData("sprt_emp_geoHier")
model_aggregate(
  z,
  hierarchies = list(age = "All", geo = geo_hier),
  sum_vars = "y",
  fun_vars = c(sum = "y")
)

####  Special non-dummy cases illustrated below  ####

# Extend the hierarchy to make non-dummy model matrix
extra_rows <- data.frame(mapsFrom = c("EU", "Spain"), mapsTo = "EUandSpain", sign = 1)
geo_hier2 <- rbind(extra_rows, geo_hier[, -4])

# The same hierarchies specification is used in all three calls below
hier2 <- list(age = "All", geo = geo_hier2)

# Non-dummy model matrix, so a warning is given
# y and y_sum are different
model_aggregate(z, hierarchies = hier2, sum_vars = "y", fun_vars = c(sum = "y"))

# Dummy model matrix, and thus no warning, since unionComplement = TRUE (see ?HierarchyCompute)
# y and y_sum are equal
model_aggregate(z, hierarchies = hier2, sum_vars = "y", fun_vars = c(sum = "y"),
                mm_args = list(unionComplement = TRUE))

# Non-dummy again, but no warning since dummy = FALSE
# Then pre_aggregate is by default set to FALSE (error when TRUE)
# fun takes the non-0 model matrix entries as an extra first argument (see ?dummy_aggregate)
# y and y_sum2 are equal
model_aggregate(z, hierarchies = hier2, sum_vars = "y",
                fun_vars = c(sum2 = "y"),
                fun = c(sum2 = function(x, y) sum(x * y)),
                dummy = FALSE)
                
}
