#' export
#'
#' Export andromeda generated by \link[TreatmentPatterns]{computePathways}
#' object to sharable csv-files and/or a zip archive.
#'
#' @export
#'
#' @template param_andromeda
#' @template param_outputPath
#' @template param_ageWindow
#' @template param_minCellCount
#' @template param_censorType
#' @template param_archiveName
#'
#' @return (`invisible(NULL)`)
#'
#' @examples
#' \donttest{
#' library(TreatmentPatterns)
#' library(CDMConnector)
#' library(dplyr)
#'
#' if (require("CirceR", character.only = TRUE, quietly = TRUE)) {
#'   withr::local_envvar(
#'     EUNOMIA_DATA_FOLDER = Sys.getenv("EUNOMIA_DATA_FOLDER", unset = tempfile())
#'   )
#'
#'   downloadEunomiaData(overwrite = TRUE)
#'
#'   con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())
#'   cdm <- cdmFromCon(con, cdmSchema = "main", writeSchema = "main")
#'
#'   cohortSet <- readCohortSet(
#'     path = system.file(package = "TreatmentPatterns", "exampleCohorts")
#'   )
#'
#'   cdm <- generateCohortSet(
#'     cdm = cdm,
#'     cohortSet = cohortSet,
#'     name = "cohort_table"
#'   )
#'
#'   cohorts <- cohortSet %>%
#'     # Remove 'cohort' and 'json' columns
#'     select(-"cohort", -"json") %>%
#'     mutate(type = c("event", "event", "event", "event", "exit", "event", "event", "target")) %>%
#'     rename(
#'       cohortId = "cohort_definition_id",
#'       cohortName = "cohort_name",
#'     ) %>%
#'     select("cohortId", "cohortName", "type")
#'
#'   outputEnv <- computePathways(
#'     cohorts = cohorts,
#'     cohortTableName = "cohort_table",
#'     cdm = cdm
#'   )
#'
#'   export(
#'     andromeda = outputEnv,
#'     outputPath = tempdir()
#'   )
#'
#'   Andromeda::close(outputEnv)
#'   DBI::dbDisconnect(con, shutdown = TRUE)
#' }
#' }
export <- function(andromeda, outputPath, ageWindow = 10, minCellCount = 5, censorType = "minCellCount", archiveName = NULL) {
  collection <- checkmate::makeAssertCollection()
  checkmate::assertTRUE(Andromeda::isAndromeda(andromeda), add = collection)
  checkmate::assertPathForOutput(outputPath, overwrite = TRUE, add = collection)
  checkmate::assertIntegerish(ageWindow, min.len = 1, any.missing = FALSE, unique = TRUE, add = collection)
  checkmate::assertIntegerish(minCellCount, len = 1, lower = 1, add = collection)
  checkmate::assertChoice(censorType, choices = c("minCellCount", "remove", "mean"))
  checkmate::assertCharacter(archiveName, len = 1, add = collection, null.ok = TRUE)
  checkmate::reportAssertions(collection)
  
  nrows <- andromeda$treatmentHistory %>%
    dplyr::summarize(n()) %>%
    dplyr::pull()
  
  if (nrows == 0) {
    message("Treatment History table is empty. Nothing to export.")
    return(invisible(NULL))
  }
  
  if (!dir.exists(outputPath)) {
    dir.create(outputPath)
  }
  
  treatmentHistory <- andromeda$treatmentHistory %>%
    dplyr::collect() %>%
    dplyr::select(
      "personId", "indexYear", "age", "sex", "eventCohortName", "eventCohortId", "eventSeq", "durationEra")
  
  treatmentHistory <- dplyr::bind_rows(
    treatmentHistory,
    getFilteredSubjects(andromeda)
  )
  
  # metadata
  metadataPath <- file.path(outputPath, "metadata.csv")
  message(sprintf("Writing metadata to %s", metadataPath))
  metadata <- andromeda$metadata %>% dplyr::collect()
  write.csv(metadata, file = metadataPath, row.names = FALSE)
  
  # Treatment Pathways
  treatmentPathwaysPath <- file.path(outputPath, "treatmentPathways.csv")
  message(sprintf("Writing treatmentPathways to %s", treatmentPathwaysPath))
  treatmentPathways <- computeTreatmentPathways(
    treatmentHistory,
    ageWindow,
    minCellCount,
    censorType
  ) %>% dplyr::distinct()
  
  write.csv(treatmentPathways, file = treatmentPathwaysPath, row.names = FALSE)
  
  # Summary statistics duration
  statsTherapyPath <- file.path(outputPath, "summaryStatsTherapyDuration.csv")
  message(sprintf("Writing summaryStatsTherapyDuration to %s", statsTherapyPath))
  statsTherapy <- computeStatsTherapy(treatmentHistory)
  write.csv(statsTherapy, file = statsTherapyPath, row.names = FALSE)
  
  # Counts
  counts <- computeCounts(treatmentHistory, minCellCount)
  
  countsYearPath <- file.path(outputPath, "countsYear.csv")
  message(sprintf("Writing countsYearPath to %s", countsYearPath))
  write.csv(counts$year, file = countsYearPath, row.names = FALSE)
  
  countsAgePath <- file.path(outputPath, "countsAge.csv")
  message(sprintf("Writing countsAgePath to %s", countsAgePath))
  write.csv(counts$age, file = countsAgePath, row.names = FALSE)
  
  countsSexPath <- file.path(outputPath, "countsSex.csv")
  message(sprintf("Writing countsSexPath to %s", countsSexPath))
  write.csv(counts$sex, file = countsSexPath, row.names = FALSE)
  
  if (!is.null(archiveName)) {
    zipPath <- file.path(outputPath, archiveName)
    
    message(sprintf("Zipping files to %s", zipPath))
    
    utils::zip(
      zipfile = zipPath,
      files = c(
        treatmentPathwaysPath,
        countsYearPath,
        countsAgePath,
        countsSexPath,
        statsTherapyPath
      ),
      flags = "-j"
    )
  }
  return(invisible(NULL))
}

#' computeStatsTherapy
#' 
#' @noRd
#'
#' @template param_treatmentHistory
#'
#' @return (`data.frame()`)
computeStatsTherapy <- function(treatmentHistory) {
  stats <- treatmentHistory %>%
    mutate(treatmentType = dplyr::case_when(
      nchar(.data$eventCohortId) > 1 ~ "combination",
      .default = "monotherapy"
    )) %>%
    dplyr::group_by(.data$treatmentType) %>%
    dplyr::summarise(
      avgDuration = mean(.data$durationEra, na.rm = TRUE),
      medianDuration = stats::median(.data$durationEra, na.rm = TRUE),
      sd = stats::sd(.data$durationEra, na.rm = TRUE),
      min = min(.data$durationEra, na.rm = TRUE),
      max = max(.data$durationEra, na.rm = TRUE),
      count = n()
    )
  
  return(stats)
}

countYear <- function(treatmentHistory, minCellCount) {
  treatmentHistory %>%
    dplyr::group_by(.data$personId) %>%
    dplyr::slice(which.min(.data$indexYear)) %>%
    dplyr::group_by(.data$indexYear) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    dplyr::mutate(n = case_when(
      .data$n < minCellCount ~ sprintf("<%s", minCellCount),
      .default = as.character(.data$n)
    ))
}

countSex <- function(treatmentHistory, minCellCount) {
  treatmentHistory %>%
    dplyr::group_by(.data$personId) %>%
    dplyr::slice(which.min(.data$indexYear)) %>%
    dplyr::group_by(.data$sex) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    dplyr::mutate(n = case_when(
      .data$n < minCellCount ~ sprintf("<%s", minCellCount),
      .default = as.character(.data$n)
    ))
}

countAge <- function(treatmentHistory, minCellCount) {
  treatmentHistory %>%
    dplyr::group_by(.data$personId) %>%
    dplyr::slice(which.min(.data$indexYear)) %>%
    dplyr::group_by(.data$age) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    dplyr::mutate(n = case_when(
      .data$n < minCellCount ~ sprintf("<%s", minCellCount),
      .default = as.character(.data$n)
    ))
}

#' computeCounts
#'
#' @noRd
#'
#' @template param_treatmentHistory
#' @template param_minCellCount
#'
#' @return (`list()`)
computeCounts <- function(treatmentHistory, minCellCount) {
  # n per Year
  list(
    year = countYear(treatmentHistory, minCellCount),
    age = countAge(treatmentHistory, minCellCount),
    sex = countSex(treatmentHistory, minCellCount)
  )
}

#' censorminCellCount
#' @param treatmentPathways data.frame()
#' @param minCellCount numeric(1)
#' 
#' @noRd
censorminCellCount <- function(treatmentPathways, minCellCount) {
  treatmentPathways %>%
    dplyr::mutate(freq = dplyr::case_when(
      .data$freq >= minCellCount ~ .data$freq,
      .data$freq < minCellCount ~ minCellCount,
      .default = .data$freq))
}

#' censorRemove
#' @param treatmentPathways data.frame()
#' @param minCellCount numeric(1)
#' 
#' @noRd
censorRemove <- function(treatmentPathways, minCellCount) {
  treatmentPathways %>%
    dplyr::filter(.data$freq >= minCellCount)
}

#' censorRemove
#' @param treatmentPathways data.frame()
#' @param minCellCount numeric(1)
#' @param meanCount numeric(1)
#' 
#' @noRd
censorMean <- function(treatmentPathways, minCellCount) {
  meanFreq <- treatmentPathways %>%
    dplyr::filter(.data$freq < minCellCount) %>%
    dplyr::pull(.data$freq) %>%
    mean() %>%
    round()
  
  treatmentPathways %>%
    dplyr::mutate(freq = dplyr::case_when(
      .data$freq >= minCellCount ~ .data$freq,
      .data$freq < minCellCount ~ meanFreq,
      .default = .data$freq))
}

#' censorData
#' @param treatmentPathways data.frame()
#' @param minCellCount numeric(1)
#' @param censorType character(1)
#' 
#' @noRd
censorData <- function(treatmentPathways, minCellCount, censorType) {
  nCensored <- treatmentPathways %>%
    dplyr::filter(.data$freq < minCellCount) %>%
    nrow()
  
  treatmentPathways <- switch(
    censorType,
    "minCellCount" = {
      message(sprintf("Censoring %s pathways with a frequency <%s to %s.", nCensored, minCellCount, minCellCount))
      censorminCellCount(treatmentPathways, minCellCount)
    },
    "remove" = {
      message(sprintf("Removing %s pathways with a frequency <%s.", nCensored, minCellCount))
      censorRemove(treatmentPathways, minCellCount)
    },
    "mean" = {
      message(sprintf("Censoring %s pathways with a frequency <%s to mean.", nCensored, minCellCount))
      censorMean(treatmentPathways, minCellCount)
    })
  return(treatmentPathways)
}

#' makeAgeWindow
#' 
#' @param ageWindow numeric(n)
#' 
#' @noRd
makeAgeWindow <- function(ageWindow) {
  if (length(ageWindow) > 1) {
    return(ageWindow)
  } else {
    return(seq(0, 150, ageWindow))
  }
}

#' groupByAgeWindow
#' 
#' @param treatmentHistory data.frame()
#' @param ageWindow numeric(n)
#'
#' @noRd
groupByAgeWindow <- function(treatmentHistory, ageWindow) {
  treatmentHistory %>%
    dplyr::rowwise() %>%
    dplyr::mutate(
      ageBin = paste(
        unlist(stringr::str_extract_all(as.character(cut(.data$age, makeAgeWindow(ageWindow))), "\\d+")),
        collapse = "-"
      )
    )
}

#' computeTreatmentPathways
#'
#' @param treatmentHistory data.frame()
#' @param ageWindow numeric(n)
#' @param minCellCount numeric(1)
#' @param censorType character(1)
#'
#' @return (`data.frame()`)
#' 
#' @noRd
computeTreatmentPathways <- function(treatmentHistory, ageWindow, minCellCount, censorType) {
  treatmentPathways <- groupByAgeWindow(treatmentHistory, ageWindow)
  
  treatmentPathways <- treatmentPathways %>%
    dplyr::mutate(indexYear = as.character(.data$indexYear))
  
  treatmentPathways <- stratisfy(treatmentPathways)
  
  treatmentPathways[is.na(treatmentPathways)] <- "all"
  
  treatmentPathways <- censorData(treatmentPathways, minCellCount, censorType)
  
  treatmentPathways$path[treatmentPathways$path == "NA"] <- "None"
  
  return(treatmentPathways)
}

collapsePaths <- function(treatmentHistory) {
  treatmentHistory %>%
    dplyr::group_by(.data$personId, .data$indexYear) %>%
    dplyr::mutate(
      pathway = list(.data$eventCohortName[.data$eventSeq]),
      .groups = "drop"
    ) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(.data$indexYear, .data$pathway) %>%
    dplyr::mutate(freq = length(.data$personId), .groups = "drop") %>%
    ungroup() %>%
    rowwise() %>%
    mutate(path = paste(.data$pathway, collapse = "-")) %>%
    dplyr::group_by(.data$personId) %>%
    dplyr::slice(which.min(.data$indexYear))
}

stratisfyAgeSexYear <- function(treatmentHistory) {
  collapsePaths(treatmentHistory) %>%
    group_by(.data$path, .data$ageBin, .data$sex, .data$indexYear) %>%
    summarise(freq = n(), .groups = "drop") %>%
    mutate(
      indexYear = as.character(.data$indexYear)
    )
}

# All
stratAll <- function(treatmentPathways) {
  treatmentPathways %>%
    group_by(path) %>%
    summarize(freq = sum(freq)) %>%
    mutate(indexYear = "all", sex = "all", ageBin = "all")
}

# sex
stratSex <- function(treatmentPathways) {
  dplyr::bind_rows(
    treatmentPathways %>%
      group_by(.data$path, .data$indexYear, .data$ageBin) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(sex = "all"),
    treatmentPathways %>%
      group_by(.data$path, .data$ageBin) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(sex = "all", indexYear = "all"),
    treatmentPathways %>%
      group_by(.data$path, .data$indexYear) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(sex = "all", ageBin = "all")
  )
}

stratAgeBin <- function(treatmentPathways) {
  dplyr::bind_rows(
    treatmentPathways %>%
      group_by(.data$path, .data$indexYear, .data$sex) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(ageBin = "all"),
    treatmentPathways %>%
      group_by(.data$path, .data$sex) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(ageBin = "all", indexYear = "all"),
    treatmentPathways %>%
      group_by(.data$path, .data$indexYear) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(ageBin = "all", sex = "all")
  )
}

stratIndexYear <- function(treatmentPathways) {
  dplyr::bind_rows(
    treatmentPathways %>%
      group_by(.data$path, .data$sex, .data$ageBin) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(indexYear = "all"),
    treatmentPathways %>%
      group_by(.data$path, .data$ageBin) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(sex = "all", indexYear = "all"),
    treatmentPathways %>%
      group_by(.data$path, .data$sex) %>%
      summarize(freq = sum(.data$freq), .groups = "drop") %>%
      mutate(indexYear = "all", ageBin = "all")
  )
}

stratisfy <- function(treatmentHistory) {
  treatmentPathways <- stratisfyAgeSexYear(treatmentHistory)
  dplyr::bind_rows(
    treatmentPathways,
    stratAll(treatmentPathways),
    stratAgeBin(treatmentPathways),
    stratSex(treatmentPathways),
    stratIndexYear(treatmentPathways)
  ) %>%
    mutate(sex = tolower(.data$sex)) %>%
    rename(age = "ageBin") %>%
    relocate("path", "freq", "age", "sex", "indexYear")
}

#' getFilteredSubjects
#' 
#' @noRd
#' 
#' @param andromeda andromeda
#' 
#' @return data.frame()
getFilteredSubjects <- function(andromeda) {
  targetCohortId <- andromeda$cohorts %>%
    dplyr::filter(.data$type == "target") %>%
    dplyr::pull(.data$cohortId)
  
  out <- andromeda$currentCohorts %>%
    dplyr::anti_join(andromeda$treatmentHistory, join_by(personId == personId)) %>%
    dplyr::filter(.data$cohortId == targetCohortId) %>%
    dplyr::mutate(
      indexYear = floor(.data$startDate / 365.25) + 1970,
      eventCohortName = "None",
      eventCohortId = "-1",
      durationEra = 0,
      eventSeq = 1) %>%
    dplyr::select("personId", "indexYear", "age", "sex", "eventCohortName", "eventCohortId", "eventSeq") %>%
    dplyr::collect()
  
  if (nrow(out) == 0) {
    return(NULL)
  } else {
    return(out)
  }
}
