#' @title Structural Break Tests
#' @name structural_breaks
#' @description Functions for testing structural breaks in the price relationship.
NULL

#' Test for Structural Breaks
#'
#' Runs a battery of structural break tests on the aggregated time series
#' relating production prices to direct prices.
#'
#' @param panel_data Data frame in panel format.
#' @param chow_years Integer vector of candidate break years for the Chow
#'   test. When NULL (the default), the years 1986, 1997, 2001 and 2008 are
#'   used, restricted to those present in the aggregated series.
#' @param min_segment Integer minimum observations per segment; it is passed
#'   to the Chow tests. Default 10.
#'
#' @return A list containing:
#' \describe{
#'   \item{chow}{Chow test results for candidate years}
#'   \item{cusum}{CUSUM test results}
#'   \item{mosum}{MOSUM test results}
#'   \item{supf}{supremum F test results}
#'   \item{breakpoints}{Estimated breakpoint dates}
#'   \item{aggregated_data}{The aggregated time series used}
#' }
#'
#' @details
#' The panel is aggregated to a single time series by taking means across
#' sectors and sorted by year. The structural break tests from the
#' strucchange package are then applied to that series. A warning is raised
#' when the aggregated series has fewer than 20 observations.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("strucchange", quietly = TRUE)) {
#'   set.seed(123)
#'   years <- 1980:2020
#'   panel <- data.frame(
#'     year = rep(years, 5),
#'     sector = rep(LETTERS[1:5], each = length(years)),
#'     log_direct = rnorm(length(years) * 5, 5, 0.5),
#'     log_production = rnorm(length(years) * 5, 5, 0.5)
#'   )
#'
#'   break_tests <- test_structural_breaks(panel)
#'   print(break_tests$cusum)
#' }
#' }
#'
#' @export
test_structural_breaks <- function(panel_data,
                                    chow_years = NULL,
                                    min_segment = 10L) {

    check_package("strucchange", "structural break tests")
    validate_panel_data(panel_data, require_log = TRUE)

    # Aggregate the panel, then put the series in chronological order.
    series <- aggregate_to_timeseries(
        panel_data,
        vars = c("log_direct", "log_production")
    )
    series <- series[order(series$year), ]

    if (nrow(series) < 20L) {
        warning("Fewer than 20 observations; break test results may be unreliable.")
    }

    # Without user-supplied years, fall back to the default candidates that
    # actually occur in the series.
    if (is.null(chow_years)) {
        default_years <- c(1986L, 1997L, 2001L, 2008L)
        in_series <- default_years %in% series$year
        chow_years <- default_years[in_series]
    }

    # list() evaluates its arguments in order, so the tests run in the
    # sequence Chow, CUSUM, MOSUM, supF, breakpoints.
    list(
        chow = run_chow_tests(series, chow_years, min_segment),
        cusum = run_cusum_test(series),
        mosum = run_mosum_test(series),
        supf = run_supf_test(series),
        breakpoints = extract_breakpoints(series),
        aggregated_data = series
    )
}


#' Run Chow Tests at Candidate Break Years
#'
#' Internal function to perform Chow tests of
#' \code{log_production_mean ~ log_direct_mean} at each candidate year.
#'
#' @param ts_data Aggregated time series data with columns \code{year},
#'   \code{log_direct_mean} and \code{log_production_mean}.
#' @param break_years Vector of candidate break years. A year is the last
#'   observation of the left-hand segment.
#' @param min_segment Minimum segment size. Candidate years leaving fewer
#'   observations than this on either side are skipped.
#'
#' @return Data frame with one row per tested year (columns
#'   \code{year_break}, \code{n_left}, \code{n_right}, \code{F_statistic},
#'   \code{p_value}), sorted by ascending p-value, or NULL when no candidate
#'   year could be tested.
#'
#' @details
#' For each year the pooled regression is compared with separate regressions
#' on the observations up to and after that year:
#' \code{F = ((SSR_pooled - SSR_split) / k) / (SSR_split / (n - 2k))},
#' where \code{k} is the number of coefficients. Years that are absent from
#' \code{ts_data$year}, or that leave a segment with no residual degrees of
#' freedom, are skipped.
#'
#' @keywords internal
run_chow_tests <- function(ts_data, break_years, min_segment) {

    if (length(break_years) == 0L) {
        return(NULL)
    }

    model <- log_production_mean ~ log_direct_mean

    pooled_fit <- stats::lm(model, data = ts_data)
    ssr_pooled <- sum(stats::residuals(pooled_fit)^2)
    k <- length(stats::coef(pooled_fit))
    n <- nrow(ts_data)

    # Chow test at a single candidate year; NULL when the year is unusable.
    chow_at <- function(yr) {
        if (!(yr %in% ts_data$year)) {
            return(NULL)
        }

        idx_left <- which(ts_data$year <= yr)
        n_left <- length(idx_left)
        n_right <- n - n_left

        if (n_left < min_segment || n_right < min_segment) {
            return(NULL)
        }

        # Each segment needs more observations than coefficients; otherwise
        # the split fit is exact and the F statistic is undefined.
        if (n_left <= k || n_right <= k) {
            return(NULL)
        }

        fit_left <- stats::lm(model, data = ts_data[idx_left, ])
        fit_right <- stats::lm(model, data = ts_data[-idx_left, ])

        ssr_split <- sum(stats::residuals(fit_left)^2) +
            sum(stats::residuals(fit_right)^2)
        df_resid <- n_left + n_right - 2L * k

        f_stat <- ((ssr_pooled - ssr_split) / k) / (ssr_split / df_resid)

        # Upper tail computed directly: 1 - pf(...) rounds to exactly 0 for
        # very strong breaks and loses the ordering among them.
        p_value <- stats::pf(
            f_stat,
            df1 = k,
            df2 = df_resid,
            lower.tail = FALSE
        )

        data.frame(
            year_break = yr,
            n_left = n_left,
            n_right = n_right,
            F_statistic = f_stat,
            p_value = p_value,
            stringsAsFactors = FALSE
        )
    }

    rows <- Filter(Negate(is.null), lapply(break_years, chow_at))

    if (length(rows) == 0L) {
        return(NULL)
    }

    result_df <- do.call(rbind, rows)
    result_df[order(result_df$p_value), ]
}


#' Run CUSUM Test
#'
#' Internal function to perform recursive CUSUM test. The fluctuation process
#' is built with \code{strucchange::efp(type = "Rec-CUSUM")} and tested with
#' \code{strucchange::sctest}.
#'
#' @param ts_data Aggregated time series data.
#'
#' @return List with \code{statistic}, \code{p_value} and \code{available}.
#'   When either step raises an error, \code{available} is FALSE and the
#'   other two elements are NA.
#'
#' @keywords internal
run_cusum_test <- function(ts_data) {

    unavailable <- list(statistic = NA, p_value = NA, available = FALSE)
    as_null <- function(e) NULL

    process <- tryCatch(
        strucchange::efp(
            log_production_mean ~ log_direct_mean,
            data = ts_data,
            type = "Rec-CUSUM"
        ),
        error = as_null
    )
    if (is.null(process)) {
        return(unavailable)
    }

    outcome <- tryCatch(strucchange::sctest(process), error = as_null)
    if (is.null(outcome)) {
        return(unavailable)
    }

    list(
        statistic = as.numeric(outcome$statistic),
        p_value = outcome$p.value,
        available = TRUE
    )
}


#' Run MOSUM Test
#'
#' Internal function to perform OLS-MOSUM test. The fluctuation process is
#' built with \code{strucchange::efp(type = "OLS-MOSUM")} and tested with
#' \code{strucchange::sctest}.
#'
#' @param ts_data Aggregated time series data.
#'
#' @return List with \code{statistic}, \code{p_value} and \code{available}.
#'   When either step raises an error, \code{available} is FALSE and the
#'   other two elements are NA.
#'
#' @keywords internal
run_mosum_test <- function(ts_data) {

    unavailable <- list(statistic = NA, p_value = NA, available = FALSE)
    as_null <- function(e) NULL

    process <- tryCatch(
        strucchange::efp(
            log_production_mean ~ log_direct_mean,
            data = ts_data,
            type = "OLS-MOSUM"
        ),
        error = as_null
    )
    if (is.null(process)) {
        return(unavailable)
    }

    outcome <- tryCatch(strucchange::sctest(process), error = as_null)
    if (is.null(outcome)) {
        return(unavailable)
    }

    list(
        statistic = as.numeric(outcome$statistic),
        p_value = outcome$p.value,
        available = TRUE
    )
}


#' Run Supremum F Test
#'
#' Internal function to perform supF test for unknown break date. The F
#' statistics come from \code{strucchange::Fstats} called with
#' \code{from = 0.15} and are tested with \code{strucchange::sctest}.
#'
#' @param ts_data Aggregated time series data.
#'
#' @return List with \code{statistic}, \code{p_value} and \code{available}.
#'   When either step raises an error, \code{available} is FALSE and the
#'   other two elements are NA.
#'
#' @keywords internal
run_supf_test <- function(ts_data) {

    unavailable <- list(statistic = NA, p_value = NA, available = FALSE)
    as_null <- function(e) NULL

    f_sequence <- tryCatch(
        strucchange::Fstats(
            log_production_mean ~ log_direct_mean,
            data = ts_data,
            from = 0.15
        ),
        error = as_null
    )
    if (is.null(f_sequence)) {
        return(unavailable)
    }

    outcome <- tryCatch(strucchange::sctest(f_sequence), error = as_null)
    if (is.null(outcome)) {
        return(unavailable)
    }

    list(
        statistic = as.numeric(outcome$statistic),
        p_value = outcome$p.value,
        available = TRUE
    )
}


#' Extract Estimated Breakpoints
#'
#' Internal function to estimate breakpoint locations with
#' \code{strucchange::breakpoints} (called with \code{h = 0.15}) and map the
#' estimated row indices to the corresponding values of \code{ts_data$year}.
#'
#' @param ts_data Aggregated time series data.
#'
#' @return Vector of estimated breakpoint years, or NULL when the estimation
#'   raises an error or every estimated breakpoint is NA.
#'
#' @keywords internal
extract_breakpoints <- function(ts_data) {

    fit <- tryCatch(
        strucchange::breakpoints(
            log_production_mean ~ log_direct_mean,
            data = ts_data,
            h = 0.15
        ),
        error = function(e) NULL
    )

    nothing_found <- is.null(fit) || all(is.na(fit$breakpoints))
    if (nothing_found) {
        return(NULL)
    }

    # The fitted breakpoints are used as row positions in ts_data.
    ts_data$year[fit$breakpoints]
}


#' Format Structural Break Results
#'
#' Builds a one-row-per-test summary table from structural break test
#' results.
#'
#' @param break_results Result from test_structural_breaks.
#' @param alpha Significance level for interpretation. Default 0.05.
#'
#' @return A data frame with columns \code{test}, \code{statistic} (rounded
#'   to 3 decimals), \code{p_value} (rounded to 4 decimals),
#'   \code{significant} (unrounded p-value below \code{alpha}) and
#'   \code{details}. The Chow row describes the first row of the Chow table;
#'   CUSUM, MOSUM and supF rows appear only when the test is flagged as
#'   available; the breakpoints row lists the estimated years and has NA in
#'   the numeric columns. A zero-row data frame is returned when there is
#'   nothing to report.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("strucchange", quietly = TRUE)) {
#'   set.seed(123)
#'   years <- 1980:2020
#'   panel <- data.frame(
#'     year = rep(years, 5),
#'     sector = rep(LETTERS[1:5], each = length(years)),
#'     log_direct = rnorm(length(years) * 5, 5, 0.5),
#'     log_production = rnorm(length(years) * 5, 5, 0.5)
#'   )
#'
#'   break_tests <- test_structural_breaks(panel)
#'   summary_df <- format_break_results(break_tests)
#'   print(summary_df)
#' }
#' }
#'
#' @export
format_break_results <- function(break_results, alpha = 0.05) {

    # Summary row for a test that produced a statistic and a p-value.
    summary_row <- function(label, stat, pval, note) {
        data.frame(
            test = label,
            statistic = round(stat, 3),
            p_value = round(pval, 4),
            significant = pval < alpha,
            details = note,
            stringsAsFactors = FALSE
        )
    }

    rows <- list()

    chow_table <- break_results$chow
    if (!is.null(chow_table) && nrow(chow_table) > 0L) {
        top <- chow_table[1L, ]
        rows$chow <- summary_row(
            "Chow",
            top$F_statistic,
            top$p_value,
            sprintf("Year: %d", top$year_break)
        )
    }

    # The three strucchange-based tests share one result layout.
    stat_tests <- list(
        cusum = list(label = "CUSUM", res = break_results$cusum),
        mosum = list(label = "MOSUM", res = break_results$mosum),
        supf = list(label = "supF", res = break_results$supf)
    )
    for (key in names(stat_tests)) {
        res <- stat_tests[[key]]$res
        if (!is.null(res) && res$available) {
            rows[[key]] <- summary_row(
                stat_tests[[key]]$label,
                res$statistic,
                res$p_value,
                ""
            )
        }
    }

    estimated <- break_results$breakpoints
    if (!is.null(estimated)) {
        rows$breakpoints <- data.frame(
            test = "Breakpoints",
            statistic = NA,
            p_value = NA,
            significant = NA,
            details = sprintf(
                "Estimated: %s",
                paste(estimated, collapse = ", ")
            ),
            stringsAsFactors = FALSE
        )
    }

    if (length(rows) > 0L) {
        return(do.call(rbind, rows))
    }

    data.frame(
        test = character(0),
        statistic = numeric(0),
        p_value = numeric(0),
        significant = logical(0),
        details = character(0)
    )
}


#' Interpret Break Test Results
#'
#' Provides textual interpretation of structural break tests.
#'
#' @param break_results Result from test_structural_breaks.
#' @param alpha Significance level. Default 0.05.
#'
#' @return Character string with interpretation: one line per finding
#'   (CUSUM, MOSUM, supF rejections, significant Chow years, estimated
#'   breakpoints), joined by newlines. When there is nothing to report, a
#'   single sentence stating that no significant evidence was detected.
#'
#' @details
#' A test counts as significant only when its p-value is non-missing and
#' below \code{alpha}. Missing (NA/NaN) p-values are treated as
#' "not significant" rather than raising an error or being reported as a
#' significant year.
#'
#' @examples
#' \donttest{
#' if (requireNamespace("strucchange", quietly = TRUE)) {
#'   set.seed(123)
#'   years <- 1980:2020
#'   panel <- data.frame(
#'     year = rep(years, 5),
#'     sector = rep(LETTERS[1:5], each = length(years)),
#'     log_direct = rnorm(length(years) * 5, 5, 0.5),
#'     log_production = rnorm(length(years) * 5, 5, 0.5)
#'   )
#'
#'   break_tests <- test_structural_breaks(panel)
#'   interpretation <- interpret_break_tests(break_tests)
#'   cat(interpretation)
#' }
#' }
#'
#' @export
interpret_break_tests <- function(break_results, alpha = 0.05) {

    # TRUE only for an available test whose p-value is known and below alpha.
    # isTRUE() keeps an NA p-value from aborting the if() below.
    rejects <- function(res) {
        !is.null(res) && isTRUE(res$available) && isTRUE(res$p_value < alpha)
    }

    messages <- character(0)

    if (rejects(break_results$cusum)) {
        messages <- c(messages,
                      "CUSUM test rejects stability (systematic parameter drift detected).")
    }

    if (rejects(break_results$mosum)) {
        messages <- c(messages,
                      "MOSUM test rejects stability (structural instability detected).")
    }

    if (rejects(break_results$supf)) {
        messages <- c(messages,
                      "supF test suggests a structural break at unknown date.")
    }

    chow <- break_results$chow
    if (!is.null(chow) && nrow(chow) > 0L) {
        # which() drops NA comparisons; logical subsetting with NA would
        # add an all-NA row and report year "NA" as significant.
        sig_rows <- which(chow$p_value < alpha)
        if (length(sig_rows) > 0L) {
            years_str <- paste(chow$year_break[sig_rows], collapse = ", ")
            messages <- c(messages,
                          sprintf("Chow test significant at year(s): %s.", years_str))
        }
    }

    if (!is.null(break_results$breakpoints)) {
        bp_str <- paste(break_results$breakpoints, collapse = ", ")
        messages <- c(messages,
                      sprintf("Estimated breakpoint(s): %s.", bp_str))
    }

    if (length(messages) == 0L) {
        return("No significant evidence of structural breaks detected.")
    }

    paste(messages, collapse = "\n")
}
