# Dependent version of Cliff's d-statistic
#   weighted version of Cliff's dependent-groups d: Cliff_dependent().
#   If using tables then Cliff_compute_from_table()

#' Computes Cliff's dependent d-statistics based on a dominance matrix.
#'
#' Takes the dominance matrix provided and computes the d-statistics:
#' dw - within-subjects d-statistic
#' db - between-subjects d-statistic
#' db_dw - sum of dw and db, omnibus test of whether one group is higher than the other
#' Cliff, N. (1993). Dominance statistics: Ordinal analyses to answer ordinal questions.
#' Psychological Bulletin, 114(3), 494-509.
#' Cliff, N. (1996). Ordinal methods for behavioral data analysis.  Mawhaw NJ: Lawerence Erlbaum.
#' @importFrom stats var
#' @importFrom stats cov
#' @param d_matrix N x N within-subjects dominance matrix
#' @returns a list containing
#'    dw: within-subjects d-statistic
#'    sigma_dw: SE of dw
#'    z_dw: z-score for dw
#'    db: between-subjects d-statistic
#'    sigma_db: SE of db
#'    z_db: z-score for db
#'    db_dw: sum db + dw, omnibus measure
#'    sigma_db_dw: SE of db + dw
#'    z_db_dw: z-score of db _ dw
#'    cov_db_dw: covariance between db and dw
#' @export
#' @examples
#' Cliff_dependent(interference_control_1)
Cliff_dependent <- function(d_matrix) {
  n = nrow(d_matrix)
  m = ncol(d_matrix)
  if (m != n) {
    stop(paste("d-matrix must be square, nrows =", n, ", ncols =", m))
  }

  dii <- vector("numeric", n)
  dx <- vector("numeric", n)
  dy <- vector("numeric", n)
  d_total <- 0
  dii_total <- 0
  three_way <- 0
  for (i in 1:n) {
    dii[i] <- d_matrix[i, i]
    dii_total <- dii_total + dii[i]
    dx[i] <- 0
    dy[i] <- 0
    for (h in 1:n) {
      d_total <- d_total + d_matrix[i, h]
      dx[i] <- dx[i] + d_matrix[i, h]
      dy[i] <- dy[i] + d_matrix[h, i]
    }
    three_way <- three_way + (dx[i] + dy[i]) * dii[i]
  }

  dw <- dii_total / n
  db <- (d_total - dii_total) / (n * (n - 1))

  dx_star <-vector("numeric", 0)
  dy_star <- vector("numeric", 0)
  dij_star <- matrix(nrow=n, ncol=m)
  dx2 <- 0
  dy2 <- 0
  dxy <- 0
  d_ij2 <- 0
  for (i in 1:n) {
    dx_star[i] <- dx[i]/n - db
    dy_star[i] <- dy[i]/n - db
    dx2 <- dx2 + dx_star[i]^2
    dy2 <- dy2 + dy_star[i]^2
    dxy <- dxy + dx_star[i] * dy_star[i]
    for (j in 1:n) {
      dij_star[i, j] <- d_matrix[i, j] - db
      d_ij2 <- d_ij2 + dij_star[i, j] * dij_star[i, j]
    }
  }

  d_ij_ji <- 0
  dij_ji <- vector("numeric", n * n)
  index = 1
  for (i in 1:n) {
    for (j in 1:n) {
      d_ij_ji <- d_ij_ji + dij_star[i, j] * dij_star[j, i]
      dij_ji[index] <- dij_star[i, j] * dij_star[j, i]
      index <- index + 1
    }
  }

  var_dw <- var(dii) / n
  s_dw <- sqrt(var_dw)
  z_dw <- dw / s_dw

  var_db <- (((n - 1)^2 *
               (dx2 + dy2 + 2 * dxy) - d_ij2 - d_ij_ji) /
            (n * (n - 1) * (n - 2) * (n - 3)))

  min_value <- (1.0 - db^2) / (n^2 - 1)
  if (is.na(var_db) || var_db < min_value) {
    var_db <- min_value
  }
  s_db <- sqrt(var_db)
  z_db <- db / s_db

  cov_dw_db <- (cov(dii - db, dx / n) + cov(dii - db, dy / n)) / n

  var_dw_db <- var_dw + var_db + 2 * cov_dw_db
  s_dw_db <- sqrt(var_dw_db)
  z_dw_db <- (dw + db) / s_dw_db

  list(dw=dw, sigma_dw=s_dw, z_dw=z_dw,
       db=db, sigma_db=s_db, z_db=z_db,
       db_dw=dw+db, sigma_db_dw=s_dw_db, z_db_dw=z_dw_db,
       cov_db_dw=cov_dw_db)
}

#' Converts two vectors containing scores and integer frequencies (cell counts) into a d-matrix
#'
#' @param scores vector of scores, typically 1:r
#' @param cells vector of integer weights, i.e. cell frequencies
#' @param nrow number of score categories in table. Default is NULL.
#' If NULL, takes 1:length(scores)
#' @returns d-matrix of results
#' @export
Cliff_as_d_matrix <- function(scores, cells, nrow=NULL) {
  if (is.null(nrow)) {
    nrow <- length(scores)
  }
  n <- sum(cells)
  x_scores <- vector("numeric", n)
  y_scores <- vector("numeric", n)
  index <- 0
  for (i in 1:nrow) {
    for (j in scores) {
      cell <- cells[i, j]
      if (cell > 0) {
        for (k in 1:cell) {
          index <- index + 1
          x_scores[index] <- i
          y_scores[index] <- j
        }
      }
    }
  }

  d_matrix <- matrix(nrow=n, ncol=n)
  for (i in seq_along(x_scores)) {
    for (j in seq_along(y_scores)) {
      d_matrix[i, j] <- sign(x_scores[i] - y_scores[j])
    }
  }
  d_matrix
}

#' Computes Cliff's dependent d-statistics based on a dominance matrix.
#'
#' Takes the dominance matrix provided and computes the d-statistics:
#' dw - within-subjects d-statistic
#' db - between-subjects d-statistic
#' db_dw - sum of db and dw, omnibus test of whether one group is higher than the other
#' Cliff, N. (1993). Dominance statistics: Ordinal analyses to answer ordinal questions.
#' Psychological Bulletin, 114(3), 494-509.
#' Cliff, N. (1996). Ordinal methods for behavioral data analysis.  Mawhaw NJ: Lawerence-Erlbaum.
#' @importFrom stats var
#' @importFrom stats cov
#' @param d_matrix N x N within-subjects dominance matrix
#' @returns a list containing
#'    dw: within-subjects d-statistic
#'    sigma_dw: SE of dw
#'    z_dw: z-score for dw
#'    db: between-susbjects d-statistic
#'    sigma_db: SE of db
#'    z_db: z-score for db
#'    db_dw: sum db + dw, omnibus measure
#'    sigma_db_dw: SE of db + dw
#'    z_db_dw: z-score of db _ dw
#'    cov_db_dw: covariance between db and dw
#' @export
#' @examples
#' Cliff_dependent_compute_from_matrix(interference_control_1)
Cliff_dependent_compute_from_matrix  <- function(d_matrix) {
  nx <- nrow(d_matrix)
  ny <- ncol(d_matrix)
  if (nx != ny) {
    stop(paste("d-matrix must be square, nrows =", nx, ", ncols =", ny))
  }
  maxVal <- max(d_matrix)
  minVal <- min(d_matrix)
  if (minVal < -1 || 1 < maxVal) {
    stop(paste("the matrix is not a valid dominance matrix\n", d_matrix))
  }

  n <- nx

  dx <- rowSums(d_matrix) / n
  dy <- colSums(d_matrix) / n
  d <- sum(d_matrix) / n^2
  dii <- diag(d_matrix)
  dw_bar <- mean(dii)
  var_dw <- var(dii) / n
  sigma_dw <- sqrt(var_dw)

  db_bar <- sum(sum(d_matrix) - sum(dii)) / (n * (n - 1))
  sum_dx2 <- sum((dx - db_bar)^2)
  sum_dy2 <- sum((dy - db_bar)^2)
  sum_dx_dy <- sum((dx - db_bar)*(dy - db_bar))
  sum_squares <- sum((d_matrix - db_bar)^2)
  sum_products <- 0.0
  for (i in 1:n) {
    for (j in 1:n) {
      sum_products <- sum_products +
        (d_matrix[i, j] - db_bar) * (d_matrix[j, i] - db_bar)
    }
  }
  var_db <- ((n-1)^2 *(sum_dx2 + sum_dy2 + 2 * sum_dx_dy)
                       - sum_squares - sum_products) /
                      (n * (n - 1) * (n - 2) * (n - 3))

  min_value = (1 - db_bar^2) / (n^2 - 1)
  if (is.na(var_db) || var_db < min_value) {
    var_db <- min_value
  }
  sigma_db <- sqrt(var_db)

  sum_x <- rowSums(d_matrix)
  sum_y <- colSums(d_matrix)
  cov_db_dw = (cov(dii, dx) + cov(dii, dy)) / n  # (6.5)
  var_db_dw <- var_dw + var_db + 2 * cov_db_dw

  sigma_db_dw <- sqrt(var_db_dw)
  list(dw=dw_bar, sigma_dw=sigma_dw, z_dw=dw_bar/sigma_dw,
       db=db_bar, sigma_db=sigma_db, z_db=db_bar/sigma_db,
       db_dw=db_bar + dw_bar, sigma_db_dw=sigma_db_dw,
       z_db_dw=(db_bar + dw_bar) / sigma_db_dw,
       cov_db_dw=cov_db_dw)
}

#' Compute the sum in the covariance of db+dw
#'
#' @param d_matrix d-matrix of dominances
#' @return the sum for the covariance term
Cliff_dependent_compute_cov_from_d <- function(d_matrix) {
  sum_x <- rowSums(d_matrix)
  sum_y <- colSums(d_matrix)
  dii <- sign(diag(d_matrix))
  sum((sum_x + sum_y) * dii)
}


#' Computes Cliff's dependent d-statistics based on cell frequencies.
#'
#' Computes d-matrix and then analyzes it. This can be time consuming.  Try
#' Cliff_dependent_from_table() instead.
#' The current function is provided mainly for comparison & validation.
#' For an example, compare running this function on vision_data to running
#' Cliff_dependent_from_table(vision_data).
#'
#' dw - within-subjects d-statistic
#' db - between-subjects d-statistic
#' db_dw - sum of dw and db, omnibus test of whether one group is higher than the other
#' Cliff, N. (1993). Dominance statistics: Ordinal analyses to answer ordinal questions.
#' Psychological Bulletin, 114(3), 494-509.
#' Cliff, N. (1996). Ordinal methods for behavioral data analysis.  Mawhaw NJ: Lawerence-Erlbaum.
#' @param cells r x r matrix of frequencies
#' @returns a list containing
#'    dw: within-subjects d-statistic
#'    sigma_dw: SE of dw
#'    z_dw: z-score for dw
#'    db: between-subjects d-statistic
#'    sigma_db: SE of db
#'    z_db: z-score for db
#'    db_dw: sum db + dw, omnibus measure
#'    sigma_db_dw: SE of db + dw
#'    z_db_dw: z-score of db _ dw
#'    cov_db_dw: covariance between db and dw
#' @export
#' @examples
#' Cliff_dependent_compute_paired_d(movies)
#' @seealso [Cliff_dependent_compute_from_table()]
Cliff_dependent_compute_paired_d <- function(cells) {
  n_cats <- nrow(cells)
  if (n_cats != ncol(cells)) {
    stop(paste("matrix must be square, nrows =", n_cats, ", ncols =", ncol(cells)))
  }
  scores <- 1:n_cats
  d_matrix <- Cliff_as_d_matrix(scores, cells)
  result <- Cliff_dependent_compute_from_matrix(d_matrix)
  result
}


#' Computes Cliff's dependent d-statistics based on a table of frequency counts.
#'
#' Takes the r X r table and returns:
#' dw - within-subjects d-statistic
#' db - between-subjects d-statistic
#' db_dw - sum of dw and db, omnibus test of whether one group is higher than the other
#' No intermediate dominance matrix is computed, so this is much faster than
#' Cliff_dependent_compute_from_matrix().
#' Large number of terms are needed to compute intermediate d_ij_ji.  These are contained
#' in separate functions for r <= 6.  Results for r [7, 10] are available, but the files are
#' so large that they cause an error if included in the library.
#'
#' See:
#' Cliff, N. (1993). Dominance statistics: Ordinal analyses to answer ordinal questions.
#' Psychological Bulletin, 114(3), 494-509.
#' Cliff, N. (1996). Ordinal methods for behavioral data analysis. Mawhaw NJ: Lawerence-Erlbaum.
#' @importFrom stats weighted.mean
#' @param mij an r x r table of paired observations
#' @returns a list containing
#'    dw: within-subjects d-statistic
#'    sigma_dw: SE of dw
#'    z_dw: z-score for dw
#'    db: between-susbjects d-statistic
#'    sigma_db: SE of db
#'    z_db: z-score for db
#'    db_dw: sum db + dw, omnibus measure
#'    sigma_db_dw: SE of db + dw
#'    z_db_dw: z-score of db _ dw
#'    cov_db_dw: covariance between db and dw
#' @export
#' @examples
#' Cliff_dependent_compute_from_table(movies)
#' @seealso [Cliff_dependent_compute_paired_d()]
Cliff_dependent_compute_from_table <- function(mij) {
  if (nrow(mij) != ncol(mij)) {
    stop(paste("table must be square, nrows =", nrow(mij), ", ncols =", ncol(mij)))
  }
  n <- sum(mij)
  n_cats <- nrow(mij)
  n_col <- ncol(mij)
  scores <- 1:n_cats
  x <- rep(scores, each=n_cats)
  y <- rep(scores, n_cats)

  # mij <- matrix(unlist(cell_counts), nrow=n_cats, byrow=TRUE)
  w.x <- rowSums(mij)
  w.y <- colSums(mij)
  wd <- Cliff_weighted_d_matrix(x=scores, y=scores, w.x=w.x, w.y=w.y)


  d_ii <- sign(x - y)
  weights <- vector("double", n_cats * n_cats)
  index <- 1
  for (i in 1:n_cats) {
    weights[index:(index + n_cats - 1)] <- mij[i,]
    index <- index + n_cats
  }
  dw_bar <- weighted.mean(d_ii, weights / n)
  sigma_dw <- sqrt(weighted_var(d_ii, as.vector(mij)) / n)
  db_bar <- (sum(wd) - dw_bar * n) / (n * (n - 1))

  d.x <- rowSums(wd) / n / w.x
  d.y <- colSums(wd) / n / w.y
  var_dx <- weighted_var(d.x, w.x)
  var_dy <- weighted_var(d.y, w.y)

  sum_dx2 <- sum(w.x * (d.x - db_bar)^2)
  sum_dy2 <- sum(w.y * (d.y - db_bar)^2)
  sum_dxdy <- 0.0
  d_ij2 <- 0.0
  d_ij_ji <- 0
  vec <- vector("numeric", 0)

  w_ij_star = vector("numeric", 0)
  d_ij_star = vector("numeric", 0)
  d_ij_ji <- 0
  for (r in 1:n_cats) {
    for (c in 1:n_cats) {
      sum_dxdy <- sum_dxdy + mij[r, c] * (d.x[r] - db_bar) * (d.y[c] - db_bar)
      d_ij2 <- d_ij2 + w.x[r] * w.y[c] * (sign(r - c) - db_bar)^2
     }
  }

  if (n_cats == 2) {
    counts <- Cliff_counts_2(mij)
  } else if (n_cats == 3) {
    counts <- Cliff_counts_3(mij)
  } else if (n_cats == 4) {
    counts <- Cliff_counts_4(mij)
  } else if (n_cats == 5) {
    counts <- Cliff_counts_5(mij)
  } else if (n_cats == 6) {
    counts <- Cliff_counts_6(mij)
  # } else if (n_cats == 7) {
  #   counts <- Cliff_counts_7()
  # } else if (n_cats == 8) {
  #   counts <- Cliff_counts_8()
  # } else if (n_cats == 9) {
  #   counts <- Cliff_counts_9()
  # } else if (n_cats == 10) {
  #   counts <- Cliff_counts_10()
  } else {
    stop(paste("More than 6 levels not currently supported due to devtools throwing error",
        "due to size of files.  Use Cliff_dependent_compute_paired_d().",
        "This will run longer than necessary, but we live in an imperfect world."))
  }

  wm1m1 <- counts$wm1m1
  wm10 <- counts$wm10
  wm11 <- counts$wm11
  w00 <- counts$w00
  w01 <- counts$w01
  w11 <- counts$w11

  # print(paste(w00, w11, wm10, w01, wm11, wm1m1))

  d_ij_ji <- w11 * (1 - db_bar) ^2
  d_ij_ji <- d_ij_ji + wm1m1 * (-1 - db_bar)^2
  d_ij_ji <- d_ij_ji + w00 * (0 - db_bar)^2
  d_ij_ji <- d_ij_ji + wm11 * (-1 - db_bar) * (1 - db_bar)
  d_ij_ji <- d_ij_ji + wm10 * (-1 - db_bar) * (0 - db_bar)
  d_ij_ji <- d_ij_ji + w01 * (1 - db_bar) * (0 - db_bar)

  var_db = ((n - 1)^2 * (sum_dx2 + sum_dy2 + 2 * sum_dxdy) -
    d_ij2 - d_ij_ji) / (n * (n - 1) * (n - 2) * (n - 3))

  min_value <- (1.0 - db_bar^2) / (n^2 - 1)
  if (is.na(var_db) || var_db < min_value) {
    var_db <- min_value
  }
  sigma_db <- sqrt(var_db)

  wd <- Cliff_weighted_d_matrix(x, y, weights, weights)
  cov <- Cliff_dependent_compute_cov(wd)
  cov_dw_db <- (cov  - 2 * n * (n - 1) * db_bar * dw_bar) / (n * (n - 1) * (n - 2))
  sigma_dw_db <- sqrt(sigma_dw^2 + sigma_db^2 + 2 * cov_dw_db)

  list(dw=dw_bar, sigma_dw=sigma_dw, z_dw=dw_bar / sigma_dw,
       db=db_bar, sigma_db=sigma_db, z_db=db_bar / sigma_db,
       db_dw=dw_bar + db_bar, sigma_db_dw=sigma_dw_db, z_db_dw=(dw_bar + db_bar) / sigma_dw_db,
       cov_db_dw=cov_dw_db)
}

#' Computes sum term in covariance db-dw for weighted dominance matrix.
#'
#' @param wd weighted dominance matrix
Cliff_dependent_compute_cov <- function(wd) {
  d.x <- rowSums(wd)
  d.y <- colSums(wd)
  dii <- sign(diag(wd))
  sum((d.x + d.y) * dii)
}


#' Computes between groups dominance matrix "d".
#'
#' @param x first vector of scores
#' @param y second vector of scores
#' @returns N X N dominance matrix
#' @export
Cliff_compute_d <- function(x, y) {
  n <- length(x)
  m <- length(y)
  d <- matrix(nrow = n, ncol = m)
  for (i in 1:n) {
    for (j in 1:m) {
      d[i, j] = sign(x[i] - y[j])
    }
  }
  d
}


#' Computes weighted version of dominance matrix "d"
#'
#' Arguments are scores and associated weights. Not useful for tables.
#' Use Cliff_compute_d_matrix instead.
#' @param x first vector of scores
#' @param y second vector of scores
#' @param w.x first vector of weights, to apply to x. Defaults to vector of 1.0
#' @param w.y second vector of weights, to apply to y. Defaults to vector of 1.0
#' @returns an n X m d-matrix, where n is length(x) and m is length(y)
#' @export
Cliff_weighted_d_matrix <- function(x, y, w.x=rep(1, length(x)),
                              w.y=rep(1, length(y))) {
  n <- length(x)  # n_cats
  m <- length(y)  # n_cats

  # initialize storage
  d <- matrix(nrow = n, ncol = m)
  diag <- vector("double", n)
  wd <- matrix(nrow = n, ncol = m)

  for (i in 1:n) {
    for (j in 1:m) {
      d[i, j] = sign(x[i] - y[j])
      wd[i, j] = w.x[i] * w.y[j] * d[i, j]
    }
  }
  wd
}
