#=====================================================================
#=====================================================================

#' @title Fortran wrapper for pedigree reconstruction
#'
#' @description Call main Fortran part of sequoia, and convert its output to a
#'   list with dataframes.
#'
#' @param ParSib either "par" to call parentage assignment, or "sib" to call the
#'   rest of the algorithm.
#' @param Specs a named vector with parameter values, as generated by
#'   \code{\link{SeqPrep}}.
#' @param ErrM  3x3 matrix with genotyping error probabilities; rows=actual,
#'   columns=observed
#' @param GenoM matrix with genotype data, size nInd x nSnp
#' @param LhIN  life history data: ID - sex - birth year
#' @param AgePriors matrix with agepriors, size Specs["nAgeClasses"] by 8.
#' @param Parents  matrix with rownumbers of assigned parents, size nInd by 2
#' @param quiet suppress messages
#'
#' @return A list with
#' \item{PedigreePar or Pedigree}{the pedigree}
#' \item{MaybeParent or MaybeRel}{Non-assigned likely relatives}
#' \item{MaybeTrio}{Non-assigned parent-parent-offspring trios}
#' \item{DummyIDs}{Info on dummies}
#' \item{TotLikParents or TotLikSib}{Total log-likelihood per iteration}
#'
#' For a detailed description of the output see \code{\link{sequoia}}
#'
#' @useDynLib sequoia, .registration = TRUE
#'
#' @importFrom plyr dlply
#'
#' @keywords internal

SeqParSib <- function(ParSib = "par",
                      Specs = NULL,
                      ErrM = NULL,
                      GenoM = NULL,
                      LhIN = NULL,
                      AgePriors = NULL,
                      Parents = NULL,
                      quiet = FALSE)
{
  on.exit(.Fortran(deallocall), add=TRUE) #, PACKAGE = "sequoia"))

  Ng <- FacToNum(Specs[,"NumberIndivGenotyped"])
  gID <- rownames(GenoM)
  GenoV <- as.integer(GenoM)
  LHF <- orderLH(LhIN[LhIN$Sex %in% c(1:3), ], gID)
  DumPfx <- Specs[,c("DummyPrefixFemale", "DummyPrefixMale")]
  if (!is.null(Parents)) {
    if (Specs[["Complexity"]] == "herm") {
      Parents <- herm_clone_Ped(Ped = Parents, LH = LhIN[,1:3], herm.suf=c("f", "m"))
      Parents <- Parents[match(rownames(GenoM),Parents[,1]), ]
    }
    PedPar <- IDToNum(Parents, gID, DumPrefix=paste0(DumPfx,"0"))[, 2:3]
    PedPar <- c(as.matrix(PedPar))
    if (length(PedPar) != Ng*2) stop("'PedPar' wrong length")
  } else {
    PedPar <- rep(0, Ng*2)
  }
  Complx <- switch(Specs[,"Complexity"], full = 2, simp = 1, mono = 0, herm = 4)
  UAge <- switch(Specs[,"UseAge"], extra = 2, yes = 1, no = 0)
  PrSb <- switch(ParSib, par = 1, sib = 2)   # dup = 0,

  SpecsIntGlb <- c(nSnp = FacToNum(Specs[,"NumberSnps"]),           # 1
                   MaxMisDUP = FacToNum(Specs[,"MaxMismatchDUP"]),  # 2
                   MaxMisOH = FacToNum(Specs[,"MaxMismatchOH"]),    # 3
                   MaxMisME = FacToNum(Specs[,"MaxMismatchME"]),    # 4
                   SMax = as.integer(FacToNum(Specs[,"MaxSibshipSize"])),   # 5
                   Complx = as.integer(Complx),                 # 6
                   quiet = as.integer(quiet),                   # 7
                   nAgeCl = FacToNum(Specs[,"nAgeClasses"]))    # 8

  SpecsIntMkPed <- c(ParSib = as.integer(PrSb),                   # 1
                     MaxSibIter = FacToNum(Specs[,"MaxSibIter"]), # 2
                     AgeEffect = as.integer(UAge),                # 3
                     CalcLLR = as.integer(as.logical(Specs[,"CalcLLR"])))  # 4

  SpecsDbl <- c(TF = FacToNum(Specs[,"Tfilter"]),
                TA = FacToNum(Specs[,"Tassign"]))

  if (is.null(AgePriors) & UAge==0) {
    AP <- MakeAgePrior(MaxAgeParent = Specs[,"nAgeClasses"] -1, Plot=FALSE, quiet=TRUE)
  } else if (length(as.double(AgePriors)) < 5*FacToNum(Specs[,"nAgeClasses"])) {
    stop("'AgePriors' matrix should have (at least) size nAgeClasses * 5")
  } else {
    if (all(c("M", "P", "FS", "MS", "PS") %in% colnames(AgePriors))) {
      AP <- AgePriors[, c("M", "P", "FS", "MS", "PS")]
    } else {
      stop("'AgePriors' matrix should have at least columns M-P-FS-MS-PS")
    }
  }
  MaxMaxAgePO <- 100  # if changed, change in fortran too!

  TMP <- .Fortran(makeped,   # Error: object 'makeped_' not found
                  ng = as.integer(Ng),
                  specsintglb = as.integer(SpecsIntGlb),
                  specsintmkped = as.integer(SpecsIntMkPed),
                  specsdbl = as.double(SpecsDbl),
                  errv = as.double(ErrM),
                  genofr = as.integer(GenoV),
                  sexrf = as.integer(LHF$Sex),
                  byrf = as.integer(c(LHF$BirthYear, LHF$BY.min, LHF$BY.max)),
                  aprf = as.double(AP),

                  parentsrf = as.integer(PedPar),
                  lrrf = double(3*Ng),
                  ohrf = integer(3*Ng),
                  nd = integer(2),
                  dumparrf = integer(2*Ng),
                  dumlrrf = double(3*Ng),
                  dumbyrf = integer(3*Ng),
                  totll = double(42),
									apout = double((3*MaxMaxAgePO)*5*3))
#                  PACKAGE = "sequoia")

  TMP$lrrf[abs(TMP$lrrf - 999) < 0.1] <- NA
  TMP$dumlrrf[abs(TMP$dumlrrf - 999) < 0.1] <- NA
  TMP$lrrf <- round(TMP$lrrf, 2)
  TMP$dumlrrf <- round(TMP$dumlrrf, 2)
  TMP$ohrf[TMP$ohrf < 0] <- NA

  #=========================
  # pedigree
	dID <- cbind(paste0(DumPfx[1], formatC(1:min(Ng,9999), width=4, flag=0)),
							 paste0(DumPfx[2], formatC(1:min(Ng,9999), width=4, flag=0)))

  Pedigree <- data.frame(id = gID,
                         VtoM(TMP$parentsrf),
                         VtoM(TMP$lrrf, nc=3),
                         VtoM(TMP$ohrf, nc=3),
                         stringsAsFactors=FALSE)
  names(Pedigree) <- c("id", "dam", "sire", "LLRdam", "LLRsire", "LLRpair",
                       "OHdam", "OHsire", "MEpair")
  for (k in 1:2) Pedigree[, k+1] <- NumToID(Pedigree[, k+1], k, gID, dID)

  if (any(LhIN$Sex==4)) {  # hermaphrodites
    Pedigree <- herm_unclone_Ped(Pedigree, LH=LhIN, herm.suf=c("f", "m"))
  }

  if (quiet<1) {
    if (grepl("par", ParSib)) {
      message("assigned ", sum(!is.na(Pedigree$dam)), " dams and ",
           sum(!is.na(Pedigree$sire)), " sires to ", nrow(Pedigree), " individuals")
    } else {
     message("assigned ", sum(!is.na(Pedigree$dam)), " dams and ",
           sum(!is.na(Pedigree$sire)), " sires to ", Ng, " + ", sum(TMP$nd),
           " individuals (real + dummy)")
    }
  }

  #=========================
  # dummies
  if (grepl("sib", ParSib) && any(TMP$nd>0)) {
    nd <- TMP$nd
    NgOdd <- Ng%%2==1
		DumPfx <- paste0(DumPfx, "0")
		DPnc <- nchar(DumPfx)[1]
    NumOff <- with(Pedigree, list("mat" = table(dam[which(substr(dam,1,DPnc) == DumPfx[1])]),
                                  "pat" = table(sire[substr(sire,1,DPnc) == DumPfx[2]])))
    MaxOff <- max(unlist(NumOff))
    OffIDs <- c(dlply(Pedigree, "dam", function(df) df$id),
                dlply(Pedigree, "sire", function(df) df$id))
    OffIDs <- OffIDs[c(names(NumOff[[1]]), names(NumOff[[2]]))]

    DummyIDs <- data.frame(id=c(dID[s(nd[1]), 1], dID[s(nd[2]), 2]),
                           VtoM(TMP$dumparrf, sum(nd), 2, NgOdd),
                           VtoM(TMP$dumlrrf, sum(nd), 3, NgOdd),
                           sex=rep(1:2, nd),
                           VtoM(TMP$dumbyrf, sum(nd),3, NgOdd),
                           unlist(NumOff),
                           stringsAsFactors=FALSE)
    names(DummyIDs) <-  c("id", "dam", "sire", "LLRdam", "LLRsire", "LLRpair",
                          "Sex", "BY.est", "BY.lo", "BY.hi", "NumOff")
    DummyIDs <- cbind(DummyIDs,
                      setNames(t(sapply(OffIDs, "[", i=1:MaxOff)),
                               paste0("O", 1:MaxOff)))
    for (k in 1:2) DummyIDs[, k+1] <- NumToID(DummyIDs[, k+1], k, gID, dID)

    Pedigree <- rbind(Pedigree,
											cbind(DummyIDs[, 1:6],
														OHdam = NA, OHsire = NA, MEpair = NA) )
  } else  DummyIDs <- NULL

	#=========================
	# returned ageprior w columns for GP + avuncular

	if (grepl("par", ParSib) | grepl("sib", ParSib)) {
		APM <- VtoM(TMP$apout, nc=15)
		colnames(APM) <- c("M", "P", "FS", "MS", "PS",
											 "MGM", "MGF", "MFA", "MMA", "MPA",
											 "PGM", "PGF", "PFA", "PMA", "PPA")
		MaxAgePO <- ifelse(any(APM[,"FS"]==0),
											 mean(which(APM[,"FS"]>0)) -1,  # sibs are always symmetrical around 0
											 min(which(APM[,"M"]>0)) -2)  # may be flat, tail may be cut off
		rownames(APM) <- c(1:nrow(APM)) -MaxAgePO -1
		MaxRow <- min(max(which(apply(APM, 1, function(x) any(x>0)))) +1, nrow(APM))
		APM <- APM[1:MaxRow, ]
	}

	#=========================
	# update lifehist w. inferred sex & estimated birth year

	LhOUT <- data.frame(LHF,
											Sexx = TMP$sexrf,
											BY.est = TMP$byrf[1:Ng],
											BY.lo = TMP$byrf[1:Ng + Ng],
											BY.hi = TMP$byrf[1:Ng + 2*Ng],
											stringsAsFactors = FALSE)
  LhOUT$BY.est[LhOUT$BY.est < 0] <- NA
  names(LhOUT)[names(LhOUT)=="ID"] <- "id"

  #=========================
  # output
  rownames(Pedigree) <- 1:nrow(Pedigree)

  if (grepl("par", ParSib)) {
    OUT <- list(PedigreePar = Pedigree,
                TotLikPar = TMP$totll[s(sum(TMP$totll!=0))],
                AgePriorExtra = APM,
								LifeHistPar = LhOUT)

  } else if (grepl("sib", ParSib)) {
    OUT <- list(Pedigree = Pedigree,
                DummyIDs = DummyIDs,
                TotLikSib = TMP$totll[s(sum(TMP$totll!=0))],
                AgePriorExtra = APM,
								LifeHistSib = LhOUT)
  }
  return(OUT[!sapply(OUT, is.null)])
}



#============================================================================
#============================================================================
#' @title Check data for duplicates.
#'
#' @description Check the genotype and life history data for duplicate IDs (not
#'   permitted) and duplicated genotypes (not advised), and count how many
#'   individuals in the genotype data are not included in the life history data
#'   (permitted). The order of IDs in the genotype and life history data is not
#'   required to be identical.
#'
#' @param GenoM matrix with genotype data, size nInd x nSnp
#' @param Specs The 1-row dataframe with parameter values
#' @param ErrM  3x3 matrix with genotyping error probabilities; rows=actual,
#'   columns=observed
#' @param LhIN  life history data
#' @param quiet suppress messages
#'
#' @return A list with one or more of the following elements:
#' \item{DupGenoID}{Dataframe, row numbers of duplicated IDs in genotype data.
#'   Please do remove or relabel these to avoid downstream confusion.}
#' \item{DupGenotype}{Dataframe, duplicated genotypes (with or without
#'   identical IDs). The specified number of maximum mismatches is allowed,
#'   and this dataframe may include pairs of closely related individuals.
#'   Mismatch = number of SNPs at which genotypes differ, LLR = likelihood
#'   ratio between 'self' and most likely non-self.}
#' \item{DupLifeHistID}{Dataframe, row numbers of duplicated IDs in life
#'   history data}
#' \item{NoLH}{Vector, IDs (in genotype data) for which no life history data is
#' provided}
#'
#' @useDynLib sequoia, .registration = TRUE
# @useDynLib sequoia duplicates
#'
#' @keywords internal

DuplicateCheck <- function(GenoM = NULL,
                           Specs = NULL,
													 ErrM = NULL,
                           LhIN = NULL,
                           quiet = FALSE)
{
	on.exit(.Fortran(deallocall), add=TRUE)
  gID <- rownames(GenoM)
  Ng <- nrow(GenoM)
  GenoV <- as.integer(GenoM)
	Complx <- switch(Specs[,"Complexity"], full = 2, simp = 1, mono = 0, herm = 4)  # affects alternatives

	SpecsInt <- c(nSnp = FacToNum(Specs[,"NumberSnps"]),           # 1
	              MaxMisDUP = FacToNum(Specs[,"MaxMismatchDUP"]),  # 2
	              MaxMisOH = FacToNum(Specs[,"MaxMismatchOH"]),    # 3
	              MaxMisME = FacToNum(Specs[,"MaxMismatchME"]),    # 4
	              SMax = as.integer(FacToNum(Specs[,"MaxSibshipSize"])),   # 5
	              Complx = as.integer(Complx),                 # 6
	              quiet = as.integer(quiet),                   # 7
	              nAgeCl = FacToNum(Specs[,"nAgeClasses"]))    # 8

	SpecsDbl <- c(TF = FacToNum(Specs[,"Tfilter"]),
	              TA = FacToNum(Specs[,"Tassign"]))

	AP <- MakeAgePrior(MaxAgeParent = Specs[,"nAgeClasses"] -1, Plot=FALSE, quiet=TRUE)

	DUP <- .Fortran(duplicates,
                  ng = as.integer(Ng),
                  specsint = as.integer(SpecsInt),
                  specsdbl = as.double(SpecsDbl),
									errv = as.double(ErrM),
                  genofr = as.integer(GenoV),

                  sexrf = as.integer(rep(3, Ng)),
                  byrf = as.integer(rep(-999, 3*Ng)),
                  aprf = as.double(AP),

									ndupgenos = as.integer(0),
                  dupgenos = integer(2*Ng),
                  nmismatch = integer(Ng),
									snpdboth = integer(Ng),
                  duplr = double(Ng))

	Duplicates <- list()
  if (any(duplicated(gID))) {
    r1 <- which(duplicated(gID))
    r2 <- which(duplicated(gID, fromLast=TRUE))
    Duplicates$DupGenoID <- data.frame(row1 = r1,
                                       row2 = r2,
                                       ID = gID[r1])
  }
  if(DUP$ndupgenos>0) {
    tmp <- VtoM(DUP$dupgenos, DUP$ndupgenos)
    Duplicates$DupGenotype <- data.frame(row1 = tmp[, 1],
                                         row2 = tmp[, 2],
                                         ID1 = gID[tmp[, 1]],
                                         ID2 = gID[tmp[, 2]],
                                         Mismatch = DUP$nmismatch[s(DUP$ndupgenos)],
																				 SnpdBoth = DUP$snpdboth[s(DUP$ndupgenos)],
                                         LLR = DUP$duplr[s(DUP$ndupgenos)])
    Duplicates$DupGenotype <- Duplicates$DupGenotype[order(Duplicates$DupGenotype$LLR,
                                                           decreasing=TRUE), ]
  }
  if (!is.null(LhIN)) {
    names(LhIN) <- c("ID", "Sex", "BirthYear")
    LhIN$ID <- as.character(LhIN$ID)
    if (any(duplicated(LhIN[,1]))) {
      r1 <- which(duplicated(LhIN[,1]))
      r2 <- which(duplicated(LhIN[,1], fromLast=TRUE))
      Duplicates$DupLifeHistID <- data.frame(row1 = r1,
                                             row2 = r2,
                                             ID = LhIN[r1, "ID"],
                                             Sex1 = LhIN[r1, "Sex"],
                                             Sex2 = LhIN[r2, "Sex"],
                                             BirthYear1 = LhIN[r1, "BirthYear"],
                                             BirthYear2 = LhIN[r2, "BirthYear"])
    }
    NoLH <- setdiff(gID, LhIN$ID)
    if (length(NoLH)>0) Duplicates$NoLH <- NoLH
  }

  # print warnings
  if (quiet <1) {
    if (any(duplicated(gID))) message("duplicate IDs found in genotype data, please remove to avoid confusion")
    if (DUP$ndupgenos>0 && DUP$ndupgenos > sum(duplicated(gID))) {
      message("There were ",DUP$ndupgenos, " likely duplicate genotypes found, consider removing")
    }
    if (any(duplicated(LhIN[,1]))) message("duplicate IDs found in lifehistory data, first entry will be used")
  }

  return( Duplicates )
}



#=====================================================================
#=====================================================================

#' @title Order lifehistory data
#'
#' @description Order lifehistory data to match order of IDs in genotype data,
#'   filling in gaps with missing values
#'
#' @param LH dataframe with lifehistory information:
#' \describe{
#'  \item{ID}{max. 30 characters long,}
#'  \item{Sex}{1 = females, 2 = males, other numbers = unknown,}
#'  \item{Birth Year}{(or hatching year) Use negative numbers to denote
#'  missing values.}
#'  \item{BY.min}{minimum birth year (optional)}
#'  \item{BY.max}{maximum birth year (optional)}}
#' @param gID character vector with IDs in genotype data, in order of occurrence
#'
#' @return a dataframe with the same 5 columns, but with individuals in exactly
#'   the same order as gID, including padding with 'empty' rows if an individual
#'   in gID was not in the input-LH. Missing values are recoded to 3 for the
#'   'Sex' column, and -999 for the birth year columns.
#'
#' @keywords internal

orderLH <- function(LH=NULL, gID=NULL) {
  if (!all(gID %in% LH$ID)) {
    LHX <- data.frame(ID = gID[!gID %in% LH$ID],
                      Sex = 3,
                      BirthYear = -999,
                      BY.min = -999,
                      BY.max = -999)
  }
  if (is.null(LH)) {
    LHF <- LHX
  } else {
    if (!all(gID %in% LH$ID)) {
      LH <- merge(LHX, LH, all = TRUE)
    }
    LHF <- LH[match(gID, LH$ID), ]
    # fill in gaps: some gID may not be in LH
    LHF$Sex[is.na(LHF$Sex)] <- 3
    for (y in c("BirthYear", "BY.min", "BY.max")) {
      LHF[is.na(LHF[,y]), y] <- -999
    }
  }
  return( LHF )
}
