#' Create a biodiversity profile of the retrieved data
#'
#' @description
#' This function creates a biodiversity profile of the data downloaded using [bold.fetch()].
#'
#' @param bold_df A data frame obtained from [bold.fetch()].
#' @param taxon_rank A single character string specifying the taxonomic hierarchical rank. Needs to be provided by default.
#' @param taxon_name A single or multiple character vector specifying the taxonomic names associated with the ‘taxon_rank’. Default value is NULL.
#' @param site_type A character string specifying one of two broad categories of `sites` (`locations` or `grids`). Needs to be provided by default.
#' @param location_type A single character vector specifying the geographic category if `locations` is selected as the `site_type` and for which a community matrix should be created. Default value is NULL.
#' @param gridsize A numeric value of the size of the grid if `grids` is selected as the `site_type`. Size is in sq.m. Default value is NULL.
#' @param presence_absence A logical value specifying whether the generated matrix should be converted into a ’presence-absence’ matrix. Default value is FALSE.
#' @param diversity_profile A character string specifying the type of diversity profile ("richness","preston","shannon","beta","all"). Needs to be provided by default.
#' @param beta_index A character vector specifying the type of beta diversity index (’jaccard’ or ’sorensen’ available) if `beta` or `all` `diversity_profile` selected. Default value is NULL.
#'
#' @details `bold.analyze.diversity` estimates the richness, Shannon diversity and beta diversity from the BIN counts or presence-absence data. Internally, the function converts the downloaded BCDM data into a community matrix (site X species) which is also provided as a part of the output. `taxon_rank` refers to a specific taxonomic rank (Ex. class, order, family etc or even BINs) and the `taxon_name` to one or more names of organisms in that specific rank. `taxon_rank` cannot be NULL while all the data will be used if `taxon_name` = `NULL` for a specified `taxon_rank`. The `site_type`=`locations` followed by providing a `location_type` refers to any geographic field (country.ocean,province.state etc.; for more information check the `bold.fields.info()` function help). `site_type`=`grids` generates grids based on BIN occurrence data (latitude, longitude) with grid size determined by the user in square meters using the `gridsize` argument. `site_type`=`grids` converts the Coordinate Reference System (CRS) of the data to a ‘Mollweide’ projection by which distance-based grid can be correctly specified (Gott III et al. 2007).Each grid is assigned a cell id, with the lowest number given to the lowest latitudinal point in the dataset. Rows lacking latitude and longitude data (NULL values) are removed when `site_type`=`grids`. Conversely, NULL entries are permitted when `site_type`=`locations`, even if latitude and longitude values are missing. This distinction exists because grids rely on bounding boxes, which require latitude and longitude values. This filtering could impact the richness values and other analyses, as all records for the selected `taxon_rank` that contain `location` information but lack latitude and longitude will be excluded if `site_type`=`grids`. This means that the same dataset could yield different results depending on the chosen `site_type`. `location_type` has to be specified when `site_type`=`locations` to avoid errors. 
#' The community matrix generated based on the sites/grids is then used to create richness profiles using `BAT::alpha.accum()` and Preston and Shannon diversity analyses using `vegan::prestondistr()` and `vegan::diversity()` respectively. The `BAT::alpha.accum()` currently offers various richness estimators, including Observed diversity (Obs); Singletons (S1); Doubletons (S2); Uniques (Q1); Duplicates (Q2); Jackknife1 abundance (Jack1ab); Jackknife1 incidence (Jack1in); Jackknife2 abundance (Jack2ab); Jackknife2 incidence (Jack2in); Chao1 and Chao2. The results depend on the input data (true abundances vs counts vs incidences) and users should be careful in the subsequent interpretation. Preston plots are generated using the data from the `prestondistr` results in `ggplot2` featuring cyan bars for observed species (or equivalent taxonomic group) and orange dots for expected counts. Beta diversity values are calculated using `BAT::beta()` function, which partitions the data using the Podani & Schmera (2011)/Carvalho et al. (2012) approach. These results are stored as distance matrices in the output.
#'
#' \emph{Note on the community matrix}: Each cell in this matrix contains the counts (or abundances) of the specimens whose sequences have an assigned BIN, in a given `site_type` (`locations` or `grids`). These counts can be generated at any taxonomic hierarchical level, applicable to one or multiple taxa including `bin_uri`. The `presence_absence` argument converts these counts (or abundances) to 1s and 0s.
#'
#' \emph{Important Note}: Results, including counts, adapt based on `taxon_rank` argument.
#'
#' @returns An 'output' list containing results based on the profile selected:
#'
#' #Common to all
#' *	comm.matrix = site X species like matrix required for the biodiversity results
#' #Common to all if `site_type`=`grids`
#' *	grids.data = grid data (sf) generated for the occurrence records
#' *	grid.map = a map of the generated grids
#'
#' #Based on the type of diversity profile
#' #1. richness
#' *	richness = A richness profile matrix
#' #2. shannon
#' *	shannon_div = Shannon diversity values for the given sites/grids (from gen.comm.mat)
#' #3. preston
#' *	preston.res = a Preston plot numerical data output
#' *	preston.plot = a ggplot2 visualization of the Preston results (preston.res)
#' #4. beta
#' *	total.beta = beta.total
#' *	replace = beta.replace (replacement)
#' *	richnessd = beta.richnessd (richness difference)
#' #5. all
#' *  All of the above results
#'
#' @references
#' Carvalho, J.C., Cardoso, P. & Gomes, P. (2012) Determining the relative roles of species replacement and species richness differences in generating beta-diversity patterns. Global Ecology and Biogeography, 21, 760-771.
#'
#' Podani, J. & Schmera, D. (2011) A new conceptual and methodological framework for exploring and explaining pattern in presence-absence data. Oikos, 120, 1625-1638.
#'
#' Richard Gott III, J., Mugnolo, C., & Colley, W. N. (2007). Map projections minimizing distance errors. Cartographica: The International Journal for Geographic Information and Geovisualization, 42(3), 219-234.
#'
#' @examples
#' \dontrun{
#' # Search for ids
#' comm.mat.data <- bold.public.search(taxonomy = list("Poecilia"))
#'
#' # Fetch the data using the ids.
#' #1. api_key must be obtained from BOLD support before using `bold.fetch()` function.
#' #2. Use the `bold.apikey()` function  to set the apikey in the global env.
#'
#' bold.apikey('apikey')
#'
#' BCDMdata <- bold.fetch(get_by = "processid",
#'                        identifiers = comm.mat.data$processid)
#'
#' # Remove rows which have no species data
#' BCDMdata <- BCDMdata[!BCDMdata$species== "",]
#'
#' #1. Analyze richness data
#' res.rich <- bold.analyze.diversity(bold_df=BCDMdata,
#'                                    taxon_rank = "species",
#'                                    site_type = "locations",
#'                                    location_type = 'country.ocean',
#'                                    diversity_profile = "richness")
#'
#' # Community matrix (BCDM data converted to community matrix)
#' res.rich$comm.matrix
#'
#' # richness results
#' res.rich$richness
#'
#' #2. Shannon diversity (based on grids)
#' res.shannon <- bold.analyze.diversity(bold_df=BCDMdata,
#'                                       taxon_rank = "species",
#'                                       site_type = "grids",
#'                                       gridsize = 1000000,
#'                                       diversity_profile = "shannon")
#'
#' # Shannon diversity results
#' res.shannon$shannon_div
#'
#' # Grid data (sf)
#' res.shannon$grids.data
#'
#' # grid map
#' res.shannon$grid.map
#'
#' #3. Preston plots and results
#' pres.res <- bold.analyze.diversity(bold_df=BCDMdata,
#'                                    taxon_rank = "species",
#'                                    site_type = "locations",
#'                                    location_type = 'country.ocean',
#'                                    diversity_profile = "preston")
#'
#' # Preston plot
#' pres.res$preston.plot
#'
#' # Preston plot data
#' pres.res$preston.res
#'
#' #4. beta diversity
#' beta.res <- bold.analyze.diversity(bold_df=BCDMdata,
#'                                    taxon_rank = "species",
#'                                    site_type = "locations",
#'                                    location_type = 'country.ocean',
#'                                    diversity_profile = "beta",
#'                                    beta_index = "jaccard")
#'
#' #Total diversity
#' beta.res$total.beta
#'
#' #Replacement
#' beta.res$replace
#'
#' #Richness difference
#' beta.res$richnessd
#'
#' #5. All profiles
#' all.diversity.res<-bold.analyze.diversity(bold_df=BCDMdata,
#'                                           taxon_rank = "species",
#'                                           site_type = "locations",
#'                                           location_type = 'country.ocean',
#'                                           diversity_profile = "all",
#'                                           beta_index = "jaccard")
#' #Explore all results
#' all.diversity.res
#'}
#'
#' @importFrom BAT alpha.accum
#' @importFrom vegan diversity
#' @importFrom vegan prestondistr
#' @importFrom ggplot2 geom_line
#' @importFrom ggplot2 theme_classic
#' @importFrom ggplot2 sym
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 element_blank
#' @importFrom stats fitted
#' @importFrom BAT beta
#'
#' @export
#'
bold.analyze.diversity <- function(bold_df,
                                   taxon_rank,
                                   taxon_name=NULL,
                                   site_type = c("locations","grids"),
                                   location_type=NULL,
                                   gridsize=NULL,
                                   presence_absence=FALSE,
                                   diversity_profile = c("richness","preston","shannon","beta","all"),
                                   beta_index=NULL)

{

  # Builds a community matrix (via gen.comm.mat) from `bold_df` for the chosen
  # site definition, optionally converts it to presence-absence, and attaches
  # the requested diversity profile(s) to the returned list.
  #
  # Returns (invisibly) a list that always holds `comm.matrix`; `grids.data`
  # and `grid.map` are added when site_type = "grids", followed by the
  # elements of whichever profile(s) were requested.

  # Check if data is a non empty data frame object

  df_checks(bold_df)

  # taxon_rank has no default, so guard both the "not supplied" and the
  # explicit NULL case with the same message

  if (missing(taxon_rank) || is.null(taxon_rank)) {
    stop("Taxon rank cannot be left empty.")
  }

  # site_type and diversity_profile must be chosen explicitly. Without these
  # guards an omitted argument reaches switch()-style dispatch as a
  # multi-element vector, and a misspelt value silently matches nothing.

  if (missing(site_type) || is.null(site_type)) {
    stop("site_type must be specified as either 'locations' or 'grids'.")
  }

  if (missing(diversity_profile) || is.null(diversity_profile)) {
    stop("diversity_profile must be specified as one of 'richness', 'preston', 'shannon', 'beta' or 'all'.")
  }

  site_type <- match.arg(site_type)

  diversity_profile <- match.arg(diversity_profile)

  # Empty output list

  output <- list()

  # Generate the community matrix for the selected site definition

  if (site_type == "locations") {

    if (!is.null(gridsize)) stop("Grid size must only be specified when site_type='grids'")

    # Mirrors the gridsize requirement of the 'grids' path

    if (is.null(location_type)) stop("When site_type='locations',location_type must be specified.")

    bin.comm.res <- gen.comm.mat(bold.df = bold_df,
                                 taxon.rank = taxon_rank,
                                 taxon.name = taxon_name,
                                 site.cat = location_type)

  } else {

    if (is.null(gridsize)) stop("When site_type='grids',gridsize must be specified.")

    bin.comm.res <- gen.comm.mat(bold.df = bold_df,
                                 taxon.rank = taxon_rank,
                                 taxon.name = taxon_name,
                                 site.cat = NULL,
                                 grids = TRUE,
                                 gridsize = gridsize,
                                 view.grids = TRUE)

    # Grid outputs are only available on this path

    output$grids.data <- bin.comm.res$grids

    output$grid.map <- bin.comm.res$grid_plot

  }

  bin.comm <- bin.comm.res$comm.matrix

  # Convert counts to presence-absence (1/0) when requested

  if (presence_absence) {
    bin.comm <- data.frame(ifelse(bin.comm >= 1, 1, 0))
  }

  # isTRUE() keeps an NA comparison result from aborting the `if`

  if (isTRUE(all(bin.comm == 0 | bin.comm == 1))) {
    warning("Data is presence absence data. Preston and/or Shannon results if calculated, are based on the assumption that the community data has counts.")
  }

  # Output the community data

  output$comm.matrix <- bin.comm

  # Diversity results based on the profile selection. "all" simply enables
  # every profile, so each profile is written once and shared.

  wants <- function(profile) diversity_profile %in% c(profile, "all")

  if (wants("richness")) {

    # species richness estimation

    output$richness <- richness_profile(df = bin.comm)

  }

  if (wants("preston")) {

    # A failed Preston fit is reported as a message and skipped, so the
    # remaining profiles (when diversity_profile = "all") are still returned

    preston_results <- tryCatch(
      preston_profile(df = bin.comm,
                      y_label = taxon_rank),
      error = function(e) {
        message("The following error is for the preston results due to an issue with the input data. Please re-check if the values used are abundances or presence-absences: ", conditionMessage(e))
        NULL
      }
    )

    if (!is.null(preston_results)) {

      output$preston.plot <- preston_results$preston.plot

      output$preston.res <- preston_results$preston.res

    }

  }

  if (wants("shannon")) {

    # Shannon diversity, rounded to two decimals

    output$shannon_div <- round(shannon_div_profile(df = bin.comm), 2)

  }

  if (wants("beta")) {

    beta_div_results <- beta_div_profile(df = bin.comm,
                                         beta.index = beta_index,
                                         pre_abs = presence_absence)

    # Add results to output

    output$total.beta <- beta_div_results$total.beta

    output$replace <- beta_div_results$replace

    output$richnessd <- beta_div_results$richnessd

  }

  invisible(output)

}
