% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/old_taxa--taxmap--parsers.R
\name{lookup_tax_data}
\alias{lookup_tax_data}
\title{Convert one or more data sets to taxmap}
\usage{
lookup_tax_data(
  tax_data,
  type,
  column = 1,
  datasets = list(),
  mappings = c(),
  database = "ncbi",
  include_tax_data = TRUE,
  use_database_ids = TRUE,
  ask = TRUE
)
}
\arguments{
\item{tax_data}{A table, list, or vector that contains sequence IDs, taxon
IDs, or taxon names.
* tables: The `column` option must be used to specify which column
contains the sequence IDs, taxon IDs, or taxon names.
* lists: There must be only one item per list entry unless the `column`
option is used to specify what item to use in each list entry.
* vectors: simply a vector of sequence IDs, taxon IDs, or taxon names.}

\item{type}{What type of information can be used to look up the
classifications. Takes one of the following values:
* `"seq_id"`: A database sequence ID with an associated classification
(e.g. NCBI accession numbers).
* `"taxon_id"`: A reference database taxon ID (e.g. an NCBI taxon ID)
* `"taxon_name"`: A single taxon name (e.g. "Homo sapiens" or "Primates")
* `"fuzzy_name"`: A single taxon name, but check for misspellings first.
Only use if you think there are misspellings. Using `"taxon_name"` is
faster.}

\item{column}{(`character` or `integer`) The name or index of the column that
contains information used to look up classifications. This only applies when
a table or list is supplied to `tax_data`.}

\item{datasets}{Additional lists/vectors/tables that should be included in
the resulting `taxmap` object. The `mappings` option is used to specify how
these data sets relate to the `tax_data` and, by inference, what taxa apply
to each item.}

\item{mappings}{(named `character`) This defines how the taxonomic
information in `tax_data` applies to data in `datasets`. This option
should have the same number of inputs as `datasets`, with values
corresponding to each dataset. The names of the character vector specify
what information in `tax_data` is shared with info in each `dataset`, which
is specified by the corresponding values of the character vector. If there
are no shared variables, you can add `NA` as a placeholder, but you could
just leave that data out since it is not benefiting from being in the
taxmap object. The names/values can be one of the following:
* For tables, the names of columns can be used.
* `"{{index}}"` : This means to use the index of rows/items
* `"{{name}}"`  : This means to use row/item names.
* `"{{value}}"` : This means to use the values in vectors or lists. Lists
will be converted to vectors using [unlist()].}

\item{database}{(`character`) The name of a database to use to look up
 classifications. Options include "ncbi", "itis", "eol", "col", "tropicos",
and "nbn".}

\item{include_tax_data}{(`TRUE`/`FALSE`) Whether or not to include `tax_data`
as a dataset, like those in `datasets`.}

\item{use_database_ids}{(`TRUE`/`FALSE`) Whether or not to use downloaded
database taxon ids instead of arbitrary, automatically-generated taxon ids.}

\item{ask}{(`TRUE`/`FALSE`) Whether or not to prompt the user for input.
Currently, this would only happen when looking up the taxonomy of a taxon
name with multiple matches. If `FALSE`, taxa with multiple hits are treated
as if they do not exist in the database. This might change in the future if
we can find an elegant way of handling this.}
}
\description{
Looks up taxonomic data from NCBI sequence IDs, taxon IDs, or taxon names
that are present in a table, list, or vector. Can also incorporate additional
associated datasets.
}
\section{Failed Downloads}{
 If you have invalid inputs or a download fails for
  another reason, then there will be an "unknown" taxon ID as a placeholder
  and failed inputs will be assigned to this ID. You can remove these using
  [filter_taxa()] like so: `filter_taxa(result, taxon_ids != "unknown")`. Add
  `drop_obs = FALSE` if you want to keep the input data, but want to remove the
  taxon.
}

\examples{
\dontrun{

  # Look up taxon names in vector from NCBI
  lookup_tax_data(c("homo sapiens", "felis catus", "Solanaceae"),
                  type = "taxon_name")

  # Look up taxon names in list from NCBI
  lookup_tax_data(list("homo sapiens", "felis catus", "Solanaceae"),
                  type = "taxon_name")

  # Look up taxon names in table from NCBI
  my_table <- data.frame(name = c("homo sapiens", "felis catus"),
                         decency = c("meh", "good"))
  lookup_tax_data(my_table, type = "taxon_name", column = "name")

  # Look up taxon names from NCBI with fuzzy matching
  lookup_tax_data(c("homo sapienss", "feles catus", "Solanacese"),
                  type = "fuzzy_name")

  # Look up taxon names from a different database
  lookup_tax_data(c("homo sapiens", "felis catus", "Solanaceae"),
                  type = "taxon_name", database = "ITIS")

  # Prevent asking questions for ambiguous taxon names
  lookup_tax_data(c("homo sapiens", "felis catus", "Solanaceae"),
                  type = "taxon_name", database = "ITIS", ask = FALSE)

  # Look up taxon IDs from NCBI
  lookup_tax_data(c("9689", "9694", "9643"), type = "taxon_id")

  # Look up sequence IDs from NCBI
  lookup_tax_data(c("AB548412", "FJ358423", "DQ334818"),
                  type = "seq_id")

  # Make up new taxon IDs instead of using the downloaded ones
  lookup_tax_data(c("AB548412", "FJ358423", "DQ334818"),
                  type = "seq_id", use_database_ids = FALSE)


  # --- Parsing multiple datasets at once (advanced) ---
  # The rest is one example for how to classify multiple datasets at once.

  # Make example data with taxonomic classifications
  species_data <- data.frame(tax = c("Mammalia;Carnivora;Felidae",
                                     "Mammalia;Carnivora;Felidae",
                                     "Mammalia;Carnivora;Ursidae"),
                             species = c("Panthera leo",
                                         "Panthera tigris",
                                         "Ursus americanus"),
                             species_id = c("A", "B", "C"))

  # Make example data associated with the taxonomic data
  # Note how this does not contain classifications, but
  # does have a variable in common with "species_data" ("id" = "species_id")
  abundance <- data.frame(id = c("A", "B", "C", "A", "B", "C"),
                          sample_id = c(1, 1, 1, 2, 2, 2),
                          counts = c(23, 4, 3, 34, 5, 13))

  # Make another related data set named by species id
  common_names <- c(A = "Lion", B = "Tiger", C = "Bear", "Oh my!")

  # Make another related data set with no names
  foods <- list(c("ungulates", "boar"),
                c("ungulates", "boar"),
                c("salmon", "fruit", "nuts"))

  # Make a taxmap object with these three datasets
  x = lookup_tax_data(species_data,
                      type = "taxon_name",
                      datasets = list(counts = abundance,
                                      my_names = common_names,
                                      foods = foods),
                      mappings = c("species_id" = "id",
                                   "species_id" = "{{name}}",
                                   "{{index}}" = "{{index}}"),
                      column = "species")

  # Note how all the datasets have taxon ids now
  x$data

  # This allows for complex mappings between variables that other functions use
  map_data(x, my_names, foods)
  map_data(x, counts, my_names)
}
}
\seealso{
Other parsers: 
\code{\link{extract_tax_data}()},
\code{\link{parse_dada2}()},
\code{\link{parse_edge_list}()},
\code{\link{parse_greengenes}()},
\code{\link{parse_mothur_tax_summary}()},
\code{\link{parse_mothur_taxonomy}()},
\code{\link{parse_newick}()},
\code{\link{parse_phyloseq}()},
\code{\link{parse_phylo}()},
\code{\link{parse_qiime_biom}()},
\code{\link{parse_rdp}()},
\code{\link{parse_silva_fasta}()},
\code{\link{parse_tax_data}()},
\code{\link{parse_ubiome}()},
\code{\link{parse_unite_general}()}
}
\concept{parsers}
