## ---- echo = FALSE-------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----eval = FALSE--------------------------------------------------------
#  install.packages("devtools")

## ----eval = FALSE--------------------------------------------------------
#  devtools::install_github("edwardslee/alphabetr")

## ------------------------------------------------------------------------
library(alphabetr)

## ----eval = FALSE--------------------------------------------------------
#  install.packages("alphabetr")

## ------------------------------------------------------------------------
# using a string to be read as a csv file (ignore this and see next line of code)
strcsv <-'"chain","well","cdr3"\n"alpha","1","CAVTGGDKLIF"\n"alpha","1","CALDGDKIIF"\n"alpha","2","CAVTGGDKLIF"\n"beta","1","CASGLARAEQYF"\n"beta","2","CASSEGDKVIF"\n"beta","2","CSEVHTARTQYF"'
con <- textConnection(strcsv)
csv3 <- read.csv(con)

# looking at the csv file
head(csv3)

## ------------------------------------------------------------------------
# TCRA file
strcsv <-'"well","cdr3"\n"1","CAVTGGDKLIF"\n"1","CALDGDKIIF"\n"2","CAVTGGDKLIF"'
con_alpha <- textConnection(strcsv)
csv2_alpha <- read.csv(con_alpha)

# TCRB file
strcsv <-'"well","cdr3"\n"1","CASGLARAEQYF"\n"2","CASSEGDKVIF"\n"2","CSEVHTARTQYF"'
con_beta <- textConnection(strcsv)
csv2_beta <- read.csv(con_beta)

head(csv2_alpha)
head(csv2_beta)

## ----eval = FALSE--------------------------------------------------------
#  # if using one 3 column csv file, using the data argument
#  dat <- read_alphabetr(data = "alphabetr_data.csv")
#  
#  # if using two 2 col csv files, use the data_alpha and data_beta arguments
#  dat <- read_alphabetr(data_alpha = "alphabetr_data_alpha.csv"
#                        data_beta  = "alphabetr_data_beta.csv")

## ------------------------------------------------------------------------
# using a string to be read as a csv file (ignore this and see next line of code)
strcsv <-'"chain","well","cdr3"\n"alpha","1","CAVTGGDKLIF"\n"alpha","1","CALDGDKIIF"\n"alpha","2","CAVTGGDKLIF"\n"beta","1","CASGLARAEQYF"\n"beta","2","CASSEGDKVIF"\n"beta","2","CSEVHTARTQYF"'
con <- textConnection(strcsv)

# importing the csv file into the format that alphabetr will use
dat <- read_alphabetr(data = con)
dat

## ------------------------------------------------------------------------
# the cdr3 sequence of alpha_2
dat$alpha_lib[2]

# the cdr3 sequence of beta_3
dat$beta_lib[3]

## ------------------------------------------------------------------------
set.seed(290)   # to make the results reproducible
clonal <- create_clones(numb_beta = 1000,
                        dual_beta = 0.05,
                        dual_alpha = .3,
                        alpha_sharing = c(0.80, 0.15, 0.05),
                        beta_sharing  = c(0.75, 0.20, 0.05))

## ------------------------------------------------------------------------
# Clones ordered by beta index
ordered_clones <- clonal$ordered

# Clones randomly ordered in order to assign random clonal frequencies later 
random_clones <- clonal$TCR

# Clones that express two different alpha chains and two different beta chains
dual_alpha <- clonal$dual_alpha
dual_beta  <- clonal$dual_beta

## ------------------------------------------------------------------------
# single TCR clones
random_clones[5:7, ]

# dual TCR-alpha clones
random_clones[49:50, ]

# dual TCR-beta clones
random_clones[47:48, ]

## ------------------------------------------------------------------------
# Sharing vectors; with this, 1692 beta chains results in 2100 unique clones
share_alph <- c(.816, .085, .021, .007, .033, .005, .033)
share_beta <- c(.859, .076, .037, .019, .009)

# Creating a population of 2100 clones with specified sharing and
# 30% dual-alpha clones and 6% dual-beta clones
set.seed(258)   # reproducibility for the vignette
TCR_pairings <- create_clones(numb_beta = 1787,
                              dual_beta = 0.06,
                              dual_alpha = 0.3,
                              alpha_sharing = share_alph,
                              beta_sharing = share_beta)
TCR_clones <- TCR_pairings$TCR

## ------------------------------------------------------------------------
# 5 plates (= 480 wells), every well has a sample size of 50 cells
numb_cells <- matrix(c(50, 480), ncol = 2)

# 1 plate (= 96 wells), 48 wells with 100 cells/well, 48 wells with 200 cells/well
numb_cells <- matrix(c(100, 200, 48, 48), ncol = 2)

## ------------------------------------------------------------------------
# Different experimental parameters
number_plates <- 5        # five 96-well plates
err_drop <- c(0.15, .01)  # drop error rate distribution ~ lognormal(.15, .1)
err_seq  <- c(0.02, .005) # in frame error rate dist ~ lognormal(.02, .005)
err_mode <- c("constant", "constant") # lognormal error distributions
number_skewed <- 25       # 25 clones representing the top proportion of population
pct_top <- 0.5            # top of population represents 50% of population
dis_behavior <- "linear"  # only option avaiable is linear at the moment

# Mixed sampling strategy: 128 wells of 20 cells/well, 64 wells of 50 cells/well,
# 96 wells of 100 cells/well, 200 cells/well, 300 cells/well each
numb_cells <- matrix(c(20,  50, 100, 200, 300,
                       128, 64,  96,  96,  96), ncol = 2)

# Creating the data sets
data_tcr <- create_data(TCR = TCR_clones,
                        plates = number_plates,
                        error_drop = err_drop,
                        error_seq = err_seq,
                        error_mode = err_mode,
                        skewed = number_skewed,
                        prop_top = pct_top,
                        dist = dis_behavior,
                        numb_cells = numb_cells)

# Saving the data for alpha chains and data for beta chains
data_alph <- data_tcr$alpha
data_beta <- data_tcr$beta

## ------------------------------------------------------------------------
# Normally you would want to set rep = 100 or more
pairs <- bagpipe(alpha = data_alph, beta = data_beta, rep = 5)

## ------------------------------------------------------------------------
head(pairs)

## ------------------------------------------------------------------------
# remove candidate pairs that don't appear in more than 30% of replicates
pairs <- pairs[pairs[, "prop_replicates"] > .3, ]
head(pairs)

## ------------------------------------------------------------------------
freq_init <- freq_estimate(alpha = data_alph,
                           beta = data_beta,
                           pair = pairs,
                           error = 0.15,
                           numb_cells = numb_cells)
head(freq_init)

## ------------------------------------------------------------------------
# determining duals in the top; note the use of the error rate
common_dual <- dual_top(alpha = data_alph,
                        beta = data_beta,
                        pair = freq_init,
                        error = err,
                        numb_cells = numb_cells)

# determining duals in the tail; note that this does NOT use the error rate
tail_dual <- dual_tail(alpha = data_alph,
                       beta = data_beta,
                       freq_results = freq_init,
                       numb_cells = numb_cells)

## ------------------------------------------------------------------------
clones_dual <- rbind(common_dual, tail_dual)
head(clones_dual)

## ------------------------------------------------------------------------
# Find the frequencies of the newly identified dual clones
freq_dual <- freq_estimate(alpha = data_alph,
                           beta = data_beta,
                           pair = clones_dual,
                           error = 0.15,
                           numb_cells = numb_cells)

# Remove the candidate beta-sharing clones and replace with the dual clones
tcrpairs <- combine_freq_results(freq_init, freq_dual)

## ------------------------------------------------------------------------
head(tcrpairs)

## ------------------------------------------------------------------------
dual_res <- dual_eval(duals = clones_dual,
                      pair = freq_init,
                      TCR = TCR_pairings$TCR,
                      number_skewed = number_skewed,
                      TCR_dual = TCR_pairings$dual_alpha)

# listing the false dual rate and the adjusted dual depths of the top and tail clones
dual_res$fdr
dual_res$adj_depth_top
dual_res$adj_depth_tail

## ------------------------------------------------------------------------
freq_res <- freq_eval(freq = tcrpairs,
                      number_skewed = number_skewed,
                      TCR = TCR_pairings$TCR,
                      numb_clones = nrow(TCR_pairings$TCR),
                      prop_top = pct_top)

# CV of estimates and the proportion of CIs with correct frequency
freq_res$cv
freq_res$correct