## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.dim = c(6, 4) ) ## ----setup-------------------------------------------------------------------- library(bakR) library(ggplot2) library(corrplot) set.seed(123) ## ----------------------------------------------------------------------------- # Simulate a nucleotide recoding dataset sim_data <- Simulate_relative_bakRData(1000, depth = 1000000, nreps = 2) # This will simulate 1000 features, 1000000 reads, 2 experimental conditions, # and 2 replicates for each experimental condition # See ?Simulate_relative_bakRData for details regarding tunable parameters # Run the efficient model Fit <- bakRFit(sim_data$bakRData) # Quality control checks QC <- QC_checks(Fit) ## ----fig.align='center'------------------------------------------------------- # Barplots of raw mutation rates QC$raw_mutrates ## ----fig.align='center'------------------------------------------------------- # Barplots of inferred mutation rates QC$conversion_rates ## ----fig.align='center'------------------------------------------------------- # Barplots of inferred mutation rates ggplot(Fit$Fast_Fit$Fn_Estimates, aes(x = logit_fn, color = as.factor(sample))) + geom_density() + theme_classic() + scale_color_viridis_d() + xlab("logit(fn) estimates") + ylab("density") ## ----fig.align='center'------------------------------------------------------- # Barplots of inferred mutation rates # Numerical indices are ordered as they appear in QC_checks() output message # So this is for replicate 1 and 2 of experimental ID 1 QC$correlation_plots[[1]] ## ----fig.align='center'------------------------------------------------------- # Using function from corrplot package corrplot.mixed(QC$correlation_matrix, upper = "square", lower = "number", addgrid.col = "black", tl.col = "black") ## ----------------------------------------------------------------------------- # Seed for reproducibility set.seed(321) # Simulate a nucleotide recoding dataset sim_data <- Simulate_bakRData(1000, nreps = 2, fn_mean = -4) # This will simulate 1000 features, 2 experimental conditions, # and 2 replicates for each experimental condition # The average logit(fn) will be -4, which corresponds to an average fn of just under 0.02. # Run the efficient model # Check the pnew estimates, which should all be around 0.05 Fit <- bakRFit(sim_data$bakRData) ## ----------------------------------------------------------------------------- # Run QC_checks and read messages QC <- QC_checks(Fit) ## ----------------------------------------------------------------------------- # Rerun with Stan-based pnew estimation # This will take a couple minutes to run Fit_s <- bakRFit(Fit, FastRerun = TRUE, StanRateEst = TRUE) ## ----fig.align='center'------------------------------------------------------- # Simulated ground truth sim_truth <- sim_data$sim_list # Features that made it past filtering XFs <- unique(Fit$Fast_Fit$Effects_df$XF) # Simulated logit(fraction news) from features making it past filtering true_fn <- sim_truth$Fn_rep_sim$Logit_fn[sim_truth$Fn_rep_sim$Feature_ID %in% XFs & sim_truth$Fn_rep_sim$Exp_ID == 2] # Estimated logit(fraction news) est_fn <- Fit$Fast_Fit$Fn_Estimates$logit_fn[Fit$Fast_Fit$Fn_Estimates$Exp_ID == 2] # Compare estimate to truth plot(true_fn, est_fn, xlab = "True logit(fn)", ylab = "Estimated logit(fn)", main = "Default pnew estimation", xlim = c(-8, 6), ylim = c(-8, 6)) abline(0, 1, col = "red") # Estimated logit(fraction news) est_fn <- Fit_s$Fast_Fit$Fn_Estimates$logit_fn[Fit_s$Fast_Fit$Fn_Estimates$Exp_ID == 2] # Compare estimate to truth plot(true_fn, est_fn, xlab = "True logit(fn)", ylab = "Estimated logit(fn)", main = "Alternative pnew estimation", xlim = c(-8, 6), ylim = c(-8, 6)) abline(0, 1, col = "red") ## ----------------------------------------------------------------------------- # Rerun with Stan-based pnew estimation # This will take a couple minutes to run Fit_u <- bakRFit(Fit, FastRerun = TRUE, pnew = 0.05) ## ----fig.align='center'------------------------------------------------------- # Estimated logit(fraction news) est_fn <- Fit_u$Fast_Fit$Fn_Estimates$logit_fn[Fit_u$Fast_Fit$Fn_Estimates$Exp_ID == 2] # Compare estimate to truth plot(true_fn, est_fn, xlab = "True logit(fn)", ylab = "Estimated logit(fn)", main = "User provided pnew", xlim = c(-8, 6), ylim = c(-8, 6)) abline(0, 1, col = "red")