## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.dim = c(6, 4) ) ## ----setup-------------------------------------------------------------------- library(bakR) # Packages that are NOT automatically installed when bakR is installed library(DESeq2) library(pheatmap) # Packages which are installed when bakR is installed library(dplyr) library(magrittr) library(ggplot2) library(stats) # Set the seed for reproducibility set.seed(123) ## ----------------------------------------------------------------------------- ### Only differential synthesis # Simulate a nucleotide recoding dataset sim_data <- Simulate_relative_bakRData(1000, 1000000, num_ks_DE = c(0, 300), num_kd_DE = c(0, 0)) # This will simulate 1000 features, 1 million reads, 2 experimental conditions # and 3 replicates for each experimental condition. 300 features will be differentially # transcribed, and there will be no differential stability # See ?Simulate_bakRData for details regarding tunable parameters # Extract simulated bakRData object bakRData <- sim_data$bakRData ## Run the efficient model Fit_s <- bakRFit(bakRData) ### Only differential stability # Simulate a nucleotide recoding dataset sim_data <- Simulate_relative_bakRData(1000, 1000000, num_ks_DE = c(0, 0), num_kd_DE = c(0, 300)) # This will simulate 1000 features, 1 million reads, 2 experimental conditions # and 3 replicates for each experimental condition. 300 features will be differentially # transcribed, and there will be no differential stability # See ?Simulate_bakRData for details regarding tunable parameters # Extract simulated bakRData object bakRData <- sim_data$bakRData ## Run the efficient model Fit_d <- bakRFit(bakRData) ## ----------------------------------------------------------------------------- ### Only differential synthesis dataset # Get the count matrix from bakR Counts <- Fit_s$Data_lists$Count_Matrix # Experimental conditions for each sample # There are 6 s4U treated samples (3 replicates of each condition) # In addition, there are 2 -s4U control samples (1 for each condition) ## s4U conditions # 1st three samples are reference (ref) samples # Next three samples are experimental (exp) samples conditions_s4U <- as.factor(rep(c("ref", "exp"), each = 3)) ## -s4U control conditions # 1st sample is reference, next is experimental conditions_ctl <- as.factor(c("ref", "exp")) # Combined s4U and -s4U control conditions conditions <- c(conditions_s4U, conditions_ctl) # Make the colData input for DESeq2 colData <- data.frame(conditions = conditions) rownames(colData) <- colnames(Counts) # Make DESeq2 data object dds_s <- DESeqDataSetFromMatrix(countData = Counts, colData = colData, design = ~conditions) # Fit DESeq2 model ddso_s <- DESeq(dds_s) # Extract results of experimental vs. reference comparison reso_s <- results(ddso_s, contrast = c("conditions", "exp", "ref")) ### Only differential stability dataset # Get the other count matrix from bakR Counts <- Fit_d$Data_lists$Count_Matrix # Make DESeq2 data object dds_d <- DESeqDataSetFromMatrix(countData = Counts, colData = colData, design = ~conditions) # Fit DESeq2 model ddso_d <- DESeq(dds_d) # Extract results of experimental vs. reference comparison reso_d <- results(ddso_d, contrast = c("conditions", "exp", "ref")) ## ----results = 'hide'--------------------------------------------------------- ### Only differential synthesis: # Convert to data frame reso_s <- as.data.frame(reso_s) # Make data frame DE_df_s <- data.frame(XF = row.names(reso_s), L2FC_RNA = reso_s$log2FoldChange, DE_score = reso_s$stat, DE_se = reso_s$lfcSE, DE_pval = reso_s$pval, DE_padj = reso_s$padj) ### Only differential degradation: # Convert to data frame reso_d <- as.data.frame(reso_d) # Make data frame DE_df_d <- data.frame(XF = row.names(reso_d), L2FC_RNA = reso_d$log2FoldChange, DE_score = reso_d$stat, DE_se = reso_d$lfcSE, DE_pval = reso_d$pval, DE_padj = reso_d$padj) ## ----results = 'hide'--------------------------------------------------------- # Decreasing sims parameter to speed up; wouldn't normally suggest this if you # want higher precision mechanism p-values, discussed later Mechs_s <- DissectMechanism(Fit_s, DE_df_s, sims = 1000000) Mechs_d <- DissectMechanism(Fit_d, DE_df_d, sims = 1000000) ## ----fig.align='center'------------------------------------------------------- # Nice red to blue color gradient # Feel free to use any coloring your heart desires col <- c("#053061", "#2166AC", "#4393C3", "#92C5DE", "#D1E5F0", "#F7F7F7", "#FDDBC7", "#F4A582", "#D6604D", "#B2182B", "#67001F") # Heatmap for differential synthesis only dataset pheatmap(Mechs_s$Heatmap_df, cluster_cols = FALSE, show_rownames = FALSE, color = col) # Heatmap for differential degradation only dataset pheatmap(Mechs_d$Heatmap_df, cluster_cols = FALSE, show_rownames = FALSE, color = col) ## ----fig.align='center'------------------------------------------------------- # Scatter plot of L2FC(kdeg) vs. L2FC(RNA) colored by mechanism test stat # Gotta transform the mech_stat because it spans many orders of magnitude ggplot(Mechs_s$Mechanism_df, aes(x = L2FC_kdeg, y = L2FC_RNA, color = log10(abs(mech_stat) + 1)*sign(mech_stat))) + geom_point() + theme_classic() + scale_color_viridis_c() + xlab("L2FC(kdeg) from bakR") + ylab("L2FC(RNA) from DESeq2") + labs(color = "Mechanism") ## ----fig.align='center'------------------------------------------------------- # Scatter plot of L2FC(kdeg) vs. L2FC(RNA) colored by mechanism test stat # Gotta transform the mech_stat because it spans many orders of magnitude ggplot(Mechs_d$Mechanism_df, aes(x = L2FC_kdeg, y = L2FC_RNA, color = log10(abs(mech_stat) + 1)*sign(mech_stat))) + geom_point() + theme_classic() + scale_color_viridis_c() + xlab("L2FC(kdeg) from bakR") + ylab("L2FC(RNA) from DESeq2") + labs(color = "Mechanism") ## ----fig.align='center'------------------------------------------------------- ## Grid to plot on # Grid size in each axis n <- 300 # DE z-scores DE_score <- rep(seq(from = -10, to = 10, length.out = n), times = n) # bakR z-scores bakR_score <- rep(seq(from = -10, to = 10, length.out = n), each = n) grid_df <- tibble(DE = DE_score, bakR = bakR_score) # Mechanism score grid_df <- grid_df %>% mutate(Mech = ifelse(DE > 0, (bakR + 2)*DE, (bakR - 2)*DE)) # Calculate p-value grid_df <- grid_df %>% mutate(Mech_pval = stats::df(abs(Mech), df1 = 2, df2 = 2, ncp = 2)) # Visualize test statistic ggplot(grid_df, aes(x = bakR, y = DE, z = 0.03*Mech)) + geom_contour_filled() + theme_classic() + xlab("bakR score") + ylab("DE score")