Commit e63b5b2d authored by Erwan DELAGE's avatar Erwan DELAGE
Browse files

Remplacment des EnhancedVolcano plot par les volcano interactifs de Philippe

parent a7e1dbde
......@@ -5,9 +5,11 @@ channels:
dependencies:
- bioconductor-deseq2
- r-docopt
- bioconductor-enhancedvolcano
- r-rjson
- r-ggplot2
- r-tidyverse
- r-hrbrthemes
- r-svglite
\ No newline at end of file
- r-svglite
- r-ggrepel
- r-ggiraph
- r-rcolorbrewer
\ No newline at end of file
......@@ -20,9 +20,11 @@ library(docopt, quietly = TRUE)
#library(docstring)
library(rjson, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(EnhancedVolcano, quietly = TRUE)
library(tidyverse, quietly = TRUE)
library(hrbrthemes, quietly = TRUE)
library("ggrepel", quietly=TRUE)
library("ggiraph", quietly=TRUE)
library("RColorBrewer", quietly=TRUE)
#######################
# FILTERING FUNCTIONS #
......@@ -79,43 +81,136 @@ filter_on_prevalence_per_condition <- function(abundance_table, prevalence_thres
# PLOTTING FUNCTIONS #
######################
enhanced_volcano_deseq2 <- function(deseq2_res, title="", outfile=""){
#' Export volcano plot of feature fold change
prepare_volcano <- function(deseq2_res, taxo, taxoID=FALSE, adjPValThreshold=0.05, diffThreshold=0, maxLabels=30){
#' Prepare data (deseq2 results + taxonomy) for volcano plot
#'
#' @description Filter abundance table based on feature prevalence. All features found in more than @prevalence_threshold samples
#' in at least one group of samples are kept, others are discarded.
#' @description Prepare data (deseq2 results + taxonomy) for volcano plot
#'
#' @param deseq2_res array deseq2 results
#' @param title string plot title
#' @param outfile string output plot destination
# Create EnhancedVolcano plot
volcano <- EnhancedVolcano(deseq2_res,
lab = rownames(deseq2_res),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
col=c('black', 'black', 'black', 'red3'),
colAlpha = 0.8,
labSize = 0,
title = title,
drawConnectors = FALSE,
widthConnectors = 0.2,
colConnectors = 'grey30')
#' @param deseq2_res DESeq2 results object
#' @param taxo DataFrame associating taxonomy to ASVs
#' @param taxoID Whereas DESeq2 has been run on ASVs (default=FALSE) or at higher taxonomic rank (TRUE)
#' @param adjPValThreshold p-value threshold used to retain significant features (default=0.05)
#' @param diffThreshold log foldchange threshold used to retain significant features (default=0, i.e. all features retained)
#' @param maxLabels maximum number of feature labels displayed on the graph (default=30)
#' @return formated data ready to be feeded to volcano plot function.
# Plot graph if no outfile specified (testing mode)
if (outfile == "")
{
return(p)
}
# Otherwise save it
else
{
ggsave(file=outfile, plot = volcano)
}
# Turn deseq2 results object into a data frame
df <- as.data.frame(deseq2_res)
# Create a column DE classifying features in 3 categories : Upregulated, Downregulated or Non-Significant
df <- cbind(df, DE = ifelse((df[["padj"]] <= adjPValThreshold) & (abs(df$log2FoldChange) >= diffThreshold),
ifelse(df$log2FoldChange >= 0, "UP", "DOWN"), "NS"))
df <- df %>% replace_na(list(DE = "NS"))
df$DE <- factor(df$DE, levels = c("DOWN", "UP", "NS"))
# We can perform DE analysis at several taxonomic levels. Whereas the ASVs identifier are md5 hashcodes of
# their sequence, higher taxorank identifiers are simply the taxo string (e.g. d_Bacteria;p_Bacteoridetes...).
# Here we format what will be displayed as labels and in tooltip accordingly.
if (taxoID){ # Taxostring
df$Label <- sapply(strsplit(rownames(df),";"), tail, 1)
df$Tooltip <- rownames(df) %>% str_replace_all(";", "\n")
} else { # Feature ID is md5 (ASV)
df <- df %>%
bind_cols(Label=rownames(df)) %>% # Create a Label column with feature IDs
bind_cols(Tooltip=taxo[rownames(df),"Taxon"]) # Create a Tooltip column with taxonomic information
df$Tooltip <- df$Tooltip %>% str_replace_all(";", "\n") # Format taxonomic information for tooltip visualization
df$Tooltip <- paste(df$Label, "\n", df$Tooltip) # Add label before taxonomy
}
# Set uninteresting labels to blank
# Sort features according to increasing pvalues (used to only set the label of the top maxLabels features)
df <- df %>% arrange(!!sym("padj"))
maxLabels = min(maxLabels, sum(df["DE"] != "NS")) + 1
df[maxLabels:dim(df)[1],"Label"] = ""
return(df)
}
volcano <- function(df, cond1, cond2, diffThreshold=0, adjPValThreshold=0.05, showLabels=TRUE){
#' Volcano plot to visualize differential abundance
#'
#' @description Volcano plot to visualize differential abundance
#'
#' @param df DataFrame with DA results
#' @param cond1 First condition
#' @param cond2 Second condition
#' @param adjPValThreshold p-value threshold used to retain significant features (default=0.05)
#' @param diffThreshold log foldchange threshold used to retain significant features (default=0, i.e. all features retained)
#' @param showLabels whether to display node labels on graph or not (default=TRUE)
#' @return ggplot object
# Set color palette
palette12 <- brewer.pal(n = 12, name = "Paired")
palette <- list("down" = palette12[4], # green
"downBg" = palette12[3], # light green
"up" = palette12[6], # red
"upBg" = palette12[5], # light red
"ns" = "grey75"
)
# Build plot
p <- ggplot(data=df, aes(x=log2FoldChange, y=-log10(!!sym("padj")))) +
# We add lines to split into areas (NS, UP, DOWN)
geom_hline(aes(yintercept=-log10(adjPValThreshold))) +
geom_vline(aes(xintercept=diffThreshold)) +
geom_vline(aes(xintercept=-diffThreshold)) +
# We color up regulated and down regulated areas
annotate("rect", xmin=diffThreshold, xmax=Inf, ymin=-log10(adjPValThreshold), ymax=Inf, fill=palette[["upBg"]], alpha=0.1) +
annotate("rect", xmin = -Inf, xmax = -diffThreshold, ymin = -log10(adjPValThreshold), ymax = Inf, fill=palette[["downBg"]], alpha=0.1) +
# We separate the dot ploting into 2 layers in order to put rare dots on first plan
geom_point_interactive(data = df %>% filter(DE == "NS"), size = 1.5, aes(color = DE, tooltip = Tooltip)) +
geom_point_interactive(data = df %>% filter(DE != "NS"), size = 1.5, aes(color = DE, tooltip = Tooltip)) +
# We set the axes labels
scale_y_continuous("adj. pvalue",
breaks= c(0, -log10(0.5), -log10(0.25), -log10(0.1), -log10(0.05), -log10(0.01)),
labels= c(signif(10^(-0), 2), 0.5, 0.25, 0.1, 0.05, 0.01)) +
xlab(expression(~log[2] ~ ~"(Fold-Change)")) +
# We set the legend
scale_color_manual(name="Over-abundant in", # define title in legend
values = list("UP" = palette[["up"]], "DOWN" = palette[["down"]], "NS" = palette[["ns"]]), # define color,
labels = list("UP" = paste0(cond1, ' (n=', nrow(filter(df, DE == "UP")), ')'),
"DOWN" = paste0(cond2, ' (n=', nrow(filter(df, DE == "DOWN")), ')'),
"NS" = paste0("Not significant", ' (n=', nrow(filter(df, DE == "NS")), ')')
),
breaks=c("DOWN", "UP", "NS"),
drop = FALSE) +
# We set the plot title
labs(title = paste0("DESeq2 : ", cond1," vs ", cond2),
subtitle = paste0(nrow(filter(df,! DE %in% c("NS"))) ,"/", nrow(df)," Over-abundant taxa")) +
# Theme
theme_bw() + theme(legend.position="right",
plot.title = element_text(size=10),
plot.subtitle = element_text(size=8),
axis.title = element_text(size=10),
text = element_text(size=10))
# Add the node labels
if (showLabels) {
p <- p + geom_text_repel(
data = df,
aes(label = Label, fontface="italic"),
size = 3,
point.padding = unit(0.1, "lines")
)
}
return(p)
}
boxplot_significant_features <- function(feature_id, abundance_table, context_table, context_condition, taxon, outfile="") {
#' Create boxplot of raw abundance for a givean feature
#'
......@@ -173,12 +268,11 @@ boxplot_significant_features <- function(feature_id, abundance_table, context_ta
arguments <- docopt(doc, version = 'DESeq2 microSysMics 1.0')
# Input test parameters
#arguments = vector()
#arguments["<abundance_table>"] = "/Users/delage-e/workspace/microSysMics/out_dada2/export_table_tsv/abundance_table.tsv"
#arguments["<abundance_table>"] = "/Users/delage-e/workspace/microSysMics/out_dada2/taxorank_tables/genus_table.tsv"
#arguments["<contextual_table>"] = "/Users/delage-e/workspace/microSysMics/test/context.tsv"
#arguments["<config>"] = "/Users/delage-e/workspace/microSysMics/config.json"
#arguments["<outdir>"] = "/Users/delage-e/workspace/microSysMics/out_dada2/deseq2_test"
#arguments["<outdir>"] = "/Users/delage-e/workspace/microSysMics/out_dada2/deseq2_test"
#arguments["<taxo>"] = "/Users/delage-e/workspace/microSysMics/out_dada2/taxonomy/taxons/metadata.tsv"
#arguments["taxo"] = "/Users/delage-e/workspace/microSysMics/out_dada2/taxonomy/taxons/metadata.tsv"
# Load abundance table
abundance <- read.csv(arguments[["<abundance_table>"]],row.names=1, check.names = FALSE, sep="\t")
......@@ -203,10 +297,15 @@ if (is.null(deseq2_params[["sftype"]])) deseq2_params[["sftype"]] <- "poscounts"
dir.create(arguments[["<outdir>"]], showWarnings = FALSE)
# Load taxo file (if provided)
if (!is.null(arguments["taxo"][[1]])){
if (is.null(arguments["taxo"][[1]])){
taxoProvided=FALSE
}else
{
taxo <- read.csv(toString(arguments["taxo"]), sep="\t", check.names = FALSE, row.names = 1)
taxoProvided=TRUE
}
##########################
# FILTER ABUNDANCE TABLE #
##########################
......@@ -272,7 +371,7 @@ for(i in 1:(length(category)-1)){
resSig <- resSig[,c(ncol(resSig),1:(ncol(resSig)-1))]
# Add taxo info if provided
if (!is.null(arguments["taxo"][[1]])){
if (taxoProvided){
resSig <- merge(as.data.frame(resSig), taxo, by="row.names",all.x=TRUE)
rownames(resSig) <- resSig$Row.names
resSig <- resSig[2:length(resSig)]
......@@ -286,8 +385,13 @@ for(i in 1:(length(category)-1)){
write.csv(resSig, file=paste0(outdir,"/deseq2_results.csv"))
# Volcano plot
enhanced_volcano_deseq2(res, title=paste(deseq2_params[["condition"]], ":", category[i], "vs", category[j]), outfile=paste0(outdir,"/volcano_plot.svg"))
res_volcano <- prepare_volcano(res, taxo, taxoID = !taxoProvided)
p <- volcano(res_volcano, category[i], category[j])
# Export volcano
h <- girafe(ggobj = p, pointsize = 16, width_svg = 12, height_svg = 10)
htmlwidgets::saveWidget(h, paste0(outdir,"/volcano_plot.html"), selfcontained = TRUE)
# Put boxplot in subdirectory
outdir = paste0(outdir, "/boxplot_tss_abundance")
dir.create(outdir, showWarnings = FALSE)
......@@ -296,7 +400,7 @@ for(i in 1:(length(category)-1)){
for (feature_id in rownames(resSig)){
# Get taxo info if provided
if (!is.null(arguments["taxo"][[1]])){
if (taxoProvided){
taxon <- resSig[feature_id,"Taxon"]
}
else{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment