...
 
......@@ -68,6 +68,13 @@
},
"quality-filter" : {
"type" : "object"
},
"ancom": {
"type": "object",
"properties": {
"m-metadata-column" : { "type": "string" }
},
"required": ["m-metadata-column"]
}
},
"required": ["global"],
......
......@@ -12,6 +12,7 @@ from git import Repo
# Otherwise you can provide a config file when calling snakemake with the --configfile flag
# Default configuration file name; overridable via snakemake --configfile.
config_file = "config.json"
# Name of the run-configuration log written into the output directory.
LOGFILE = "microSysMics-conf.log"
###########################
## ERROR MESSAGES ##
......@@ -70,8 +71,8 @@ def initialize_pipeline(arguments):
# Set default values is some are missing in the configuration file
set_default_values()
# Log the run configuration
log_config()
# Get old log
old_log()
# Whether to parallelize importing, filtering and denoising steps (considerably speed up the process)
parallel_process()
......@@ -94,6 +95,20 @@ def init_logger():
microlog.addHandler(streamHandler)
def old_log():
    """
    Read the previous run's log file into the global ``oldlog`` list.

    Keeps a trace of earlier pipeline runs so that log_config() can write
    the old contents back before the new log entries. On a first run
    (no log file yet) ``oldlog`` is simply left empty.
    """
    global oldlog
    oldlog = []
    log_path = config["global"]["outdir"] + "/" + LOGFILE
    try:
        # Use a context manager so the handle is closed deterministically;
        # the original comprehension-over-open() leaked the file handle
        # until garbage collection. readlines() preserves the trailing
        # newlines exactly like the original list comprehension did.
        with open(log_path) as handle:
            oldlog = handle.readlines()
    except FileNotFoundError:
        # First run: no previous log to carry over.
        pass
def log_config():
"""
Log the content of the config file + possible default values
......@@ -101,9 +116,15 @@ def log_config():
# Export log to a log file
os.makedirs(config["global"]["outdir"], exist_ok=True)
fileHandler = logging.FileHandler(config["global"]["outdir"] + "/microSysMics-conf.log")
fileHandler = logging.FileHandler(config["global"]["outdir"] + "/" + LOGFILE)
microlog.addHandler(fileHandler)
# Write old log to keep a trace
outF = open(config["global"]["outdir"] + "/" + LOGFILE, "w")
for line in oldlog:
outF.write(line)
outF.close()
# Set run start time in the log
now = datetime.datetime.now()
microlog.info("-------- " + now.strftime("%Y-%m-%d %H:%M") + " --------")
......@@ -344,7 +365,8 @@ def parallel_process():
samples_ids = list(set(df["sample-id"]))
global tool_before_abundance_table
tool_before_abundance_table = ["split_manifest", "import", "cutadapt", "join_pairs", "quality_filter", "merge_qualityfilter_stats", "merge_qualityfilter_stats_2", "deblur", "dada2"]
tool_before_abundance_table = ["split_manifest", "import", "cutadapt", "join_pairs",
"quality_filter", "merge_qualityfilter_stats", "merge_qualityfilter_stats_2", "deblur", "dada2"]
def prepare_fastqc():
......@@ -467,6 +489,9 @@ def getInput(tool, given_file=""):
else:
return getOutput("merge_denoiser_results", "table.qza") if config["global"]["parallel-samples"] else getOutput("denoiser", given_file)
if tool == "ancom":
return getOutput("merge_denoiser_results", "table.qza") if config["global"]["parallel-samples"] else getOutput("denoiser", given_file)
def getOutput(tool, given_file=""):
"""
......@@ -560,6 +585,7 @@ def format_all_rule():
Format the all rule according to config file
"""
all_input = list()
all_input.append(config["global"]["outdir"] + "/" + LOGFILE)
all_input.append(getOutput("multiqc"))
if "denoiser" in config["global"]:
......@@ -579,6 +605,9 @@ def format_all_rule():
if "alpha-rarefaction" in config.keys():
all_input.append(getOutput("alpha_rarefaction", "results/index.html"))
if "ancom" in config.keys():
all_input.append(getOutput("ancom", "volcano/index.html"))
return all_input
......@@ -600,6 +629,14 @@ initialize_pipeline(arguments)
# Top-level target: format_all_rule() assembles every expected final output
# (log file, multiqc report, denoiser/diversity/ancom results, ...) from the
# configuration.
rule all:
input: format_all_rule()
# Write the run configuration to <outdir>/microSysMics-conf.log.
# priority 5 (other rules use 1) so the log file exists before downstream
# rules append their "....OK" progress markers to it.
rule log_config:
# NOTE(review): presumably the path to the user's config file, so the rule
# re-runs when the configuration changes — verify against json_config_path.
input: json_config_path[1]
output: config["global"]["outdir"] + "/" + LOGFILE
priority: 5
run:
log_config()
###########################
## FASTQC ##
###########################
......@@ -611,6 +648,7 @@ rule fastqc:
output: getOutput("fastqc")
params: options = getOptions("fastqc")
conda: "conda/fastqc.yml"
priority: 1
shell: """
mkdir -p `dirname {output[0]}`
perl `which fastqc` {params.options} --outdir `dirname {output[0]}` {input}
......@@ -619,9 +657,12 @@ rule fastqc:
# Aggregate the FastQC reports under <outdir>/fastqc into a single MultiQC
# report, then append a progress marker to the shared run log.
rule multiqc:
input: getInput("multiqc")
output: getOutput("multiqc")
# Shared run log; the shell block appends "MultiQc.....OK" to it on success.
params: logfile = config["global"]["outdir"] + "/" + LOGFILE
conda: "conda/multiqc.yml"
priority: 1
shell: """
multiqc -f {outdir}/fastqc --outdir `dirname {output[0]}`
echo "\nMultiQc.....OK" >> {params.logfile}
"""
if "denoiser" in config["global"]:
......@@ -768,12 +809,15 @@ if "denoiser" in config["global"]:
output: getOutput("summarize_table", "table.qzv"),
getOutput("summarize_table", "rep-seqs.qzv"),
getOutput("summarize_table")
params: logfile = config["global"]["outdir"] + "/" + LOGFILE
priority: 1
conda: qiime2_yml
shell: """
qiime feature-table summarize --i-table {input[0]} --o-visualization {output[0]}
qiime feature-table tabulate-seqs --i-data {input[1]} --o-visualization {output[1]}
qiime tools export --input-path {output[0]} --output-path {output[2]}table_html
qiime tools export --input-path {output[1]} --output-path {output[2]}seqs_html
echo "\nDenoising.....OK" >> {params.logfile}
"""
# Export OTU table in tsv format
......@@ -908,7 +952,8 @@ if "denoiser" in config["global"]:
jaccard = getOutput("diversity", "pcoa/jaccard"),
unweighted_unifrac = getOutput("diversity", "pcoa/unweighted_unifrac"),
html = getOutput("diversity", "pcoa/weighted_unifrac/emperor.html")
params: options = getOptions("diversity")
params: options = getOptions("diversity"),
logfile = config["global"]["outdir"] + "/" + LOGFILE
conda: qiime2_yml
shell: """
qiime diversity core-metrics-phylogenetic --i-phylogeny {input[0]} --i-table {input[1]} --m-metadata-file {input[2]} --o-rarefied-table {output[0]} --o-faith-pd-vector {output[1]} --o-observed-otus-vector {output[2]} --o-shannon-vector {output[3]} --o-evenness-vector {output[4]} --o-unweighted-unifrac-distance-matrix {output[5]} --o-weighted-unifrac-distance-matrix {output[6]} --o-jaccard-distance-matrix {output[7]} --o-bray-curtis-distance-matrix {output[8]} --o-unweighted-unifrac-pcoa-results {output[9]} --o-weighted-unifrac-pcoa-results {output[10]} --o-jaccard-pcoa-results {output[11]} --o-bray-curtis-pcoa-results {output[12]} --o-unweighted-unifrac-emperor {output[13]} --o-weighted-unifrac-emperor {output[14]} --o-jaccard-emperor {output[15]} --o-bray-curtis-emperor {output[16]} {params.options}
......@@ -916,6 +961,7 @@ if "denoiser" in config["global"]:
qiime tools export --input-path {output[15]} --output-path {output.jaccard}
qiime tools export --input-path {output[13]} --output-path {output.unweighted_unifrac}
qiime tools export --input-path {output[14]} --output-path `dirname {output.html}`
echo "\nDiversity.....OK" >> {params.logfile}
"""
rule alpha_group_significance:
......@@ -964,14 +1010,38 @@ if "denoiser" in config["global"]:
getOutput("taxonomy", "taxons/index.html"),
getOutput("taxonomy", "barplot/index.html"),
conda: qiime2_yml
params: options = getOptions("taxonomy", ["classifier"]),
logfile = config["global"]["outdir"] + "/" + LOGFILE
shell: """
qiime feature-classifier classify-sklearn --i-classifier {input[0]} --i-reads {input[1]} --o-classification {output[0]}
qiime feature-classifier classify-sklearn --i-classifier {input[0]} --i-reads {input[1]} --o-classification {output[0]} {params.options}
qiime metadata tabulate --m-input-file {output[0]} --o-visualization {output[1]}
qiime taxa barplot --i-table {input[2]} --i-taxonomy {output[0]} --m-metadata-file {input[3]} --o-visualization {output[2]}
qiime tools export --input-path {output[1]} --output-path `dirname {output[3]}`
qiime tools export --input-path {output[2]} --output-path `dirname {output[4]}`
echo "\nTaxonomy.....OK" >> {params.logfile}
"""
# ANCOM (differential expression)
# Differential-abundance analysis via QIIME 2's composition plugin; only
# enabled when an "ancom" section is present in the configuration (the
# config schema requires "m-metadata-column" inside that section).
if 'ancom' in config.keys():
rule ancom:
input:
# Feature table: merged per-sample tables when parallel-samples mode is
# on, otherwise the single denoiser output (see getInput("ancom")).
getInput("ancom", "table.qza"),
# NOTE(review): presumably the sample metadata file passed as
# --m-metadata-file below — confirm what config["global"]["context"] holds.
config["global"]["context"]
output:
getOutput("ancom", "table-pseudocount.qza"),
getOutput("ancom", "ancom.qzv"),
getOutput("ancom", "volcano/index.html")
conda: qiime2_yml
# User-supplied CLI options for `qiime composition ancom`; per the schema
# the ancom config must include m-metadata-column.
params: options = getOptions("ancom")
shell: """
qiime composition add-pseudocount --i-table {input[0]} --o-composition-table {output[0]}
qiime composition ancom --i-table {output[0]} --m-metadata-file {input[1]} --o-visualization {output[1]} {params.options}
qiime tools export --input-path {output[1]} --output-path `dirname {output[2]}`
"""
###########################
## CLEAN RULES ##
###########################
......