Commit f7dcbeee authored by Erwan DELAGE's avatar Erwan DELAGE
Browse files

Merge branch 'qiime2' into 'develop'

Première version Qiime2 du pipeline sur Jenkins

See merge request bird_pipeline_registry/Deblur_Pipeline!3
parents e528ff4c 269850ae
......@@ -13,9 +13,9 @@ else
fi
export PATH=${1}/miniconda/bin:$PATH
# Activation of DeblurPipeline
echo "=== activate the DeblurPipeline conda environment ==="
source activate DeblurPipeline
# Activation of MicrobiomePipeline
echo "=== activate the microbiome conda environment ==="
source activate microbiome
# Configure json environment : Single End or Pair Ends
echo "=== configure the pipeline environement ==="
......@@ -27,19 +27,14 @@ snakemake -p --latency-wait 60 --jobs ${6} --jobscript deblur.sh
# check the result
echo "=== check the result ==="
export checksum=`md5sum /mnt/data/projetTest${4}/results/F3D0_S188_L001_R1_001/all.seqs.fa | awk '{print $1}'`
export checksum_ref="89c0a3d1cc26f67c5262c0f6b7a6f341"
if [[ "$checksum" == "$checksum_ref" ]]; then
echo "succes"
if [ -f /mnt/data/projetTest${4}/results/abundance_matrix/abundance_matrix.tsv ]; then
echo "success"
else
echo "failure"
echo "checksum = $checksum"
echo "checksum_ref = $checksum_ref"
exit 1
fi
# Deactivation of DeblurPipeline
echo "=== deactivate the DeblurPipeline conda environment ==="
# Deactivation of microbiome
echo "=== deactivate the microbiome conda environment ==="
source deactivate
......@@ -9,7 +9,7 @@ rm -Rf ${1}/.conda
# Remove any previous Miniconda installation found under ${1}.
rm -Rf ${1}/miniconda
# Download the Miniconda installer.
# NOTE(review): both wget lines write to the same ~/miniconda.sh, so the
# second download (Miniconda3-latest) overwrites the pinned 4.1.11 one --
# presumably only one of the two is meant to stay; confirm.
# NOTE(review): the installer is saved to ~/miniconda.sh but executed and
# deleted as ${1}/miniconda.sh below; the two paths only coincide when ${1}
# is the home directory -- confirm against the caller.
wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.1.11-Linux-x86_64.sh -O ~/miniconda.sh
wget --quiet https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
# Run the installer with ${1}/miniconda as its -p target.
/bin/bash ${1}/miniconda.sh -b -p ${1}/miniconda
# The installer script is no longer needed once it has run.
rm -f ${1}/miniconda.sh
......@@ -19,11 +19,9 @@ export PATH=${1}/miniconda/bin:$PATH
if [ ${3} = "Sources" ]
then
echo "=== install conda environments from the git sources ==="
# Install of the DeblurPipeline environment
conda env create -n DeblurPipeline -qf ${2}/conda/DeblurEnv.yml
# Install of the microbiome environment
conda env create -n microbiome -f ${2}/conda/microbiome.yml
# Install of the QiimeEnv environment
conda env create -qf ${2}/conda/QiimeEnv.yml
elif [ ${3} = "Package" ]
then
......
import glob
import ntpath
import sys
import json
# Load config file
wdPath = os.path.dirname(workflow.snakefile)
configfile: wdPath + "/config.json"
def getOptionalKey(key, defaultValue=""):
###########################
## FUNCTIONS ##
###########################
def getOptions(tool, exclude=()):
    """Build the command-line option string of one tool from the config.

    Every entry of ``config[tool]`` is rendered as ``"--<key> <value> "``
    (with a trailing space) and the pieces are concatenated in the iteration
    order of that config section.

    Args:
        tool: Name of the section of the global ``config`` mapping to read.
        exclude: Keys of that section that must not appear in the result.

    Returns:
        The concatenated options, or ``""`` when the section is empty or
        every key is excluded.

    Raises:
        KeyError: If ``config`` has no section named ``tool``.
    """
    # The default for ``exclude`` is an immutable tuple so that no list
    # object is shared between calls; the pieces are joined once instead of
    # growing a string with repeated ``+=``.
    return "".join(
        "--" + key + " " + str(value) + " "
        for key, value in config[tool].items()
        if key not in exclude
    )
def getInputFiles():
try:
value = config[key]
if value is None:
return defaultValue
else:
return value
inputfiles = os.listdir(config["global"]["inputdir"])
inputfiles = [config["global"]["inputdir"] + "/" +
x for x in inputfiles if not x.startswith(".")]
return inputfiles
except KeyError:
return defaultValue
inFiles = set() # set of all input files (only file names)
# True if the reads are paired-end, False if they are single-end
fastqType = (config["FASTQ_TYPE"] == "pairedEnd")
# creation of the logs subdirectory
if not os.path.exists(wdPath + "/logs"):
os.mkdir(wdPath + "/logs")
def _normalizeConfigPath(key):
    """Check the path stored in ``config[key]`` and drop its trailing "/".

    If the path does not exist, an explanatory message is printed and the
    workflow stops. If the path ends with "/", the trailing slash is removed
    from the in-memory ``config`` and the same value is written back to the
    config.json file the configuration was loaded from.

    Args:
        key: Name of the config field to check, e.g. "FASTQ_PATH".

    Raises:
        SystemExit: With status 1 when the configured path does not exist.
        KeyError: If ``config`` has no entry named ``key``.
    """
    pathway = config[key]
    if not os.path.exists(pathway):
        print("The directory " + pathway +
              " doesn't exist. Check the field " + key +
              " into the config.json file.")
        # A missing directory is an error: exit with a non-zero status so
        # that the calling shell or CI job does not see it as a success.
        sys.exit(1)
    if pathway.endswith("/"):
        print("The pathway " + key + " is updated.")
        stripped = pathway.rstrip("/")
        config[key] = stripped
        # Update the file that was actually loaded (next to the Snakefile),
        # not a config.json in whatever the current directory happens to be.
        with open(os.path.join(wdPath, "config.json"), "r+") as jsonFile:
            data = json.load(jsonFile)
            data[key] = stripped
            jsonFile.seek(0)  # rewind
            jsonFile.write(json.dumps(data))
            # the new json object is shorter than the previous one
            jsonFile.truncate()


# test of the fastq path provided in the config.json file
_normalizeConfigPath("FASTQ_PATH")
# test of the output path provided in the config.json file
_normalizeConfigPath("OUTPUT_PATH")
# creation of the split subdirectory
if not os.path.exists(config["OUTPUT_PATH"] + "/split"):
os.mkdir(config["OUTPUT_PATH"] + "/split")
# fastq.gz or fastq files detection
# Paired-end runs are listed through their R1 files only; single-end runs
# take every fastq file found in FASTQ_PATH. The compressed and the
# uncompressed candidates are collected separately, in that order.
inPathsGz, inPathsNotGz = (
    glob.glob(config["FASTQ_PATH"] + ("/*_R1*" if fastqType else "/*") + suffix)
    for suffix in (".fastq.gz", ".fastq")
)
if ((bool(inPathsGz) + bool(inPathsNotGz)) == 1):
if inPathsGz:
gzDetection = True
# compute .fastq.gz input path
inPaths = inPathsGz
else:
gzDetection = False
# compute .fastq input path
inPaths = inPathsNotGz
else:
if ((bool(inPathsGz) + bool(inPathsNotGz)) == 2):
print("Both .fastq and .fastq.gz files detected in the directory : " +
config["FASTQ_PATH"])
sys.exit(0)
print("You have to specifie an input directory in the JSON config file.")
def getOutput(tool, extension=""):
if extension:
return outdir + "/" + tool + "/" + tool + extension
else:
print("No .fastq or .fastq.gz files detected in the directory : " +
config["FASTQ_PATH"])
sys.exit(0)
return outdir + "/" + tool
###########################
## PRE-TREATMENT ##
###########################
# extract file names from paths
for p in inPaths:
inFiles.add(os.path.basename(p).replace(
(gzDetection and ".fastq.gz" or ".fastq"), ""))
# Get output directory (mandatory)
try:
    outdir = config["global"]["outdir"]
except KeyError:
    print("You have to specifie an output directory in the JSON config file.")
    # Without an output directory every later getOutput() call would fail
    # with a NameError on `outdir`; stop here with a non-zero status instead.
    sys.exit(1)
# Get input files (mandatory)
inputFiles = getInputFiles()
###########################
## RULES ##
###########################
##############
rule all:
input: config["OUTPUT_PATH"] + "/merged_otu_table.tsv"
input: getOutput("abundance_matrix", ".tsv"),
getOutput("fastqc")
# Quality-control rule: runs the fastqc script (through perl) on all the
# input files and writes its reports into the "fastqc" output directory.
rule fastqc:
# inputFiles is the list returned by getInputFiles().
input: inputFiles
output: getOutput("fastqc")
# Extra command-line flags come from the "fastqc" section of the config.
params: options = getOptions("fastqc")
# The output directory is created first, then passed to fastqc as --outdir.
shell: """
mkdir -p {output}
perl `which fastqc` {params.options} --outdir {output} {input}
"""
##############
rule split:
input: config["FASTQ_PATH"] + "/{filename}" + (gzDetection and ".fastq.gz" or ".fastq")
output: config["OUTPUT_PATH"] + "/split/{filename}/seqs.fna"
params: qiime_metadata = getOptionalKey("QIIME_METADATA"), qiime_barcode = getOptionalKey("QIIME_BARCODE"), qiime_quality_threshold = config["QIIME_QUALITY_THRESHOLD"], fastq_path = config["FASTQ_PATH"], output_path = config["OUTPUT_PATH"]
rule import:
input: config["global"]["inputdir"]
output: getOutput("import", ".qza")
params: options = getOptions("import")
shell: """
source activate QiimeEnv
echo $HOSTNAME
echo $PATH
echo $(which conda)
mkdir -p {params.output_path}/split/{wildcards.filename}
if [ -z {params.qiime_metadata} ]
then
if [ -z {params.qiime_barcode} ]
then
echo "No metadata and no barcode for QIIME2."
split_libraries_fastq.py -i {input} --sample_ids {wildcards.filename} -o {params.output_path}/split/{wildcards.filename} -q {params.qiime_quality_threshold} --barcode_type not-barcoded
else
echo "No metadata but barcode for QIIME2."
split_libraries_fastq.py -i {input} -o {params.output_path}/split/{wildcards.filename} -b {params.qiime_barcode} -q {params.qiime_quality_threshold}
fi
else
if [ -z {params.qiime_barcode} ]
then
echo "No barcode but metadata arguments for QIIME2."
split_libraries_fastq.py -i {input} --sample_ids {wildcards.filename} -m {params.qiime_metadata} -o {params.output_path}/split/{wildcards.filename} -q {params.qiime_quality_threshold} --barcode_type not-barcoded
else
echo "Barcode and metadata arguments for QIIME2."
split_libraries_fastq.py -i {input} -m {params.qiime_metadata} -o {params.output_path}/split/{wildcards.filename} -b {params.qiime_barcode} -q {params.qiime_quality_threshold}
fi
fi
mkdir -p $(dirname {output})
qiime tools import --input-path {input} --output-path {output} {params.options}
"""
# Quality-filtering rule: runs `qiime quality-filter q-score` on the artifact
# produced by the import rule and stores the results in the "quality_filter"
# output directory.
rule quality_filter:
input: getOutput("import", ".qza")
output: getOutput("quality_filter")
# Extra command-line flags come from the "quality_filter" config section.
params: options = getOptions("quality_filter")
# Only the parent of the output directory is created here; {output} itself
# is handed to qiime through --output-dir.
shell: """
mkdir -p $(dirname {output})
qiime quality-filter q-score --i-demux {input} --output-dir {output} {params.options}
"""
rule deblur:
input: config["OUTPUT_PATH"] + "/split/{filename}/seqs.fna"
output: config["OUTPUT_PATH"] + "/{filename}/all.biom"
params: deblur_trimming = config["DEBLUR_TRIMMING"], deblur_min_reads = config["DEBLUR_MIN_READS"], deblur_cpu = config["DEBLUR_CPU"], output_path = config["OUTPUT_PATH"]
input: getOutput("quality_filter")
output: getOutput("deblur")
params: options = getOptions("deblur")
shell: """
mkdir -p $(dirname {output})
qiime deblur denoise-16S --i-demultiplexed-seqs {input}/filtered_sequences.qza --output-dir {output} {params.options}
"""
rule abundance_matrix:
input: getOutput("deblur")
output: getOutput("abundance_matrix", ".tsv")
shell: """
mkdir -p {params.output_path}/{wildcards.filename}
deblur workflow --seqs-fp {input} --output-dir {params.output_path}/{wildcards.filename} -t {params.deblur_trimming} --min-reads {params.deblur_min_reads} -O {params.deblur_cpu} -w
mkdir -p $(dirname {output})
qiime tools export {input}/table.qza --output-dir $(dirname {output})
biom convert -i $(dirname {output})/feature-table.biom -o {output} --to-tsv
"""
rule merge:
input: expand(config["OUTPUT_PATH"] + "/{filename}/all.biom", filename=inFiles)
output: config["OUTPUT_PATH"] + "/merged_otu_table.tsv"
params: bioms = ",".join(expand(config["OUTPUT_PATH"] + "/{filename}/all.biom", filename=inFiles))
rule clean:
shell: """
source activate QiimeEnv
merge_otu_tables.py -i {params.bioms} -o {output}.biom
biom convert -i {output}.biom -o {output} --to-tsv
rm {output}.biom
rm -r {outdir}
"""
name: DeblurEnv
channels:
- conda-forge
- bird
- bioconda
- r
- defaults
dependencies:
- biom-format=2.1.5=py35_3
- mafft=7.221=0
- python-dateutil=2.3=py35_0
- snakemake=4.0.0=py35_1
- sortmerna=2.0=1
- vsearch=2.0.3=0
- deblur=1.0.2=py35_0
- scikit-bio=0.5.1=py35_0
- aioeasywebdav=2.2.0=py35_0
- aiohttp=2.0.7=py35_0
- asn1crypto=0.22.0=py35_0
- async-timeout=1.2.1=py35_0
- blas=1.1=openblas
- ca-certificates=2017.7.27.1=0
- certifi=2017.7.27.1=py35_0
- cffi=1.10.0=py35_0
- chardet=3.0.4=py35_0
- click=6.7=py35_0
- cryptography=1.9=py35_0
- cycler=0.10.0=py35_0
- dbus=1.10.10=3
- decorator=4.1.2=py35_0
- docutils=0.13.1=py35_0
- dropbox=7.3.1=py35_0
- expat=2.2.1=0
- filechunkio=1.8=py35_1
- fontconfig=2.12.1=4
- freetype=2.7=1
- ftputil=3.3.1=py35_0
- future=0.16.0=py35_0
- gettext=0.19.7=1
- glib=2.51.4=0
- gst-plugins-base=1.8.0=0
- gstreamer=1.8.0=2
- h5py=2.7.0=np113py35_1
- hdf5=1.8.18=0
- icu=58.1=1
- idna=2.5=py35_0
- ipython=6.1.0=py35_0
- ipython_genutils=0.2.0=py35_0
- jedi=0.10.2=py35_0
- jpeg=9b=0
- libffi=3.2.1=3
- libiconv=1.14=4
- libpng=1.6.28=0
- libxcb=1.12=1
- libxml2=2.9.4=4
- lockfile=0.12.2=py35_0
- matplotlib=2.0.2=py35_2
- multidict=2.1.4=py35_0
- natsort=5.0.2=py35_0
- ncurses=5.9=10
- nose=1.3.7=py35_2
- numpy=1.13.1=py35_blas_openblas_200
- openblas=0.2.19=2
- openssl=1.0.2l=0
- pandas=0.20.3=py35_1
- paramiko=2.2.1=py35_0
- pcre=8.39=0
- pexpect=4.2.1=py35_0
- pickleshare=0.7.3=py35_0
- pip=9.0.1=py35_0
- prompt_toolkit=1.0.15=py35_0
- psutil=5.2.2=py35_0
- ptyprocess=0.5.2=py35_0
- pyasn1=0.3.2=py35_0
- pycparser=2.18=py35_0
- pygments=2.2.0=py35_0
- pynacl=1.1.2=py35_0
- pyopenssl=16.2.0=py35_0
- pyparsing=2.2.0=py35_0
- pyqt=5.6.0=py35_4
- pysftp=0.2.9=py35_0
- pysocks=1.6.7=py35_0
- python=3.5.4=0
- pytz=2017.2=py35_0
- pyyaml=3.12=py35_1
- qt=5.6.2=3
- readline=6.2=0
- requests=2.18.3=py35_0
- scipy=0.19.1=py35_blas_openblas_202
- setuptools=36.2.2=py35_0
- simplegeneric=0.8.1=py35_0
- sip=4.18=py35_1
- six=1.10.0=py35_1
- sqlite=3.13.0=1
- tk=8.5.19=2
- tornado=4.5.1=py35_0
- traitlets=4.3.2=py35_0
- urllib3=1.21.1=py35_1
- wcwidth=0.1.7=py35_0
- wheel=0.29.0=py35_0
- wrapt=1.10.11=py35_0
- xorg-libxau=1.0.8=3
- xorg-libxdmcp=1.1.2=3
- xz=5.2.3=0
- yaml=0.1.6=0
- yarl=0.10.0=py35_0
- zlib=1.2.8=0
- bcrypt=3.1.3=py35_0
- cachecontrol=0.11.7=py35_0
- libgcc=5.2.0=0
- libgfortran=3.0.0=1
prefix: /sandbox/ylelievre/miniconda3/envs/DeblurEnv
name: QiimeEnv
channels:
- conda-forge
- bird
- bioconda
- r
- defaults
dependencies:
- biom-format=2.1.5=py27_3
- burrito=0.9.1=py27_0
- burrito-fillings=0.1.1=py27_0
- cogent=1.5.3=py27_0
- emperor=0.9.51=py27_0
- pynast=1.2.2=py27_0
- pyqi=0.3.2=py27_0
- python-dateutil=2.3=py27_0
- qcli=0.1.1=py27_0
- qiime=1.9.1=np110py27_1
- qiime-default-reference=0.1.3=py27_0
- scikit-bio=0.2.3=np110py27_0
- backports.shutil_get_terminal_size=1.0.0=py27_1
- ca-certificates=2017.7.27.1=0
- certifi=2017.7.27.1=py27_0
- click=6.7=py27_0
- decorator=4.1.2=py27_0
- enum34=1.1.6=py27_1
- funcsigs=1.0.2=py27_0
- future=0.16.0=py27_0
- gettext=0.19.7=1
- glib=2.51.4=0
- h5py=2.6.0=np110py27_7
- hdf5=1.8.17=11
- icu=58.1=1
- ipython=5.4.1=py27_0
- ipython_genutils=0.2.0=py27_0
- libffi=3.2.1=3
- libiconv=1.14=4
- libuuid=1.0.3=1
- libxcb=1.12=1
- libxml2=2.9.4=4
- mock=2.0.0=py27_0
- mpi4py=2.0.0=py27_2
- mpich=3.2=4
- ncurses=5.9=10
- nose=1.3.7=py27_2
- openssl=1.0.2l=0
- pandas=0.20.3=py27_1
- pathlib2=2.3.0=py27_0
- pbr=3.1.1=py27_0
- pcre=8.39=0
- pexpect=4.2.1=py27_0
- pickleshare=0.7.3=py27_0
- pip=9.0.1=py27_0
- pixman=0.32.6=0
- prompt_toolkit=1.0.15=py27_0
- ptyprocess=0.5.2=py27_0
- pygments=2.2.0=py27_0
- pyqt=4.11.4=py27_2
- python=2.7.13=1
- pytz=2017.2=py27_0
- readline=6.2=0
- scandir=1.5=py27_1
- setuptools=36.2.2=py27_0
- simplegeneric=0.8.1=py27_0
- sip=4.18=py27_1
- six=1.10.0=py27_1
- sqlalchemy=1.1.11=py27_0
- sqlite=3.13.0=1
- tk=8.5.19=2
- traitlets=4.3.2=py27_0
- wcwidth=0.1.7=py27_0
- wheel=0.29.0=py27_0
- xorg-kbproto=1.0.7=1
- xorg-libice=1.0.9=2
- xorg-libsm=1.2.2=2
- xorg-libx11=1.6.4=6
- xorg-libxau=1.0.8=3
- xorg-libxdmcp=1.1.2=3
- xorg-libxext=1.3.3=2
- xorg-libxrender=0.9.10=0
- xorg-renderproto=0.11.1=1
- xorg-xextproto=7.3.0=1
- xorg-xproto=7.0.31=6
- xz=5.2.3=0
- zlib=1.2.11=0
- cairo=1.12.18=6
- fontconfig=2.11.1=6
- freetype=2.5.5=1
- gdata=2.0.18=py27_0
- libgcc=5.2.0=0
- libgfortran=3.0.0=1
- libpng=1.6.17=0
- matplotlib=1.4.3=np110py27_2
- mkl=11.3.3=0
- mysql-python=1.2.5=py27_0
- natsort=3.5.0=py27_0
- numpy=1.10.4=py27_2
- py2cairo=1.10.0=py27_2
- pyparsing=2.0.3=py27_0
- qt=4.8.7=3
- scipy=0.17.1=np110py27_1
prefix: /sandbox/ylelievre/miniconda3/envs/QiimeEnv
name: microbiome
channels:
- biocore
- anaconda
- qiime2
- bioconda
- qiime2/label/r2018.2
- conda-forge
- defaults
dependencies:
- intel-openmp=2018.0.0=hc7b2577_8
- libgcc=7.2.0=h69d50b8_2
- libgcc-ng=7.2.0=h7cc24e2_2
- libgfortran-ng=7.2.0=h9f7466a_2
- libstdcxx-ng=7.2.0=h7a57d05_2
- mkl=2018.0.1=h19d6760_4
- wget=1.19.1=he4ec0ba_0
- bioconductor-biobase=2.38.0=r3.4.1_0
- bioconductor-biocgenerics=0.24.0=r3.4.1_0
- bioconductor-biocparallel=1.12.0=r3.4.1_0
- bioconductor-biostrings=2.46.0=r3.4.1_0
- bioconductor-dada2=1.6.0=r3.4.1_0
- bioconductor-delayedarray=0.4.1=r3.4.1_0
- bioconductor-genomeinfodb=1.14.0=r3.4.1_0
- bioconductor-genomeinfodbdata=1.0.0=r3.4.1_1
- bioconductor-genomicalignments=1.14.0=r3.4.1_0
- bioconductor-genomicranges=1.30.0=r3.4.1_0
- bioconductor-iranges=2.12.0=r3.4.1_0
- bioconductor-rsamtools=1.30.0=r3.4.1_0
- bioconductor-s4vectors=0.16.0=r3.4.1_0
- bioconductor-shortread=1.36.0=r3.4.1_0
- bioconductor-summarizedexperiment=1.8.0=r3.4.1_0
- bioconductor-xvector=0.18.0=r3.4.1_0
- bioconductor-zlibbioc=1.24.0=r3.4.1_0
- blast=2.6.0=boost1.64_2
- cutadapt=1.15=py35_0
- dropbox=5.2.1=py35_0
- fastqc=0.11.5=1
- fasttree=2.1.10=0
- filechunkio=1.6=py35_0
- ftputil=3.2=py35_0
- java-jdk=8.0.92=1
- mafft=7.310=0
- pysftp=0.2.9=py35_0
- snakemake=3.13.3=py35_0
- sortmerna=2.0=2
- vsearch=2.7.0=1
- xopen=0.3.2=py35_0
- deblur=1.0.3=py35h87d23f6_0
- gneiss=0.4.2=py35hda95eb5_6
- unifrac=0.9.2=py35hae32d81_1
- asn1crypto=0.22.0=py35_0
- backports=1.0=py35_1
- backports.functools_lru_cache=1.5=py35_0
- blas=1.1=openblas
- bleach=2.0.0=py_1
- bokeh=0.12.13=py35_0
- boost=1.64.0=py35_4
- boost-cpp=1.64.0=1
- bzip2=1.0.6=1
- ca-certificates=2018.1.18=0
- cairo=1.14.10=0
- certifi=2018.1.18=py35_0
- cffi=1.11.2=py35_0
- chardet=3.0.4=py35_0
- click=6.7=py_1
- cryptography=2.1.4=py35_0
- curl=7.55.1=0
- cycler=0.10.0=py35_0
- cython=0.27.3=py35_0
- dbus=1.10.22=0
- decorator=4.1.2=py35_0
- emperor=1.0.0beta13=py35_1
- entrypoints=0.2.3=py35_1
- expat=2.2.5=0
- fastcluster=1.1.24=py35_0
- fontconfig=2.12.6=0
- freetype=2.8.1=0
- future=0.16.0=py35_0
- gettext=0.19.8.1=0
- glib=2.55.0=0
- gmp=6.1.2=0
- graphite2=1.3.10=0
- gsl=2.1=2
- gst-plugins-base=1.8.0=0
- gstreamer=1.8.0=1
- h5py=2.7.0=np112py35_0
- harfbuzz=1.7.1=0
- hdf5=1.8.17=11
- html5lib=1.0.1=py_0
- icu=58.2=0
- idna=2.6=py35_1
- ipykernel=4.8.1=py35_0
- ipython=6.2.1=py35_1
- ipython_genutils=0.2.0=py35_0
- ipywidgets=7.1.1=py35_0
- jedi=0.11.1=py35_0
- jinja2=2.10=py35_0
- jpeg=9b=2
- jsonschema=2.6.0=py35_1
- jupyter_client=5.2.2=py35_0
- jupyter_core=4.4.0=py_0
- krb5=1.14.2=0
- libffi=3.2.1=3
- libiconv=1.15=0
- libpng=1.6.34=0
- libsodium=1.0.15=1
- libssh2=1.8.0=2
- libtiff=4.0.9=0
- libxcb=1.12=1
- libxml2=2.9.7=0
- lockfile=0.12.2=py35_0
- markupsafe=1.0=py35_0
- matplotlib=2.1.2=py35_0
- mistune=0.8.3=py_0
- msgpack-python=0.5.1=py35_0
- natsort=5.0.2=py35_0
- nbconvert=5.3.1=py_1