Introduce a single configurable database root (DBPATH) for all reference data.

Review notes on this revision of the patch:
  * Trimmomatic adapters are installed by _DOWNLOAD_TRIMMOMATIC_ADAPTERS into
    DBPATH/adapters, so both TRIM rules now read ILLUMINACLIP files from there
    (the MG rule still used the removed config key "databases", the MT rule
    pointed at DBPATH/trimmomatic).
  * expand() and params receive the real DBPATH value instead of the literal
    string "{DBPATH}", which is not substituted inside run: blocks and turns
    rule outputs into unintended wildcards.
  * Line breaks and indentation were restored from the hunk line counts.

diff --git a/Snakefile b/Snakefile
index 67883d077eea1366de706b097afb19e855fb774b..9bd082f2c4c79181e3cf2d0bceeb9be7b7dc765d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -45,6 +45,9 @@ OUTPUTDIR = os.environ.get("OUTPUTDIR", config['General']['outputdir'])
 MG = os.environ.get("MG", config['General']['raws']['Metagenomics']).split()
 MT = os.environ.get("MT", config['General']['raws']['Metatranscriptomics']).split()
 SAMPLE = os.environ.get("SAMPLE", config['General']['sample'])
+DBPATH = os.environ.get("DBPATH", config['General']['db_path'])
+if not os.path.exists(DBPATH):
+    os.makedirs(DBPATH)
 
 # Get general parameters
 THREADS = os.environ.get("THREADS", config['General']['threads'])
@@ -66,7 +69,7 @@ def prepare_environment(stepname):
     if not os.path.exists(out):
         os.makedirs(out)
     elif not os.path.isdir(out):
-        raise OSError("[IMP] Output is not a directory: %s" % out)
+        raise OSError("//[IMP] Output is not a directory: %s" % out)
     if not os.path.exists(TMPDIR):
         os.makedirs(TMPDIR)
     bench = os.path.join(out, 'benchmarks')
diff --git a/docker/Dockerfile b/docker/Dockerfile
index fa28efcd2f99b58a7e1abfe2ac0f6577b69ffeeb..93ac511ec7fafd8c842d1ee99dfd12307ce4e15b 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -55,8 +55,8 @@ RUN mkdir -p /home/imp/tmp /home/imp/lib \
 && cd trimmomatic-0.32 \
 && ant \
 && cp dist/jar/trimmomatic-0.32.jar /home/imp/lib/. \
-&& mkdir -p /usr/db/trimmomatic \
-&& cp adapters/* /usr/db/trimmomatic/. \
+#&& mkdir -p /usr/db/trimmomatic \
+#&& cp adapters/* /usr/db/trimmomatic/. \
 && cd .. && rm -rf *rimmomatic*
 
 ## idba ud
diff --git a/rules/Analysis/MGMT.rules b/rules/Analysis/MGMT.rules
index 1db5c0aa2d1387947d135df3e3be42281a00a37f..bebfbd37b86e984983c7cdc47f6ac071abcd28bf 100644
--- a/rules/Analysis/MGMT.rules
+++ b/rules/Analysis/MGMT.rules
@@ -6,7 +6,7 @@ rule ANALYSIS_ANNOTATE:
         "%s/benchmarks/ANALYSIS_ANNOTATE.json" % AN_OUT
     input:
         '{dir}/MGMT.assembly.merged.fa'.format(dir=A_OUT),
-        expand("{path}/{db}", path=config["prokka"]["db_path"], db=config["prokka"]["databases"])
+        expand("{path}/{db}", path=DBPATH, db=config["prokka"]["databases"])
     output:
         "%s/annotation/annotation.filt.gff" % AN_OUT
     shell:
diff --git a/rules/Preprocessing/MG.rules b/rules/Preprocessing/MG.rules
index 7b509d0ca2695f88ec23f0ea1534200893ccb240..7c5b48849a818831a91fa412cb43b28775503568 100644
--- a/rules/Preprocessing/MG.rules
+++ b/rules/Preprocessing/MG.rules
@@ -54,7 +54,8 @@ rule PREPROCESSING_MG_TRIM:
     log:
         P_LOG
     input:
-        expand('{dir}/{uniq}', uniq=['MG.R1.fq', 'MG.R2.fq'], dir=P_OUT)
+        expand('{dir}/{uniq}', uniq=['MG.R1.fq', 'MG.R2.fq'], dir=P_OUT),
+        DBPATH + "/adapters/adapters.done"
     output:
         expand('{dir}/{trim}', trim=[
             'MG.R1.uniq.trimmed.fq',
@@ -63,7 +64,7 @@ rule PREPROCESSING_MG_TRIM:
             'MG.SE2.uniq.trimmed.fq'], dir=P_OUT)
     shell:
         """
-        java -jar {config[Preprocessing][trimmomatic][jarfile]} PE -threads {THREADS} {input} {output} ILLUMINACLIP:{config[Preprocessing][trimmomatic][databases]}/{config[Preprocessing][trimmomatic][adapter]}-PE.fa:{config[Preprocessing][trimmomatic][seed_mismatch]}:{config[Preprocessing][trimmomatic][palindrome_clip_threshold]}:{config[Preprocessing][trimmomatic][simple_clip_threshold]} LEADING:{config[Preprocessing][trimmomatic][leading]} TRAILING:{config[Preprocessing][trimmomatic][trailing]} SLIDINGWINDOW:{config[Preprocessing][trimmomatic][window_size]}:{config[Preprocessing][trimmomatic][window_quality]} MINLEN:{config[Preprocessing][trimmomatic][minlen]} MAXINFO:{config[Preprocessing][trimmomatic][target_length]}:{config[Preprocessing][trimmomatic][strictness]} > {log} 2>&1
+        java -jar {config[Preprocessing][trimmomatic][jarfile]} PE -threads {THREADS} {input[0]} {input[1]} {output} ILLUMINACLIP:{DBPATH}/adapters/{config[Preprocessing][trimmomatic][adapter]}-PE.fa:{config[Preprocessing][trimmomatic][seed_mismatch]}:{config[Preprocessing][trimmomatic][palindrome_clip_threshold]}:{config[Preprocessing][trimmomatic][simple_clip_threshold]} LEADING:{config[Preprocessing][trimmomatic][leading]} TRAILING:{config[Preprocessing][trimmomatic][trailing]} SLIDINGWINDOW:{config[Preprocessing][trimmomatic][window_size]}:{config[Preprocessing][trimmomatic][window_quality]} MINLEN:{config[Preprocessing][trimmomatic][minlen]} MAXINFO:{config[Preprocessing][trimmomatic][target_length]}:{config[Preprocessing][trimmomatic][strictness]} > {log} 2>&1
         """
     benchmark:
         "%s/benchmarks/PREPROCESSING_MG_TRIM.json" % P_OUT
@@ -117,11 +118,10 @@ rule PREPROCESSING_MG_FILTER_HG:
             'MG.R1.uniq.trimmed.fq',
             'MG.R2.uniq.trimmed.fq',
             'MG.SE.uniq.trimmed.fq'], dir=P_OUT),
-        '{dir}/{ref}.fa'.format(dir=config['human_filtering']['db_path'], ref=config['human_filtering']['filter']),
+        '{dir}/{ref}.fa'.format(dir=DBPATH + "/human", ref=config['human_filtering']['filter']),
         expand(
-            "{path}/{filter}.{ext}",
-            path=config["human_filtering"]["db_path"], filter=config["human_filtering"]["filter"],
-            ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa'])
+            "{p}/human/{filter}.{ext}", filter=config["human_filtering"]["filter"],
+            ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa'], p=DBPATH)
     output:
         expand('{dir}/{filter}', filter=expand([
             'MG.R1.uniq.trimmed.{f}.fq',
@@ -144,5 +144,3 @@ rule PREPROCESSING_MG_FILTER_HG:
         samtools view -@ {THREADS} -uf 4 - | bamToFastq -i stdin -fq {output[2]}
         rm -rf $BUFFER* $TMP_FILE
         """
-
-
diff --git a/rules/Preprocessing/MT.rules b/rules/Preprocessing/MT.rules
index ddc9941b8eecb0c610fa3359a2dac7a5fd4bbb86..8e300eef7b7322db3729e8db62e9ecdc6becde7e 100644
--- a/rules/Preprocessing/MT.rules
+++ b/rules/Preprocessing/MT.rules
@@ -34,7 +34,8 @@ rule PREPROCESSING_MT_TRIM:
     log:
         P_LOG
     input:
-        expand('{dir}/{uniq}', uniq=['MT.R1.fq', 'MT.R2.fq'], dir=P_OUT)
+        expand('{dir}/{uniq}', uniq=['MT.R1.fq', 'MT.R2.fq'], dir=P_OUT),
+        DBPATH + "/adapters/adapters.done"
     output:
         expand('{dir}/{trim}', trim=[
             'MT.R1.trimmed.fq',
@@ -43,7 +44,7 @@ rule PREPROCESSING_MT_TRIM:
             'MT.SE2.trimmed.fq'], dir=P_OUT)
     shell:
         """
-        java -jar {config[Preprocessing][trimmomatic][jarfile]} PE -threads {THREADS} {input} {output} ILLUMINACLIP:{config[Preprocessing][trimmomatic][databases]}/{config[Preprocessing][trimmomatic][adapter]}-PE.fa:{config[Preprocessing][trimmomatic][seed_mismatch]}:{config[Preprocessing][trimmomatic][palindrome_clip_threshold]}:{config[Preprocessing][trimmomatic][simple_clip_threshold]} LEADING:{config[Preprocessing][trimmomatic][leading]} TRAILING:{config[Preprocessing][trimmomatic][trailing]} SLIDINGWINDOW:{config[Preprocessing][trimmomatic][window_size]}:{config[Preprocessing][trimmomatic][window_quality]} MINLEN:{config[Preprocessing][trimmomatic][minlen]} MAXINFO:{config[Preprocessing][trimmomatic][target_length]}:{config[Preprocessing][trimmomatic][strictness]} > {log} 2>&1
+        java -jar {config[Preprocessing][trimmomatic][jarfile]} PE -threads {THREADS} {input[0]} {input[1]} {output} ILLUMINACLIP:{DBPATH}/adapters/{config[Preprocessing][trimmomatic][adapter]}-PE.fa:{config[Preprocessing][trimmomatic][seed_mismatch]}:{config[Preprocessing][trimmomatic][palindrome_clip_threshold]}:{config[Preprocessing][trimmomatic][simple_clip_threshold]} LEADING:{config[Preprocessing][trimmomatic][leading]} TRAILING:{config[Preprocessing][trimmomatic][trailing]} SLIDINGWINDOW:{config[Preprocessing][trimmomatic][window_size]}:{config[Preprocessing][trimmomatic][window_quality]} MINLEN:{config[Preprocessing][trimmomatic][minlen]} MAXINFO:{config[Preprocessing][trimmomatic][target_length]}:{config[Preprocessing][trimmomatic][strictness]} > {log} 2>&1
         """
     benchmark:
         "%s/benchmarks/PREPROCESSING_MT_TRIM.json" % P_OUT
@@ -105,7 +106,7 @@ rule PREPROCESSING_MT_FILTER_RRNA:
         expand(
             "{path}/idx/{files}.{ext}",
             files=config["sortmerna"]["files"],
-            path=config["sortmerna"]["db_path"],
+            path=DBPATH + "/sortmerna",
             ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats'])
     output:
         expand('{dir}/{filter}', filter=[
@@ -117,8 +118,8 @@ rule PREPROCESSING_MT_FILTER_RRNA:
             'MT.SE.trimmed.rna.fq',
         ], dir=P_OUT)
     run:
-        fastafiles = expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=config["sortmerna"]["db_path"])
-        fastaindexed = expand("{path}/idx/{files}", files=config["sortmerna"]["files"], path=config["sortmerna"]["db_path"])
+        fastafiles = expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna")
+        fastaindexed = expand("{path}/idx/{files}", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna")
         ref = ':'.join('%s,%s' % (a, b) for a, b in zip(fastafiles, fastaindexed))
         # shell("{EXE} {SCRIPTS[filter_rna]} -t {config[sortmerna][threads]} \
         # -d %s -p {config[sortmerna][scripts_path]} \
@@ -143,10 +144,10 @@ rule PREPROCESSING_MT_FILTER_HG:
             'MT.R1.trimmed.rna_filtered.fq',
             'MT.R2.trimmed.rna_filtered.fq',
             'MT.SE.trimmed.rna_filtered.fq'], dir=P_OUT),
-        '{dir}/{ref}.fa'.format(dir=config['human_filtering']['db_path'], ref=config['human_filtering']['filter']),
+        '{dir}/{ref}.fa'.format(dir=DBPATH + "/human", ref=config['human_filtering']['filter']),
         expand(
             "{path}/{filter}.{ext}",
-            path=config["human_filtering"]["db_path"],
+            path=DBPATH + "/human",
             filter=config["human_filtering"]["filter"],
             ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa'])
     output:
diff --git a/rules/Util.rules b/rules/Util.rules
index aea13ba8f1c22206b18dfb5d35af219e21d1bad7..08ed1c1b3f6555598faa1d960c963f7c57b01cee 100644
--- a/rules/Util.rules
+++ b/rules/Util.rules
@@ -3,9 +3,9 @@ U_OUT, U_LOG = prepare_environment('Util')
 
 rule _DOWNLOAD_HUMAN_DB:
     output:
-        expand("{path}/{filter}.{ext}", path=config["human_filtering"]["db_path"], filter=config["human_filtering"]["filter"], ext=['fa'])
+        expand("{path}/{filter}.{ext}", path=DBPATH + "/human", filter=config["human_filtering"]["filter"], ext=['fa'])
     params:
-        filter = config["human_filtering"]["filter"], outdir = config["human_filtering"]["db_path"]
+        filter = config["human_filtering"]["filter"], outdir = DBPATH + "/human"
     shell:
         """
         TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX")
@@ -19,7 +19,7 @@ rule _DOWNLOAD_HUMAN_DB:
 
 rule _DOWNLOAD_SORTMERNA_DATABASES:
     output:
-        expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=config["sortmerna"]["db_path"])
+        expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna")
     shell:
         """
         TMPD=$(mktemp -d -t --tmpdir={tmp} "XXXXXX")
@@ -28,11 +28,11 @@ rule _DOWNLOAD_SORTMERNA_DATABASES:
         mkdir -p {path}
         mv $TMPD/rRNA_databases/*.fasta {path}/.
         rm -rf $TMPD
-        """.format(pkg_url=config["sortmerna"]["pkg_url"], path=config["sortmerna"]["db_path"], tmp='{TMPDIR}')
+        """.format(pkg_url=config["sortmerna"]["pkg_url"], path="{DBPATH}/sortmerna", tmp='{TMPDIR}')
 
 rule _DOWNLOAD_PROKKA_DATABASES:
     output:
-        expand("{path}/{db}", path=config["prokka"]["db_path"], db=config["prokka"]["databases"])
+        expand("{path}/{db}", path=DBPATH, db=config["prokka"]["databases"])
     shell:
         """
         TMPDIR=$(mktemp -d -t "XXXXXX")
@@ -42,7 +42,20 @@ rule _DOWNLOAD_PROKKA_DATABASES:
         cp -r $TMPDIR/db/* {path}/.
         rm -rf $TMPDIR
         prokka --setupdb
-        """.format(pkg_url=config["prokka"]["pkg_url"], path=config["prokka"]["db_path"])
+        """.format(pkg_url=config["prokka"]["pkg_url"], path="{DBPATH}")
+
+
+rule _DOWNLOAD_TRIMMOMATIC_ADAPTERS:
+    output:
+        DBPATH + "/adapters/adapters.done"
+    shell:
+        """
+        wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-Src-0.32.zip
+        unzip Trimmomatic-Src-0.32.zip
+        cp -r trimmomatic-0.32/adapters {DBPATH}
+        rm Trimmomatic-Src-0.32.zip && rm -rf trimmomatic-0.32
+        touch {output}
+        """
 
 
 rule _DOWNLOAD_LIBRARY_FILES:
@@ -61,20 +74,20 @@ rule INDEX_SORTMERNA_DB:
     benchmark:
         "%s/benchmarks/INDEX_SORTMERNA_DB.json" % U_OUT
     input:
-        expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=config["sortmerna"]["db_path"])
+        expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna")
     output:
         expand(
             "{path}/idx/{files}.{ext}",
             files=config["sortmerna"]["files"],
-            path=config["sortmerna"]["db_path"],
+            path=DBPATH + "/sortmerna",
             ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats'])
     params:
-        outdir = "%s/idx" % config["sortmerna"]["db_path"]
+        outdir = "%s/sortmerna/idx" % DBPATH
     run:
         fastaindexed = expand(
             "{path}/idx/{files}",
             files=config["sortmerna"]["files"],
-            path=config["sortmerna"]["db_path"])
+            path=DBPATH + "/sortmerna")
         ref = ':'.join('%s,%s' % (a, b) for a, b in zip(input, fastaindexed))
         shell("mkdir -p {params.outdir}")
         shell("indexdb_rna --ref {ref} > {log} 2>&1")
@@ -89,8 +102,8 @@ rule DIAGRAMMS:
         outdir = "%s" % OUTPUTDIR
     shell:
         """
-        snakemake RUN --forceall --dag | dot -Tpng > {params.outdir}/workflow.png
-        snakemake RUN --forceall --dag | dot -Tpdf > {params.outdir}/workflow.pdf
+        snakemake ALL --forceall --dag | dot -Tpng > {params.outdir}/workflow.png
+        snakemake ALL --forceall --dag | dot -Tpdf > {params.outdir}/workflow.pdf
         """
 
 rule REPORT:
@@ -211,4 +224,3 @@ rule _DOWNLOAD_KEGG_INFORMATION:
         echo "Preparing KEGG ID to pathway mappings"
         python {SRCDIR}/make.pwy.hierarchy.kegg.py -o {output[1]}
         """
-
diff --git a/src/config.imp.json b/src/config.imp.json
index f2d000ea8db94b769d01b21f832d7714ef8f4c2b..ce21bd1e7b467812b7650e9cdbd4ae61e1c82a3c 100644
--- a/src/config.imp.json
+++ b/src/config.imp.json
@@ -11,7 +11,8 @@
             "Metatranscriptomics": "MT.R1.5_percent.fq MT.R2.5_percent.fq"
         },
         "sample": "test",
-        "outputdir": "build"
+        "outputdir": "build",
+        "db_path": "db"
     },
     "Preprocessing": {
         "pre": "echo '//LOADING PREPROCESSING MODULE'",
@@ -28,8 +29,7 @@
             "window_quality": 3,
             "strictness": 0.5,
             "target_length": 40,
-            "jarfile": "/home/imp/lib/trimmomatic-0.32.jar",
-            "databases": "/usr/db/trimmomatic"
+            "jarfile": "/home/imp/lib/trimmomatic-0.32.jar"
         }
     },
     "Assembly": {
@@ -57,11 +57,9 @@
     },
     "human_filtering": {
         "filter": "chr21",
-        "url": "http://hgdownload.cse.ucsc.edu/goldenPath/hg38/chromosomes/chr21.fa.gz",
-        "db_path": "/usr/db/human"
+        "url": "http://hgdownload.cse.ucsc.edu/goldenPath/hg38/chromosomes/chr21.fa.gz"
     },
     "sortmerna": {
-        "db_path": "/usr/db/sortmerna",
         "pkg_url": "https://github.com/biocore/sortmerna/archive/2.0.tar.gz",
         "scripts_path": "/home/imp/lib",
 
@@ -78,7 +76,6 @@
     },
     "prokka": {
         "pkg_url": "http://www.vicbioinformatics.com/prokka-1.10.tar.gz",
-        "db_path": "/usr/db",
         "databases": [
             "cm/Bacteria.i1i",
             "genus/Staphylococcus.phr",