diff --git a/init.rule b/init.rule index 00b160665cb2bf05822b259df84ba5d6306d36ab..55944d3a28bd571096c3d7dcc19ea274a8fb698b 100644 --- a/init.rule +++ b/init.rule @@ -1,10 +1,13 @@ include: "config" - rule ALL: input: - expand("{path}/{filter}.{ext}", path=DBPATH + "/human", filter=config["human_filtering"]["filter"], ext=['fa']), + expand( + "{path}/{filter}.{ext}", path=DBPATH + "/human", + filter=config["human_filtering"]["filter"], + ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa'] + ), expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna"), expand("{path}/{db}", path=DBPATH, db=config["prokka"]["databases"]), "%s/adapters/adapters.done" % DBPATH, diff --git a/rules/Assembly/MGMT.rules b/rules/Assembly/MGMT.rules index 33f766de00189d14016599bd115a223cde279280..c7a79f12b4512cc1102df2b084d94c789d992049 100644 --- a/rules/Assembly/MGMT.rules +++ b/rules/Assembly/MGMT.rules @@ -3,7 +3,7 @@ ################################################################################################### ######################### -## ## MT MEGAHIT ASSEMBLY +# ## MT MEGAHIT ASSEMBLY ######################### rule ASSEMBLY_MT_MEGAHIT_1: @@ -110,108 +110,6 @@ rule ASSEMBLY_MT_CAT_MEGAHIT: """ cat {input[0]} {input[1]} | awk '/^>/{{print ">contig_MT_" ++i; next}}{{print}}' > {output} """ -######################### -## ## MT TRINITY ASSEMBLY -######################### -# -#rule ASSEMBLY_MT_TRINITY_TRINITY_1: -# log: -# A_LOG -# benchmark: -# "%s/benchmarks/ASSEMBLY_MT_TRINITY_1.json" % A_OUT -# input: -# preprocessed_mt('R1'), -# preprocessed_mt('R2'), -# preprocessed_mt('SE') -# output: -# '{dir}/MT.assembly_1/final.contigs.fa'.format(dir=A_OUT) -# params: -# outdir = "{dir}/MT.assembly_1".format(dir=A_OUT) -# shell: -# """ -# Trinity --seqType fq\ -# --left {input[0]} --right {input[1]} --single {input[2]}\ -# --output {output[0]}\ -# --JM {MEMTOTAL}G --CPU {THREADS}\ -# --inchworm_cpu {THREADS} --bflyHeapSpaceMax {MEMTOTAL}G\ -# 
--bflyCPU {THREADS} --bflyCalculateCPU\ -# --normalize_max_read_cov 25 --full_cleanup\ -# """ -# -# -#rule ASSEMBLY_MT_EXTRACT_UNMAPPED_FROM_TRINITY_1: -# log: -# A_LOG -# benchmark: -# "%s/benchmarks/ASSEMBLY_MT_EXTRACT_UNMAPPED_FROM_TRINITY_1.json" % A_OUT -# input: -# preprocessed_mt('R1'), -# preprocessed_mt('R2'), -# preprocessed_mt('SE'), -# '{dir}/MT.assembly_1/final.contigs.fa'.format(dir=A_OUT), -# expand('{dir}/MT.assembly_1/final.contigs.fa.{ext}', dir=A_OUT, ext=['amb', 'bwt', 'pac', 'sa', 'ann']) -# output: -# expand('{dir}/{name}', name=[ -# 'MT.R1.unmapped.fq', -# 'MT.R2.unmapped.fq', -# 'MT.SE.unmapped.fq'], dir=A_OUT) -# shell: -# """ -# TMP_FILE=$(mktemp --tmpdir={TMPDIR} -t "alignment_XXXXXX.bam") -# BUFFER=$(mktemp --tmpdir={TMPDIR} -t "alignment_buffer_XXXXXX.bam") -# bwa mem -v 1 -t {THREADS} {input[3]} {input[0]} {input[1]} | samtools view -@ {THREADS} -bS - > $TMP_FILE -# samtools merge -@ {THREADS} -u - \ -# <(samtools view -@ {THREADS} -u -f 4 -F 264 $TMP_FILE) \ -# <(samtools view -@ {THREADS} -u -f 8 -F 260 $TMP_FILE) \ -# <(samtools view -@ {THREADS} -u -f 12 -F 256 $TMP_FILE) | \ -# samtools view -@ {THREADS} -bF 0x800 - | samtools sort -o -@ {THREADS} -m {MEMCORE}G -n - $BUFFER | \ -# bamToFastq -i stdin -fq {output[0]} -fq2 {output[1]} -# bwa mem -v 1 -t {THREADS} {input[3]} {input[2]} | \ -# samtools view -@ {THREADS} -bS - | samtools view -@ {THREADS} -uf 4 - | \ -# bamToFastq -i stdin -fq {output[2]} -# rm -rf $BUFFER* $TMP_FILE -# """ -# -#rule ASSEMBLY_MT_TRINITY_2: -# log: -# A_LOG -# benchmark: -# "%s/benchmarks/ASSEMBLY_MT_TRINITY_2.json" % A_OUT -# input: -# expand('{dir}/{name}', name=[ -# 'MT.R1.unmapped.fq', -# 'MT.R2.unmapped.fq', -# 'MT.SE.unmapped.fq'], dir=A_OUT) -# output: -# '{dir}/MT.assembly_2/final.contigs.fa'.format(dir=A_OUT) -# params: -# outdir = "{dir}/MT.assembly_2".format(dir=A_OUT) -# shell: -# """ -# Trinity --seqType fq\ -# --left {input[0]} --right {input[1]} --single {input[2]}\ -# --output 
{output[0]}\ -# --JM {MEMTOTAL}G --CPU {THREADS}\ -# --inchworm_cpu {THREADS} --bflyHeapSpaceMax {MEMTOTAL}G\ -# --bflyCPU {THREADS} --bflyCalculateCPU\ -# --normalize_max_read_cov 25 --full_cleanup\ -# """ -# -#rule ASSEMBLY_MT_CAT_TRINITY: -# log: -# A_LOG -# benchmark: -# "%s/benchmarks/ASSEMBLY_MT_CAT_TRINITY.json" % A_OUT -# input: -# '{dir}/MT.assembly_1/final.contigs.fa'.format(dir=A_OUT), -# '{dir}/MT.assembly_2/final.contigs.fa'.format(dir=A_OUT) -# output: -# '{dir}/MT.assembly.cat.fa'.format(dir=A_OUT) -# shell: -# """ -# cat {input[0]} {input[1]} | awk '/^>/{{print ">contig_MT_" ++i; next}}{{print}}' > {output} -# """ -# ################## # ## MGMT SECTION - ASSEMBLY ################## diff --git a/rules/Assembly/master.rules b/rules/Assembly/master.rules index ff8298fbe38e216702eaef92cf1470d2f0f1730c..75eebedbdb9a641a6266aad23a58295eba8c6408 100644 --- a/rules/Assembly/master.rules +++ b/rules/Assembly/master.rules @@ -7,6 +7,7 @@ include: include: "MG.rules" + def assembly_output_files(): return expand('{dir}/{name}', name=[ 'MG.reads.sorted.bam', @@ -33,10 +34,3 @@ rule ASSEMBLY_MG_ALL: "%s/MG.assembly.merged.fa" % A_OUT shell: "echo 'MG Assembly Done'" - -#rule ASSEMBLY_MG_FAST: -# input: -# "%s/MG.assembly.merged.fa" % A_OUT -# shell: -# "echo 'MG fast assembly done'" -# diff --git a/rules/Preprocessing/MG.rules b/rules/Preprocessing/MG.rules index 62dcaebb1b22963f3c74c91f1c38d5a796b43b57..0b230b13ac46d30edda9fb75bb5ebb552da33124 100644 --- a/rules/Preprocessing/MG.rules +++ b/rules/Preprocessing/MG.rules @@ -39,7 +39,15 @@ rule PREPROCESSING_MG_TRIM: 'MG.SE2.uniq.trimmed.fq'], dir=P_OUT) shell: """ - java -jar {config[trimmomatic][jarfile]} PE -threads {THREADS} {input[0]} {input[1]} {output} ILLUMINACLIP:{DBPATH}/adapters/{config[trimmomatic][adapter]}-PE.fa:{config[trimmomatic][seed_mismatch]}:{config[trimmomatic][palindrome_clip_threshold]}:{config[trimmomatic][simple_clip_threshold]} LEADING:{config[trimmomatic][leading]} 
TRAILING:{config[trimmomatic][trailing]} SLIDINGWINDOW:{config[trimmomatic][window_size]}:{config[trimmomatic][window_quality]} MINLEN:{config[trimmomatic][minlen]} MAXINFO:{config[trimmomatic][target_length]}:{config[trimmomatic][strictness]} >> {log} 2>&1 + java -jar {config[trimmomatic][jarfile]} PE -threads {THREADS} \ + {input[0]} {input[1]} {output} \ + ILLUMINACLIP:{DBPATH}/adapters/{config[trimmomatic][adapter]}-PE.fa:{config[trimmomatic][seed_mismatch]}:{config[trimmomatic][palindrome_clip_threshold]}:{config[trimmomatic][simple_clip_threshold]} \ + LEADING:{config[trimmomatic][leading]} \ + TRAILING:{config[trimmomatic][trailing]} \ + SLIDINGWINDOW:{config[trimmomatic][window_size]}:{config[trimmomatic][window_quality]} \ + MINLEN:{config[trimmomatic][minlen]} \ + MAXINFO:{config[trimmomatic][target_length]}:{config[trimmomatic][strictness]} \ + >> {log} 2>&1 """ benchmark: "%s/benchmarks/PREPROCESSING_MG_TRIM.json" % P_OUT diff --git a/rules/Preprocessing/MT.rules b/rules/Preprocessing/MT.rules index d305bfcd81a84b9f314f46789ebeeeaedeec1799..990a1596e6fcb2603faee373f974a96da2e8a070 100644 --- a/rules/Preprocessing/MT.rules +++ b/rules/Preprocessing/MT.rules @@ -22,7 +22,13 @@ rule PREPROCESSING_MT_TRIM: 'MT.SE2.trimmed.fq'], dir=P_OUT) shell: """ - java -jar {config[trimmomatic][jarfile]} PE -threads {THREADS} {input[0]} {input[1]} {output} ILLUMINACLIP:{DBPATH}/adapters/{config[trimmomatic][adapter]}-PE.fa:{config[trimmomatic][seed_mismatch]}:{config[trimmomatic][palindrome_clip_threshold]}:{config[trimmomatic][simple_clip_threshold]} LEADING:{config[trimmomatic][leading]} TRAILING:{config[trimmomatic][trailing]} SLIDINGWINDOW:{config[trimmomatic][window_size]}:{config[trimmomatic][window_quality]} MINLEN:{config[trimmomatic][minlen]} MAXINFO:{config[trimmomatic][target_length]}:{config[trimmomatic][strictness]} >> {log} 2>&1 + java -jar {config[trimmomatic][jarfile]} PE -threads {THREADS} {input[0]} {input[1]} {output} \ + 
ILLUMINACLIP:{DBPATH}/adapters/{config[trimmomatic][adapter]}-PE.fa:{config[trimmomatic][seed_mismatch]}:{config[trimmomatic][palindrome_clip_threshold]}:{config[trimmomatic][simple_clip_threshold]} \ + LEADING:{config[trimmomatic][leading]} \ + TRAILING:{config[trimmomatic][trailing]} \ + SLIDINGWINDOW:{config[trimmomatic][window_size]}:{config[trimmomatic][window_quality]} \ + MINLEN:{config[trimmomatic][minlen]} \ + MAXINFO:{config[trimmomatic][target_length]}:{config[trimmomatic][strictness]} >> {log} 2>&1 """ benchmark: "%s/benchmarks/PREPROCESSING_MT_TRIM.json" % P_OUT @@ -44,7 +50,6 @@ rule PREPROCESSING_MT_CAT_TRIMMED_SE: "cat {input[0]} {input[1]} > {output}" - rule PREPROCESSING_MT_FILTER_RRNA: log: P_LOG @@ -81,10 +86,12 @@ rule PREPROCESSING_MT_FILTER_RRNA: shell(""" TMP_R12=$(mktemp --tmpdir={TMPDIR} -t "tmp_XXXXXX.R12") {config[sortmerna][scripts_path]}/merge-paired-reads.sh {input[0]} {input[1]} $TMP_R12.fq - sortmerna --ref {ref} --reads $TMP_R12.fq --fastx --aligned $TMP_R12.rrna --other $TMP_R12.rrna_filt -a {THREADS} --paired_in -m {mem} + sortmerna --ref {ref} --reads $TMP_R12.fq --fastx --aligned $TMP_R12.rrna \ + --other $TMP_R12.rrna_filt -a {THREADS} --paired_in -m {mem} {config[sortmerna][scripts_path]}/unmerge-paired-reads.sh $TMP_R12.rrna.fq {output[3]} {output[4]} {config[sortmerna][scripts_path]}/unmerge-paired-reads.sh $TMP_R12.rrna_filt.fq {output[0]} {output[1]} - sortmerna --ref {ref} --reads {input[2]} --fastx --aligned {P_OUT}/MT.SE.trimmed.rna --other {P_OUT}/MT.SE.trimmed.rna_filtered -a {THREADS} -m {mem} + sortmerna --ref {ref} --reads {input[2]} --fastx --aligned {P_OUT}/MT.SE.trimmed.rna \ + --other {P_OUT}/MT.SE.trimmed.rna_filtered -a {THREADS} -m {mem} rm $TMP_R12* """) @@ -119,8 +126,9 @@ rule PREPROCESSING_MT_FILTER_HG: <(samtools view -@ {THREADS} -u -f 4 -F 264 $TMP_FILE) \ <(samtools view -@ {THREADS} -u -f 8 -F 260 $TMP_FILE) \ <(samtools view -@ {THREADS} -u -f 12 -F 256 $TMP_FILE) | \ - samtools view -@ {THREADS} 
-bF 0x800 - | samtools sort -o -@ {THREADS} -m {MEMCORE}G -n - $BUFFER | bamToFastq -i stdin -fq {output[0]} -fq2 {output[1]} - + samtools view -@ {THREADS} -bF 0x800 - | samtools sort -o -@ {THREADS} -m {MEMCORE}G -n - $BUFFER \ + | bamToFastq -i stdin -fq {output[0]} -fq2 {output[1]} + if [[ -s {input[2]} ]] then bwa mem -v 1 -t {THREADS} {input[3]} {input[2]} | samtools view -@ {THREADS} -bS - | \ @@ -129,6 +137,6 @@ rule PREPROCESSING_MT_FILTER_HG: echo "{input[2]} is empty, skipping single end human sequence filtering, but creating it anyway..." touch {output[2]} fi - + rm -rf $BUFFER* $TMP_FILE """ diff --git a/rules/Preprocessing/master.rules b/rules/Preprocessing/master.rules index eb8a26ade9a76d1839fd9633b77e04c473f2c547..36ca9cdf046ba4aec0ef2d3e4a7b58513c9414e2 100644 --- a/rules/Preprocessing/master.rules +++ b/rules/Preprocessing/master.rules @@ -15,7 +15,14 @@ def preprocessing_output_files(): """ Dynamically generate output files names based on parameters """ - return [preprocessed_mt('R1'), preprocessed_mt('R2'), preprocessed_mt('SE')] + [preprocessed_mg('R1'), preprocessed_mg('R2'), preprocessed_mg('SE')] + return [ + preprocessed_mt('R1'), + preprocessed_mt('R2'), + preprocessed_mt('SE'), + preprocessed_mg('R1'), + preprocessed_mg('R2'), + preprocessed_mg('SE') + ] def preprocessed_mg(target, dir=P_OUT):