Skip to content
Snippets Groups Projects
Commit 14b56d1b authored by Shaman Narayanasamy's avatar Shaman Narayanasamy
Browse files

Added modules for fast assembly using MEGAHIT

parent f40a9590
No related branches found
No related tags found
No related merge requests found
......@@ -294,3 +294,90 @@ rule ASSEMBLY_MT_MAPPING:
# index
samtools index $PREFIX.sorted.bam
"""
################
# ## MGMT FAST ASSEMBLY USING MEGAHIT
################
rule ASSEMBLY_MG_FAST_1:
log:
A_LOG
benchmark:
"%s/benchmarks/ASSEMBLY_MT_ASSEMBLY_1.json" % A_OUT
input:
preprocessed_mg('R1'),
preprocessed_mg('R2'),
preprocessed_mg('SE'),
'{dir}/MT.assembly_1/final.contigs.fa'.format(dir=A_OUT)
output:
'{dir}/MGMT.assembly_1/final.contigs.fa'.format(dir=A_OUT)
params:
outdir = "{dir}/MGMT.assembly_1".format(dir=A_OUT)
shell:
"""
MAX_READ_LEN=$(cat {input} | sed -n '1~4s/^@/>/p;2~4p' |\
awk '$0 ~ \">\" {{c=0\"\t\"; }} $0 !~ \">\" {{c+=length($0); max=(max>c)?max:c;}} END {{print max}}')
echo "Max read length: $MAX_READ_LEN"
MEMBYTES=$(({MEMTOTAL}*1000*1000*1000))
echo "Available memory in bytes: $MEMBYTES"
megahit -o {params.outdir} --cpu-only -m $MEMBYTES --mem-flag 1\
-l $MAX_READ_LEN --input-cmd "cat {input}" -t {THREADS} --continue > {log} 2>&1
"""
rule ASSEMBLY_MGMT_EXTRACT_UNMAPPED_FROM_FAST_1:
log:
A_LOG
benchmark:
"%s/benchmarks/ASSEMBLY_MT_EXTRACT_UNMAPPED_FROM_ASSEMBLY_1.json" % A_OUT
input:
preprocessed_mg('R1'),
preprocessed_mg('R2'),
preprocessed_mg('SE'),
'{dir}/MGMT.assembly_1/final.contigs.fa'.format(dir=A_OUT),
expand('{dir}/MGMT.assembly_1/final.contigs.fa.{ext}', dir=A_OUT, ext=['amb', 'bwt', 'pac', 'sa', 'ann'])
output:
expand('{dir}/{name}', name=[
'MG.R1.unmapped.fq',
'MG.R2.unmapped.fq',
'MG.SE.unmapped.fq'], dir=A_OUT)
shell:
"""
TMP_FILE=$(mktemp --tmpdir={TMPDIR} -t "alignment_XXXXXX.bam")
BUFFER=$(mktemp --tmpdir={TMPDIR} -t "alignment_buffer_XXXXXX.bam")
bwa mem -v 1 -t {THREADS} {input[3]} {input[0]} {input[1]} | samtools view -@ {THREADS} -bS - > $TMP_FILE
samtools merge -@ {THREADS} -u - \
<(samtools view -@ {THREADS} -u -f 4 -F 264 $TMP_FILE) \
<(samtools view -@ {THREADS} -u -f 8 -F 260 $TMP_FILE) \
<(samtools view -@ {THREADS} -u -f 12 -F 256 $TMP_FILE) | \
samtools view -@ {THREADS} -bF 0x800 - | samtools sort -o -@ {THREADS} -m {MEMCORE}G -n - $BUFFER | \
bamToFastq -i stdin -fq {output[0]} -fq2 {output[1]}
bwa mem -v 1 -t {THREADS} {input[3]} {input[2]} | \
samtools view -@ {THREADS} -bS - | samtools view -@ {THREADS} -uf 4 - | \
bamToFastq -i stdin -fq {output[2]}
rm -rf $BUFFER* $TMP_FILE
"""
rule ASSEMBLY_MGMT_FAST_2:
log:
A_LOG
benchmark:
"%s/benchmarks/ASSEMBLY_MT_ASSEMBLY_2.json" % A_OUT
input:
expand('{dir}/{name}', name=[
'MG.R1.unmapped.fq',
'MG.R2.unmapped.fq',
'MG.SE.unmapped.fq'], dir=A_OUT)
output:
'{dir}/MGMT.assembly_2/final.contigs.fa'.format(dir=A_OUT)
params:
outdir = "{dir}/MGMT.assembly_2".format(dir=A_OUT)
shell:
"""
MAX_READ_LEN=$(cat {input} | sed -n '1~4s/^@/>/p;2~4p' | \
awk '$0 ~ \">\" {{c=0\"\t\"; }} $0 !~ \">\" {{c+=length($0); max=(max>c)?max:c;}} END {{print max}}')
MEMBYTES=$(({MEMTOTAL}*1000*1000*1000))
megahit -o {params.outdir} --cpu-only -m $MEMBYTES --mem-flag 1 -l $MAX_READ_LEN \
--input-cmd "cat {input}" -t {THREADS} --continue > {log} 2>&1
"""
......@@ -34,3 +34,9 @@ rule ASSEMBLY_MG_ALL:
shell:
"echo 'MG Assembly Done'"
#rule ASSEMBLY_FAST:
# input:
# "%s/MGMT.assembly.merged.fa" % A_OUT
# shell:
# "echo 'MGMT fast assembly Done'"
#
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment