Skip to content
Snippets Groups Projects
Commit e67aa347 authored by Shaman Narayanasamy's avatar Shaman Narayanasamy
Browse files

Fast MG-only assembly module

parent 768ab92d
No related branches found
No related tags found
No related merge requests found
......@@ -120,3 +120,88 @@ rule ASSEMBLY_MG_MERGE_ASSEMBLY:
awk '/^>/{{print \">contig_\" ++i; next}}{{print}}' > $NAME.merged.fa
"""
################
# ## MGMT FAST ASSEMBLY USING MEGAHIT
################
rule ASSEMBLY_MG_FAST_1:
log:
A_LOG
benchmark:
"%s/benchmarks/ASSEMBLY_MT_ASSEMBLY_1.json" % A_OUT
input:
preprocessed_mg('R1'),
preprocessed_mg('R2'),
preprocessed_mg('SE'),
'{dir}/MT.assembly_1/final.contigs.fa'.format(dir=A_OUT)
output:
'{dir}/MGMT.assembly_1/final.contigs.fa'.format(dir=A_OUT)
params:
outdir = "{dir}/MGMT.assembly_1".format(dir=A_OUT)
shell:
"""
MAX_READ_LEN=$(cat {input} | sed -n '1~4s/^@/>/p;2~4p' |\
awk '$0 ~ \">\" {{c=0\"\t\"; }} $0 !~ \">\" {{c+=length($0); max=(max>c)?max:c;}} END {{print max}}')
echo "Max read length: $MAX_READ_LEN"
MEMBYTES=$(({MEMTOTAL}*1000*1000*1000))
echo "Available memory in bytes: $MEMBYTES"
megahit -o {params.outdir} --cpu-only -m $MEMBYTES --mem-flag 1\
-l $MAX_READ_LEN --input-cmd "cat {input}" -t {THREADS} --continue > {log} 2>&1
"""
rule ASSEMBLY_MGMT_EXTRACT_UNMAPPED_FROM_FAST_1:
log:
A_LOG
benchmark:
"%s/benchmarks/ASSEMBLY_MT_EXTRACT_UNMAPPED_FROM_ASSEMBLY_1.json" % A_OUT
input:
preprocessed_mg('R1'),
preprocessed_mg('R2'),
preprocessed_mg('SE'),
'{dir}/MGMT.assembly_1/final.contigs.fa'.format(dir=A_OUT),
expand('{dir}/MGMT.assembly_1/final.contigs.fa.{ext}', dir=A_OUT, ext=['amb', 'bwt', 'pac', 'sa', 'ann'])
output:
expand('{dir}/{name}', name=[
'MG.R1.unmapped.fq',
'MG.R2.unmapped.fq',
'MG.SE.unmapped.fq'], dir=A_OUT)
shell:
"""
TMP_FILE=$(mktemp --tmpdir={TMPDIR} -t "alignment_XXXXXX.bam")
BUFFER=$(mktemp --tmpdir={TMPDIR} -t "alignment_buffer_XXXXXX.bam")
bwa mem -v 1 -t {THREADS} {input[3]} {input[0]} {input[1]} | samtools view -@ {THREADS} -bS - > $TMP_FILE
samtools merge -@ {THREADS} -u - \
<(samtools view -@ {THREADS} -u -f 4 -F 264 $TMP_FILE) \
<(samtools view -@ {THREADS} -u -f 8 -F 260 $TMP_FILE) \
<(samtools view -@ {THREADS} -u -f 12 -F 256 $TMP_FILE) | \
samtools view -@ {THREADS} -bF 0x800 - | samtools sort -o -@ {THREADS} -m {MEMCORE}G -n - $BUFFER | \
bamToFastq -i stdin -fq {output[0]} -fq2 {output[1]}
bwa mem -v 1 -t {THREADS} {input[3]} {input[2]} | \
samtools view -@ {THREADS} -bS - | samtools view -@ {THREADS} -uf 4 - | \
bamToFastq -i stdin -fq {output[2]}
rm -rf $BUFFER* $TMP_FILE
"""
rule ASSEMBLY_MGMT_FAST_2:
log:
A_LOG
benchmark:
"%s/benchmarks/ASSEMBLY_MT_ASSEMBLY_2.json" % A_OUT
input:
expand('{dir}/{name}', name=[
'MG.R1.unmapped.fq',
'MG.R2.unmapped.fq',
'MG.SE.unmapped.fq'], dir=A_OUT)
output:
'{dir}/MGMT.assembly_2/final.contigs.fa'.format(dir=A_OUT)
params:
outdir = "{dir}/MGMT.assembly_2".format(dir=A_OUT)
shell:
"""
MAX_READ_LEN=$(cat {input} | sed -n '1~4s/^@/>/p;2~4p' | \
awk '$0 ~ \">\" {{c=0\"\t\"; }} $0 !~ \">\" {{c+=length($0); max=(max>c)?max:c;}} END {{print max}}')
MEMBYTES=$(({MEMTOTAL}*1000*1000*1000))
megahit -o {params.outdir} --cpu-only -m $MEMBYTES --mem-flag 1 -l $MAX_READ_LEN \
--input-cmd "cat {input}" -t {THREADS} --continue > {log} 2>&1
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment