Skip to content
Snippets Groups Projects
Commit 1b257f12 authored by Valentina Galata's avatar Valentina Galata
Browse files

notes: updated mashmap script

parent 38268814
No related branches found
No related tags found
No related merge requests found
# conda activate mashmap
cores=20
cores=28
# estimate how many contigs from assembly_2 can be mapped to contigs in assembly_1
# splitting is allowed, report best hits for each query
# one query can have multiple hits
for pident in 70 80 90; do
for pident in 80 90; do
echo -e "pct\tmapped\ttotal\tquery\tref" > "mashmap.asm.cov.id${pident}.tsv"
find data_GDB/results/assembly/ -type f -name "ASSEMBLY.FILTERED.fasta" | sort | while read asm1; do
find data_GDB/results/assembly/ -type f -name "ASSEMBLY.FILTERED.fasta" | sort | while read asm2; do
......@@ -13,7 +13,7 @@ for pident in 70 80 90; do
mashmap -r ${asm1} -q ${asm2} -s 1000 --pi ${pident} -f map -t ${cores} -o mashmap.12.out &> mashmap.12.log
mapped=$(cut -d" " -f1 mashmap.12.out | sort | uniq | wc -l)
total=$(grep '^>' ${asm2} | wc -l)
mapped_pct=$(awk 'BEGIN{printf("%.2f\n",100*'${mapped}'/'${total}')}')
mapped_pct=$(awk -v m=${mapped} -v t=${total} 'BEGIN{printf("%.2f\n",100*m/t)}')
echo -e "${mapped_pct}\t${mapped}\t${total}\t$(basename $(dirname ${asm2}))\t$(basename $(dirname ${asm1}))"
fi
done
......@@ -22,7 +22,7 @@ done
# one-to-one mappings between two assemblies
# results are filtered to get best hit per query and reference, splitting is not allowed
for pident in 70 80 90; do
for pident in 80 90; do
echo -e "pct\tmapped\ttotal\tquery\tref" > "mashmap.asm.map.id${pident}.tsv"
find data_GDB/results/assembly/ -type f -name "ASSEMBLY.FILTERED.fasta" | sort | while read asm1; do
find data_GDB/results/assembly/ -type f -name "ASSEMBLY.FILTERED.fasta" | sort | while read asm2; do
......@@ -30,7 +30,7 @@ for pident in 70 80 90; do
mashmap -r ${asm1} -q ${asm2} -s 1000 --pi ${pident} -f one-to-one --noSplit -t ${cores} -o mashmap.12.out &> mashmap.12.log
mapped=$(cut -d" " -f1 mashmap.12.out | sort | uniq | wc -l)
total=$(grep '^>' ${asm2} | wc -l)
mapped_pct=$(awk 'BEGIN{printf("%.2f\n",100*'${mapped}'/'${total}')}')
mapped_pct=$(awk -v m=${mapped} -v t=${total} 'BEGIN{printf("%.2f\n",100*m/t)}')
echo -e "${mapped_pct}\t${mapped}\t${total}\t$(basename $(dirname ${asm2}))\t$(basename $(dirname ${asm1}))"
fi
done
......@@ -45,10 +45,10 @@ done
# find data_GDB/results/assembly/ -type f -name "ASSEMBLY.FILTERED.fasta" | sort | while read asm1; do
# mashmap -r ${asm1} -q ${fastq_lr} -s 1000 --pi ${pident} -t 10 -o mashmap.lr.out &> mashmap.lr.log
# r_mapped=$(cut -d" " -f1 mashmap.lr.out | sort | uniq | wc -l)
# r_mapped_pct=$(awk 'BEGIN{printf("%.2f\n",100*'${r_mapped}'/'${r_total}')}')
# r_mapped_pct=$(awk -v m=${r_mapped} -v t=${r_total} 'BEGIN{printf("%.2f\n",100*m/t)}')
# c_mapped=$(cut -d" " -f6 mashmap.lr.out | sort | uniq | wc -l)
# c_total=$(grep '^>' ${asm1} | wc -l)
# c_mapped_pct=$(awk 'BEGIN{printf("%.2f\n",100*'${c_mapped}'/'${c_total}')}')
# c_mapped_pct=$(awk -v m=${c_mapped} -v t=${c_total} 'BEGIN{printf("%.2f\n",100*m/t)}')
# echo -e "${r_mapped_pct}\t${r_mapped}\t${r_total}\t${c_mapped_pct}\t${c_mapped}\t${c_total}\t$(basename $(dirname ${asm1}))"
# done >> "mashmap.lr.id${pident}.tsv"
# done
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment