Skip to content
Snippets Groups Projects
Commit e2acad95 authored by Valentina Galata's avatar Valentina Galata
Browse files

assembly/mapping step: some fixes; added operams (issue #40)

parent aaacd0d1
No related branches found
No related tags found
1 merge request!76Merge "cleanup" branch with "master" branch
......@@ -86,8 +86,8 @@ fastqc:
assemblers:
sr: ["megahit", "metaspades"]
lr: ["flye"]
hy: ["metaspadeshybrid"]
# hy: ["metaspadeshybrid", "operams"]
# hy: ["metaspadeshybrid"]
hy: ["metaspadeshybrid", "operams"]
flye:
threads: 10
......
channels:
- conda-forge
- bioconda
- r
- etetoolkit
- au-eoed
- compbiocore
- defaults
dependencies:
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- _r-mutex=1.0.1
- binutils_impl_linux-64=2.34
- binutils_linux-64=2.34
- bwidget=1.9.14
- bzip2=1.0.8
- ca-certificates=2020.6.20
- cairo=1.16.0
- certifi=2020.6.20
- curl=7.68.0
- fontconfig=2.13.1
- freetype=2.10.2
- fribidi=1.0.9
- gcc_impl_linux-64=7.5.0
- gcc_linux-64=7.5.0
- gettext=0.19.8.1
- gfortran_impl_linux-64=7.5.0
- gfortran_linux-64=7.5.0
- glib=2.65.0
- graphite2=1.3.13
- gsl=2.5
- gxx_impl_linux-64=7.5.0
- gxx_linux-64=7.5.0
- harfbuzz=2.4.0
- icu=64.2
- jpeg=9d
- krb5=1.16.4
- ld_impl_linux-64=2.34
- libblas=3.8.0
- libcblas=3.8.0
- libcurl=7.68.0
- libedit=3.1.20191231
- libffi=3.2.1
- libgcc-ng=9.2.0
- libgfortran-ng=7.5.0
- libgomp=9.2.0
- libiconv=1.15
- libopenblas=0.3.7
- libpng=1.6.37
- libssh2=1.9.0
- libstdcxx-ng=9.2.0
- libtiff=4.1.0
- libuuid=2.32.1
- libwebp-base=1.1.0
- libxcb=1.13
- libxml2=2.9.10
- lz4-c=1.9.2
- make=4.3
- ncurses=6.1
- openssl=1.1.1g
- pango=1.42.4
- pcre=8.44
- perl=5.26.2
- perl-app-cpanminus=1.7044
- perl-carp=1.38
- perl-exporter=5.72
- perl-extutils-makemaker=7.36
- perl-file-spec=3.48_01
- perl-file-which=1.23
- perl-getopt-long=2.50
- perl-if=0.0608
- perl-io-tty=1.12
- perl-ipc-run=20180523.0
- perl-number-format=1.75
- perl-regexp-common=2017060201
- perl-scalar-list-utils=1.52
- perl-statistics-basic=1.6611
- perl-statistics-r=0.34
- perl-switch=2.17
- perl-text-balanced=2.03
- perl-text-wrap=2013.0523
- pip=20.1.1
- pixman=0.38.0
- pthread-stubs=0.4
- python=3.8.3
- python_abi=3.8
- r-base=3.5.1
- readline=8.0
- setuptools=47.3.1
- sqlite=3.30.1
- tk=8.6.10
- tktable=2.10
- wheel=0.34.2
- xorg-kbproto=1.0.7
- xorg-libice=1.0.10
- xorg-libsm=1.2.3
- xorg-libx11=1.6.9
- xorg-libxau=1.0.9
- xorg-libxdmcp=1.1.3
- xorg-libxext=1.3.4
- xorg-libxrender=0.9.10
- xorg-renderproto=0.11.1
- xorg-xextproto=7.3.0
- xorg-xproto=7.0.31
- xz=5.2.5
- zlib=1.2.11
- zstd=1.4.4
......@@ -10,16 +10,15 @@ rule assembly_lr_flye:
threads:
config["flye"]["threads"]
log:
out="logs/assembly_lr.flye.out.log",
err="logs/assembly_lr.flye.err.log"
out="logs/flye.out.log",
err="logs/flye.err.log"
conda:
os.path.join(ENV_DIR, "flye_v2_7.yaml")
message:
"Assembly: long reads: Flye"
shell:
"(date && flye --nano-raw {input} --meta --out-dir $(dirname {output}) --genome-size {config[flye][genome_size]} "
"--threads {threads} && date) 2> {log.err} > {log.out}"
# "cd $(dirname {output}) && ln -sf assembly.fasta $(basename {output})"
"(date && flye --nano-raw {input} --meta --out-dir $(dirname {output}) --genome-size {config[flye][genome_size]} --threads {threads} && "
"date) 2> {log.err} > {log.out}"
rule polishing_lr_racon:
input:
......@@ -32,8 +31,8 @@ rule polishing_lr_racon:
sam=temp(os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/input.sam")),
asm=temp(os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/input.fasta"))
log:
out="logs/polishing_lr.{tool}.racon.out.log",
err="logs/polishing_lr.{tool}.racon.err.log"
out="logs/racon.{tool}.out.log",
err="logs/racon.{tool}.err.log"
wildcard_constraints:
tool="|".join(config["assemblers"]["lr"])
threads:
......@@ -54,8 +53,8 @@ rule polishing_lr_medaka:
output:
os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon_medaka/consensus.fasta")
log:
out="logs/polishing_lr.{tool}.racon.medaka.out.log",
err="logs/polishing_lr.{tool}.racon.medaka.err.log"
out="logs/racon.medaka.{tool}.out.log",
err="logs/racon.medaka.{tool}.err.log"
wildcard_constraints:
tool="|".join(config["assemblers"]["lr"])
threads:
......@@ -65,8 +64,8 @@ rule polishing_lr_medaka:
shell:
"(date && "
"medaka_consensus -i {input.lr} -d {input.asm} -o $(dirname {output}) -t {threads} -m {config[medaka][model]} && "
"date) 2> {log.err} > {log.out} && "
"cd $(dirname {output}) && ln -sf consensus.fasta $(basename {output})"
"cd $(dirname {output}) && ln -sf consensus.fasta $(basename {output}) && "
"date) 2> {log.err} > {log.out}"
localrules: assembly_lr_flye_link
......@@ -76,7 +75,7 @@ rule assembly_lr_flye_link:
output:
os.path.join(RESULTS_DIR, "assembly/lr/{tool}/ASSEMBLY.fasta")
shell:
"cd $(dirname {output}) && ln -sf $(realpath --relative-to=$(basename {output}) {input}) $(basename {output})"
"ipath=$(realpath {input}) && cd $(dirname {output}) && ln -sf $(realpath --relative-to=\".\" \"${{ipath}}\") $(basename {output})"
# Short reads
rule assembly_sr_megahit:
......@@ -86,8 +85,8 @@ rule assembly_sr_megahit:
output:
os.path.join(RESULTS_DIR, "assembly/sr/megahit/ASSEMBLY.fasta")
log:
out="logs/assembly_sr.megahit.out.log",
err="logs/assembly_sr.megahit.err.log"
out="logs/megahit.out.log",
err="logs/megahit.err.log"
threads:
config["megahit"]["threads"]
conda:
......@@ -95,11 +94,12 @@ rule assembly_sr_megahit:
message:
"Assembly: short reads: MEGAHIT"
shell:
"(date && megahit -1 {input.r1} -2 {input.r2} -t {threads} -o $(dirname {output})/tmp && date) 2> {log.err} > {log.out} && "
"(date && megahit -1 {input.r1} -2 {input.r2} -t {threads} -o $(dirname {output})/tmp && "
"cd $(dirname {output}) && "
"rsync -avP tmp/ . && "
"ln -sf final.contigs.fa $(basename {output}) && "
"rm -rf tmp/"
"rm -rf tmp/ && "
"date) 2> {log.err} > {log.out}"
rule assembly_sr_metaspades:
input:
......@@ -108,8 +108,8 @@ rule assembly_sr_metaspades:
output:
os.path.join(RESULTS_DIR, "assembly/sr/metaspades/ASSEMBLY.fasta")
log:
out="logs/assembly_sr.metaspades.out.log",
err="logs/assembly_sr.metaspades.err.log"
out="logs/metaspades.out.log",
err="logs/metaspades.err.log"
threads:
config["metaspades"]["threads"]
conda:
......@@ -117,8 +117,10 @@ rule assembly_sr_metaspades:
message:
"Assembly: short reads: MetaSPAdes"
shell:
"(date && metaspades.py -k 21,33,55,77 -t {threads} -1 {input.r1} -2 {input.r2} -o $(dirname {output}) && date) 2> {log.err} > {log.out} && "
"cd $(dirname {output}) && ln -sf contigs.fasta $(basename {output})"
"(date && "
"metaspades.py -k 21,33,55,77 -t {threads} -1 {input.r1} -2 {input.r2} -o $(dirname {output}) && "
"cd $(dirname {output}) && ln -sf contigs.fasta $(basename {output}) && "
"date) 2> {log.err} > {log.out}"
# Hybrid
rule assemble_hy_metaspades:
......@@ -129,8 +131,8 @@ rule assemble_hy_metaspades:
output:
os.path.join(RESULTS_DIR, "assembly/hy/metaspadeshybrid/ASSEMBLY.fasta")
log:
out="logs/assembly_hy.metaspades.out.log",
err="logs/assembly_hy.metaspades.err.log"
out="logs/metaspadeshybrid.out.log",
err="logs/metaspadeshybrid.err.log"
threads:
config["metaspades"]["threads"]
conda:
......@@ -138,5 +140,33 @@ rule assemble_hy_metaspades:
message:
"Assembly: hybrid: MetaSPAdes"
shell:
"(date && spades.py --meta -k 21,33,55,77 -t {threads} -1 {input.r1} -2 {input.r2} --nanopore {input.lr} -o $(dirname {output}) && date) 2> {log.err} > {log.out} && "
"cd $(dirname {output}) && ln -sf contigs.fasta $(basename {output})"
"(date && "
"spades.py --meta -k 21,33,55,77 -t {threads} -1 {input.r1} -2 {input.r2} --nanopore {input.lr} -o $(dirname {output}) && "
"cd $(dirname {output}) && ln -sf contigs.fasta $(basename {output}) && "
"date) 2> {log.err} > {log.out}"
rule assemble_hy_operams:
input:
lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"),
r1=os.path.join(RESULTS_DIR, "preproc/metag/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/metag/sr/R2.fastp.fastq.gz"),
asm=os.path.join(RESULTS_DIR, "assembly/sr/megahit/ASSEMBLY.fasta")
output:
asm=os.path.join(RESULTS_DIR, "assembly/hy/operams/ASSEMBLY.fasta"),
lr=temp(os.path.join(RESULTS_DIR, "assembly/hy/operams/lr.fastq"))
log:
out="logs/operams.out.log",
err="logs/operams.err.log"
threads:
config["operams"]["threads"]
conda:
os.path.join(ENV_DIR, "operams.yaml")
message:
"Assembly: hybrid: OPERA-MS"
shell:
"(date && "
"zcat {input.lr} > {output.lr} && "
"{config[operams][bin]} --short-read1 {input.r1} --short-read2 {input.r2} --long-read {output.lr} --contig-file {input.asm} "
"--out-dir $(dirname {output.asm}) --long-read-mapper minimap2 --num-processors {threads} && "
"cd $(dirname {output.asm}) && ln -sf contigs.fasta $(basename {output.asm}) && "
"date) 2> {log.err} > {log.out}"
\ No newline at end of file
......@@ -186,4 +186,4 @@ rule assembly_genomecov_average:
script=os.path.join(SRC_DIR, "coverage.awk")
threads: 1
shell:
"(date && cat {input} | awk -f {params.script} | tail -n+2 > {output} && date) 2>> {log.err} >> {log.out}"
"(date && cat {input} | awk -f {params.script} | tail -n+2 > {output} && date) 2> {log.err} > {log.out}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment