Skip to content
Snippets Groups Projects
Commit cc4965ba authored by Valentina Galata's avatar Valentina Galata
Browse files

fast5/preproc: mv basecalling to fast5 workflow (issue #103)

parent cb1f8e9b
No related branches found
No related tags found
No related merge requests found
......@@ -27,6 +27,7 @@ workdir:
# Absolute paths of the workflow directories, each read from the matching
# entry of the workflow config.
SINGLE_FAST5_DIR = os.path.abspath(config["single_fast5_dir"])
MULTI_FAST5_DIR = os.path.abspath(config["multi_fast5_dir"])
BASECALLING_DIR = os.path.abspath(config["basecalling_dir"])
##################################################
# RULES
......@@ -34,8 +35,11 @@ MULTI_FAST5_DIR = os.path.abspath(config["multi_fast5_dir"])
# Default target: requests the multi-FAST5 directory and the merged,
# basecalled FASTQ file.
rule all:
    input:
        mfast5=MULTI_FAST5_DIR,
        fastq=os.path.join(BASECALLING_DIR, "lr.fastq.gz")
# Single-FAST5 to Multi-FAST5
rule create_multifast5s:
input:
SINGLE_FAST5_DIR
......@@ -50,8 +54,56 @@ rule create_multifast5s:
batch_size=config["ont_fast5_api"]["single_to_multi_fast5"]["batch_size"]
conda:
os.path.join(ENV_DIR, "ont_fast5_api.yaml")
message:
"FAST5: single-FAST5 to multi-FAST5"
shell:
"(date && "
"single_to_multi_fast5 --input_path {input} --save_path {output} --filename_base multifast5 --batch_size {params.batch_size} --recursive -t {threads} && "
"touch {output} && "
"date) 2> {log.err} > {log.out}"
# Basecalling
# Runs the Guppy basecaller (GPU binary taken from config) on the multi-FAST5
# directory and writes compressed FASTQ files into the "checkpoints"
# directory. Declared as a checkpoint because the set of FASTQ files is looked
# up afterwards by aggregate_guppy_basecalling.
checkpoint guppy_gpu_basecalling:
    input:
        MULTI_FAST5_DIR
    output:
        directory(os.path.join(BASECALLING_DIR, "checkpoints"))
    log:
        out="logs/guppy.metag.lr.out.log",
        err="logs/guppy.metag.lr.err.log"
    threads:
        config["guppy"]["gpu"]["threads"]
    message:
        "FAST5: basecalling w/ Guppy"
    shell:
        """
        (date && \
        {config[guppy][gpu][bin]} --input_path {input} --save_path {output} \
        --config {config[guppy][config]} \
        --disable_pings --compress_fastq \
        --cpu_threads_per_caller {threads} \
        -x {config[guppy][gpu][gpu_device]} \
        --records_per_fastq {config[guppy][gpu][records_per_fastq]} \
        --chunk_size {config[guppy][gpu][chunk_size]} \
        --chunks_per_runner {config[guppy][gpu][chunks_per_runner]} \
        --gpu_runners_per_device {config[guppy][gpu][runners_per_device]} \
        --num_callers {config[guppy][gpu][num_callers]} && \
        date) 2>> {log.err} >> {log.out}
        """
def aggregate_guppy_basecalling(wildcards):
    """Input function listing the FASTQ files written by the Guppy checkpoint.

    Looks up the output directory of the ``guppy_gpu_basecalling`` checkpoint,
    globs it for ``fastq_runid_*.fastq.gz`` files and returns one path under
    ``BASECALLING_DIR/checkpoints`` for every run ID found.
    """
    basecall_dir = checkpoints.guppy_gpu_basecalling.get(**wildcards).output[0]
    # Collect the run-ID part of every FASTQ file present in the directory.
    found = glob_wildcards(
        os.path.join(basecall_dir, "fastq_runid_{runid_i_j}.fastq.gz")
    )
    fastq_pattern = os.path.join(
        BASECALLING_DIR, "checkpoints/fastq_runid_{runid_i_j}.fastq.gz"
    )
    return expand(fastq_pattern, runid_i_j=found.runid_i_j)
# Concatenates all per-run FASTQ files from the basecalling checkpoint into a
# single lr.fastq.gz. The input paths are printed one per line before being
# piped to `sort`; echoing them as a single space-separated line would give
# `sort` only one record and leave the order unchanged.
rule merge_guppy_basecalling:
    input:
        aggregate_guppy_basecalling
    output:
        os.path.join(BASECALLING_DIR, "lr.fastq.gz")
    message:
        "FAST5: Concat FASTQs after basecalling"
    shell:
        "cat $(printf '%s\\n' {input} | sort) > {output}"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment