From 915b2d7527d34913903bf315c2918471212174bd Mon Sep 17 00:00:00 2001
From: "Susheel Bhanu Busi susheel.busi@uni.lu" <susheel.busi@uni.lu>
Date: Wed, 27 May 2020 07:23:54 +0200
Subject: [PATCH] collected miscellaneous files into one folder

---
 .../basecalling_snakefile                     | 269 +++++++++++++++++
 .../checkpoint_SNAKEFILE                      |  57 ++++
 test_troubleshooting_snakefiles/map_snakefile | 165 +++++++++++
 .../no_barcode_snakefile                      | 219 ++++++++++++++
 .../test_snakefile                            | 280 ++++++++++++++++++
 5 files changed, 990 insertions(+)
 create mode 100755 test_troubleshooting_snakefiles/basecalling_snakefile
 create mode 100755 test_troubleshooting_snakefiles/checkpoint_SNAKEFILE
 create mode 100644 test_troubleshooting_snakefiles/map_snakefile
 create mode 100755 test_troubleshooting_snakefiles/no_barcode_snakefile
 create mode 100755 test_troubleshooting_snakefiles/test_snakefile

diff --git a/test_troubleshooting_snakefiles/basecalling_snakefile b/test_troubleshooting_snakefiles/basecalling_snakefile
new file mode 100755
index 0000000..57e5ecf
--- /dev/null
+++ b/test_troubleshooting_snakefiles/basecalling_snakefile
@@ -0,0 +1,269 @@
+# For running only the BASECALLING part of the workflow for ONT data
+
+import os
+from tempfile import TemporaryDirectory
+
+configfile: "config/CONFIG.yaml"
+DATA_DIR = config["data_dir"]
+RESULTS_DIR = config["results_dir"]
+DB_DIR=config["db_dir"]
+RUNS=[config["runs"]["first"],
+    config["runs"]["second"]]
+#    config["runs"]["third"]]
+BARCODES=config["barcodes"]
+SAMPLES=["ONT3_MG_xx_Rashi_S11"]
+sr_sample=["ONT3_MG_xx_Rashi_S11"]
+# SAMPLES_ALL=config["samples"]
+REFERENCES=["igc", "hg38"]
+IGC_URI=config["igc"]["uri"]
+HG38_URI=config["hg38"]["uri"]
+ASSEMBLERS=config["assemblers"]
+MAPPERS=["bwa_mem", "minimap2"]
+
+# Rule all for running the assembly and annotation
+rule all:
+    input:
+        "basecall_merge_qc.done",
+        "basecall_merge_qc_NO_MOD.done",
+        "dummy_folders_created.done"
+
+######
+# WRAPPER RULES
+######
+# Added the `shell:` so as to avoid it from being autodetected as `localrule`. This is needed so that an email can be sent upon event changes for this rule.
+rule BASECALL_MERGE_QC:
+    input:
+        expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats.txt"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats.txt"), run=RUNS, barcode=BARCODES)
+    output: "basecall_merge_qc.done"
+    shell:
+        """
+        touch {output}
+        """
+
+rule BASECALL_MERGE_QC_NO_MOD:
+    input:
+        expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/{barcode}.fastq.gz"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD.txt"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats_NO_MOD.txt"), run=RUNS, barcode=BARCODES)
+    output: "basecall_merge_qc_NO_MOD.done"
+    shell:
+        """
+        touch {output}
+        """
+
+rule DUMMY_FOLDER:
+    input:
+#         expand(os.path.join(RESULTS_DIR, "basecalled/{run}/dummy_folder.done"), run=RUNS),
+         expand(os.path.join(RESULTS_DIR, "basecalled{type}/{run}/dummy_folder.done"), run=RUNS, type=["", "_NO_MOD"])
+    output: "dummy_folders_created.done"
+    shell:
+        """
+        touch {output}
+        """      
+
+######
+# RULES
+######
+# Basecalling the multi-FAST5 files.
+# Currently requires dummy target, but would be nice to have this be a "real" file target. However, it seems that files are filled incrementally, hence there is no check if the whole process was successful or not if checking only for file presence.
+rule guppy_gpu_basecall:
+    input: os.path.join(DATA_DIR, "multifast5/{run}")
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/basecalling.done")
+    log: os.path.join(RESULTS_DIR, "basecalled/{run}/basecalling")
+    threads: config["guppy_gpu"]["cpu_threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_gpu][bin]} --input_path {input} --save_path $(dirname {output}) --config {config[guppy_gpu][config]} --cpu_threads_per_caller {threads} -x {config[guppy_gpu][gpu_device]} --disable_pings --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --chunk_size {config[guppy_gpu][chunk_size]} --chunks_per_runner {config[guppy_gpu][chunks_per_runner]} --gpu_runners_per_device {config[guppy_gpu][runners_per_device]} --num_callers {config[guppy_gpu][num_callers]} && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+rule guppy_gpu_basecall_NO_MOD:
+    input: os.path.join(DATA_DIR, "multifast5/{run}")
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/NO_MOD_basecalling.done")
+    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/NO_MOD_basecalling")
+    threads: config["guppy_gpu"]["cpu_threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_gpu][bin]} --input_path {input} --save_path $(dirname {output}) --config {config[guppy_gpu][hac_config]} --cpu_threads_per_caller {threads} -x {config[guppy_gpu][gpu_device]} --disable_pings --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --chunk_size {config[guppy_gpu][chunk_size]} --chunks_per_runner {config[guppy_gpu][chunks_per_runner]} --gpu_runners_per_device {config[guppy_gpu][runners_per_device]} --num_callers {config[guppy_gpu][num_callers]} && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+# Demultiplex.
+# Currently requires dummy target, but would be nice to have this be a "real" file target. However, it seems that files are filled incrementally, hence there is no check if the whole process was successful or not if checking only for file presence.
+rule guppy_gpu_demux:
+    input: rules.guppy_gpu_basecall.output
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/demux.done")
+    log: os.path.join(RESULTS_DIR, "basecalled/{run}/demux")
+    threads: config["guppy_barcoder"]["threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_barcoder][bin]} --input_path $(dirname {input}) --save_path $(dirname {output}) -t {threads} --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --trim_barcodes && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+rule guppy_gpu_demux_NO_MOD:
+    input: rules.guppy_gpu_basecall_NO_MOD.output
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/demux_NO_MOD.done")
+    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/demux")
+    threads: config["guppy_barcoder"]["threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_barcoder][bin]} --input_path $(dirname {input}) --save_path $(dirname {output}) -t {threads} --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --trim_barcodes && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+# Modifications (IMPORTANT)
+# Since 2019_GDB sample was "NOT" barcoded the demux folders are obsolete. 
+# The below section gathers all the fastq.gz files from basecalling into a "dummy" folder
+# The "dummy" folder will be used downstream similar to the "barcode" folder used in the 2018 analyses
+# The barcode in the CONFIG file will be given the name of this dummy folder
+rule create_dummy_folder_basecalling:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}/basecalling.done")
+    output:
+         out2=os.path.join(RESULTS_DIR, "basecalled/{run}/dummy_folder.done")
+    log: os.path.join(RESULTS_DIR, "basecalled/{run}/no_barcode")
+    shell:
+        """
+        (date &&\
+        mkdir $(dirname {input})/no_barcode  &&\
+        cp -vrf $(dirname {input})/*.gz $(dirname {input})/no_barcode/.  &&\
+        touch {output.out2}  &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+#        for file in $(dirname {input})/*.gz; do ln -s "$file" $(dirname {input})/no_barcode/`basename "$file"`; done  &&\
+
+rule create_dummy_folder_NO_MOD_basecalling:
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/NO_MOD_basecalling.done")
+    output:
+         out2=os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/dummy_folder.done")
+    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/no_barcode")
+    shell:
+        """
+        (date &&\
+        mkdir $(dirname {input})/no_barcode  &&\
+        cp -vrf $(dirname {input})/*.gz $(dirname {input})/no_barcode/.  &&\
+        touch {output.out2}  &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+#        for file in $(dirname {input})/*.gz; do ln -s "$file" $(dirname {input})/no_barcode/`basename "$file"`; done  &&\
+#
+# Merge multiple basecalled files, i.e., FASTQ files per barcode and run. Each multi-FAST5 file will produce a respective FASTQ file.
+# TODO: Simplify output path to `basecalled/{run}/{barcode}.fastq.gz`. This will require to change `$(dirname {output})` to `$(dirname {output})/{wildcards.barcode}`
+rule merge_individual_fastq_per_barcode:
+#    input: rules.create_dummy_folder_basecalling.output.out1
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}/no_barcode")
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz")
+    shell:
+        """
+        date
+        cat $(find $(dirname {output}) -name "*.fastq.gz" | sort) > {output}
+        touch {output}
+        date
+        """
+
+rule merge_individual_fastq_per_barcode_NO_MOD:
+#    input: rules.create_dummy_folder_NO_MOD_basecalling.output.out1
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/no_barcode")
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz")
+    shell:
+        """
+        date
+        cat $(find $(dirname {output}) -name "*.fastq.gz" | sort) > {output}
+        touch {output}
+        date
+        """
+
+# Helper function to get the input in order to create a *single* fastq file for all runs with the *same* barcode
+# TODO: Adjust according to TODO-merge_individual_fastq_per_barcode
+def get_all_fastq_for_barcode(wildcards):
+    return expand(os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz"), run=RUNS, barcode=wildcards.barcode)
+
+def get_all_fastq_for_barcode_NO_MOD(wildcards):
+    return expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz"), run=RUNS, barcode=wildcards.barcode)
+
+# Merge basecalled files (FASTQ) per barcode, i.e., combine reads from multiple runs according to their barcode.
+rule get_single_fastq_per_barcode:
+    input: get_all_fastq_for_barcode
+    output: os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz")
+    shell:
+        """
+        date
+        cat {input} > {output}
+        date
+        """
+rule get_single_fastq_per_barcode_NO_MOD:
+    input: get_all_fastq_for_barcode_NO_MOD
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/{barcode}.fastq.gz")
+    shell:
+        """
+        date
+        cat {input} > {output}
+        date
+        """
+
+# Get basic statistics of the ONT data; over all runs
+# TODO: Simplify top of path: replace `lr` by `nanostat`
+# TODO: Write more generic: have patterned `input: os.path.join(RESULTS_DIR, "basecalled/{prefix}/{barcode}.fastq.gz")` and patterned `output: os.path.join(RESULTS_DIR, "qc/nanostat/{prefix}/{barcode}/{barcode}NanoStats.txt")
+#
+rule nanostat:
+    input: rules.get_single_fastq_per_barcode.output
+    output: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats.txt")
+    log: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats")
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        (date &&\
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output}) &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+rule nanostat_NO_MOD:
+    input: rules.get_single_fastq_per_barcode_NO_MOD.output
+    output: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD.txt")
+    log: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD")
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        (date &&\
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output}) &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+# TODO: Deprecated if TODO-nanostat is completed
+# Get basic statistics of the ONT data; per run
+rule nanostat_per_run:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats.txt")
+    wildcard_constraints:
+        run='S.*'
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        date
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output})
+        date
+        """
+
+rule nanostat_per_run_NO_MOD:
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats_NO_MOD.txt")
+    wildcard_constraints:
+        run='S.*'
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        date
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output})
+        date
+        """
+
+
diff --git a/test_troubleshooting_snakefiles/checkpoint_SNAKEFILE b/test_troubleshooting_snakefiles/checkpoint_SNAKEFILE
new file mode 100755
index 0000000..498e49c
--- /dev/null
+++ b/test_troubleshooting_snakefiles/checkpoint_SNAKEFILE
@@ -0,0 +1,57 @@
+# File for running ONT analyses
+
+# default configuration file
+configfile:"config/CONFIG.yaml"
+
+# default executable for snakmake
+shell.executable("bash")
+
+# input settings
+RUNS=config['runs']['first']
+STEPS=config['steps']
+
+# include rules for the workflows based on "steps" in the CONFIG.yaml file
+# ONT analyses workflow
+TARGETS = []
+
+if 'assembly_annotation' in STEPS:
+        include: "workflows/checkpoint_assembly_annotation.smk"
+        TARGETS += ["assemble_and_coverage.done",
+                    "annotate.done",
+                    "basecall_merge_qc.done",
+                    "coverage_of_references.done",
+                    "prodigal_gene_call.done",
+                    "diamond_proteins.done"]
+
+if 'mmseq' in STEPS:
+        include: "workflows/mmseq.smk"
+        TARGETS += ["mmseq_comparison_for_ont.done"]
+
+if 'metaT' in STEPS:
+        include: "workflows/metat.smk"
+        TARGETS += ["metaT_mapping_for_ONT.done"]
+
+if 'mapping' in STEPS:
+        include: "workflows/mapping.smk"
+        TARGETS += ["mapping_for_binning.done"]
+
+if 'binning' in STEPS:
+        include: "workflows/binning.smk"
+        TARGETS += ["binning_for_ont.done"]
+
+if 'taxonomy' in STEPS:
+        include: "workflows/taxonomy.smk"
+        TARGETS += ["taxonomy_for_ont.done"]
+
+if 'analysis' in STEPS:
+        include: "workflows/analysis.smk"
+        TARGETS += ["analysis.done"]
+
+#else:
+#    raise Exception('You are not serious. No input data')
+#        print("No input data provided")
+
+
+rule all:
+    input:
+        TARGETS
diff --git a/test_troubleshooting_snakefiles/map_snakefile b/test_troubleshooting_snakefiles/map_snakefile
new file mode 100644
index 0000000..f05d94e
--- /dev/null
+++ b/test_troubleshooting_snakefiles/map_snakefile
@@ -0,0 +1,165 @@
+# Workflow for running mapping steps of different assemblers, and mappers for the Binning workflow
+
+#shell.executable("/bin/bash")
+#shell.prefix("source ~/.bashrc; ")
+import os
+from tempfile import TemporaryDirectory
+
+configfile: "config/CONFIG.yaml"
+DATA_DIR = config["data_dir"]
+RESULTS_DIR = config["results_dir"]
+DB_DIR=config["db_dir"]
+BARCODES=config["barcodes"]
+ASSEMBLERS=config["assemblers"]
+MAPPERS=["bwa", "mmi"]
+SAMPLES=["flye", "megahit", "metaspades_hybrid"]
+BINNING_SAMPLES=config["binning_samples"]
+HYBRID_ASSEMBLER=config["hybrid_assembler"]
+
+# specify which rules to run
+#include:
+#    '../rules/MAPPING_RULES'
+
+# Rule all for mapping
+rule MAPPING:
+    input:
+        expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"), hybrid_assembler=HYBRID_ASSEMBLER),
+        expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_{reads}_{hybrid_assembler}/{mapper}_{reads}_{hybrid_assembler}.bam"), mapper=MAPPERS, reads=["sr", "lr"], hybrid_assembler=HYBRID_ASSEMBLER),
+        expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"), hybrid_assembler=HYBRID_ASSEMBLER),
+        expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam"), mapper=MAPPERS, hybrid_assembler=HYBRID_ASSEMBLER)
+    output:
+        touch('mapping_for_binning.done')
+
+#######################
+## Mapping to hybrid ##
+#######################
+rule build_hybrid_bwa_index:
+    input: expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fasta"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode="no_barcode", hybrid_assembler=HYBRID_ASSEMBLER)
+    output:
+#           done=touch("bwa_index.done")
+           index_amb=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.amb"),
+           index_ann=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.ann"),
+           index_bwt=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.bwt"),
+           index_pac=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.pac"),
+           index_sa=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.sa")
+    params:
+           prefix=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}")
+    log: os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.bwa_index")
+    conda: "../envs/bwa.yaml"
+    shell: """
+           (date &&\
+           bwa index {input} -p {params.prefix} &&\
+           date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+           """
+
+rule hybrid_map_short_reads_to_hybrid_contigs_w_bwa_mem:
+    input:
+          query_r1=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R1_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
+          query_r2=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R2_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
+          index_amb=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.amb"),
+          index_ann=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.ann"),
+          index_bwt=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"),
+          index_pac=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.pac"),
+          index_sa=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.sa"),
+          index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fasta"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode="no_barcode", hybrid_assembler=HYBRID_ASSEMBLER)
+    output:
+          os.path.join(RESULTS_DIR, "mapping/bwa_sr_{hybrid_assembler}/bwa_sr_{hybrid_assembler}.bam")
+    params:
+          index_prefix=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades"),
+          out_prefix=os.path.join(RESULTS_DIR, "mapping/bwa_sr_{hybrid_assembler}/bwa_sr_{hybrid_assembler}")
+    log: os.path.join(RESULTS_DIR, "mapping/bwa_sr_{hybrid_assembler}/sr_contigs.bwa_mem_samtools")
+    conda: "../envs/bwa.yaml"
+    threads:
+          config["bwa"]["threads"]
+#    wildcard_constraints:
+#          sample='\w+'
+    shell: """
+           (date &&\
+           bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
+           date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+           """
+
+rule hybrid_map_long_reads_to_hybrid_contigs_w_bwa_mem:
+    input:
+          query_lr=expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode=BARCODES),
+          index_amb=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.amb"),
+          index_ann=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.ann"),
+          index_bwt=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"),
+          index_pac=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.pac"),
+          index_sa=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.sa"),
+          index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fasta"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode="no_barcode", hybrid_assembler=HYBRID_ASSEMBLER)
+    output:
+          os.path.join(RESULTS_DIR, "mapping/bwa_lr_{hybrid_assembler}/bwa_lr_{hybrid_assembler}.bam")
+    params:
+          index_prefix=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades"),
+          out_prefix=os.path.join(RESULTS_DIR, "mapping/bwa_lr_{hybrid_assembler}/bwa_lr_{hybrid_assembler}"),
+    log:  os.path.join(RESULTS_DIR, "mapping/bwa_lr_{hybrid_assembler}/lr_contigs.bwa_mem_samtools")
+    conda: "../envs/bwa.yaml"
+    threads: config["bwa"]["threads"]
+#    wildcard_constraints:
+#          sample='\w+'
+    shell: """
+          (date &&\
+          bwa mem -x ont2d -t {threads} {params.index_prefix} {input.query_lr} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
+          date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+          """
+
+rule hybrid_build_minimap2_long_read_contigs_ont_index:
+    input: expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fasta"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode="no_barcode", hybrid_assembler=HYBRID_ASSEMBLER)
+    output: os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi")
+    conda: "../envs/minimap2.yaml"
+    shell:
+           """
+           date
+           minimap2 -x map-ont -d {output} {input}
+           date
+           """
+
+rule hybrid_map_long_reads_to_hybrid_contigs_w_minimap2:
+    input:
+          query=expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode="no_barcode"),
+          index=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"),
+          index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fasta"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode="no_barcode", hybrid_assembler=HYBRID_ASSEMBLER)
+    output: os.path.join(RESULTS_DIR, "mapping/mmi_lr_{hybrid_assembler}/mmi_lr_{hybrid_assembler}.bam")
+    params:
+          prefix=os.path.join(RESULTS_DIR, "mapping/mmi_lr_{hybrid_assembler}/mmi_lr_{hybrid_assembler}")
+    conda: "../envs/minimap2.yaml"
+    threads: config["minimap2"]["threads"]
+    log: os.path.join(RESULTS_DIR, "mapping/mmi_lr_{hybrid_assembler}/metaspades.minimap2_samtools")
+    shell: """
+            (date &&\
+            minimap2 -a -t {threads} {input.index} {input.query} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output} &&\
+            date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+            """
+
+rule hybrid_map_short_reads_to_hybrid_contigs_w_minimap2:
+    input:
+          query_r1=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R1_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
+          query_r2=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R2_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
+          index=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"),
+          index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fasta"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode="no_barcode", hybrid_assembler=HYBRID_ASSEMBLER)
+    output: os.path.join(RESULTS_DIR, "mapping/mmi_sr_{hybrid_assembler}/mmi_sr_{hybrid_assembler}.bam")
+    params:
+          prefix=os.path.join(RESULTS_DIR, "mapping/mmi_sr_{hybrid_assembler}/mmi_sr_{hybrid_assembler}")
+    conda: "../envs/minimap2.yaml"
+    threads: config["minimap2"]["threads"]
+    log: os.path.join(RESULTS_DIR, "mapping/mmi_sr_{hybrid_assembler}/metaspades.minimap2_samtools")
+    shell:
+           """
+            (date &&\
+           minimap2 -a -t {threads} {input.index} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output}
+           date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+            """
+
+rule hybrid_merge_bam_files_for_metaspades:
+    input:
+          bam_sr=os.path.join(RESULTS_DIR, "mapping/{mapper}_sr_{hybrid_assembler}/{mapper}_sr_{hybrid_assembler}.bam"),
+          bam_lr=os.path.join(RESULTS_DIR, "mapping/{mapper}_lr_{hybrid_assembler}/{mapper}_lr_{hybrid_assembler}.bam")
+    output: os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam")
+    conda: "../envs/binning.yaml"
+    shell:
+          """
+          (date &&\
+          samtools merge {output} {input.bam_sr} {input.bam_lr} &&\
+          date)
+          """
diff --git a/test_troubleshooting_snakefiles/no_barcode_snakefile b/test_troubleshooting_snakefiles/no_barcode_snakefile
new file mode 100755
index 0000000..d7c8681
--- /dev/null
+++ b/test_troubleshooting_snakefiles/no_barcode_snakefile
@@ -0,0 +1,219 @@
+# For running only the BASECALLING part of the workflow for ONT data
+
+import os
+from tempfile import TemporaryDirectory
+
+configfile: "config/CONFIG.yaml"
+DATA_DIR = config["data_dir"]
+RESULTS_DIR = config["results_dir"]
+DB_DIR=config["db_dir"]
+RUNS=[config["runs"]["first"],
+    config["runs"]["second"]]
+#    config["runs"]["third"]]
+BARCODES=config["barcodes"]
+SAMPLES=["ONT3_MG_xx_Rashi_S11"]
+sr_sample=["ONT3_MG_xx_Rashi_S11"]
+# SAMPLES_ALL=config["samples"]
+REFERENCES=["igc", "hg38"]
+IGC_URI=config["igc"]["uri"]
+HG38_URI=config["hg38"]["uri"]
+ASSEMBLERS=config["assemblers"]
+MAPPERS=["bwa_mem", "minimap2"]
+
+# Rule all for running the assembly and annotation
+rule all:
+    input:
+        "basecall_merge_qc.done",
+        "basecall_merge_qc_NO_MOD.done"
+
+######
+# WRAPPER RULES
+######
+# Added the `shell:` so as to avoid it from being autodetected as `localrule`. This is needed so that an email can be sent upon event changes for this rule.
+rule BASECALL_MERGE_QC:
+    input:
+        expand(os.path.join(RESULTS_DIR, "basecalled/merged/merged.fastq.gz"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/merged/merged_NanoStats.txt"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{run}NanoStats.txt"), run=RUNS, barcode=BARCODES)
+    output: "basecall_merge_qc.done"
+    shell:
+        """
+        touch {output}
+        """
+
+rule BASECALL_MERGE_QC_NO_MOD:
+    input:
+        expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/merged.fastq.gz"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/merged/merged_NanoStats_NO_MOD.txt"), barcode=BARCODES),
+        expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{run}NanoStats_NO_MOD.txt"), run=RUNS, barcode=BARCODES)
+    output: "basecall_merge_qc_NO_MOD.done"
+    shell:
+        """
+        touch {output}
+        """
+
+
+######
+# RULES
+######
+# Basecalling the multi-FAST5 files.
+# Currently requires dummy target, but would be nice to have this be a "real" file target. However, it seems that files are filled incrementally, hence there is no check if the whole process was successful or not if checking only for file presence.
+rule guppy_gpu_basecall:
+    input: os.path.join(DATA_DIR, "multifast5/{run}")
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/basecalling.done")
+    log: os.path.join(RESULTS_DIR, "basecalled/{run}/basecalling")
+    threads: config["guppy_gpu"]["cpu_threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_gpu][bin]} --input_path {input} --save_path $(dirname {output}) --config {config[guppy_gpu][config]} --cpu_threads_per_caller {threads} -x {config[guppy_gpu][gpu_device]} --disable_pings --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --chunk_size {config[guppy_gpu][chunk_size]} --chunks_per_runner {config[guppy_gpu][chunks_per_runner]} --gpu_runners_per_device {config[guppy_gpu][runners_per_device]} --num_callers {config[guppy_gpu][num_callers]} && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+rule guppy_gpu_basecall_NO_MOD:
+    input: os.path.join(DATA_DIR, "multifast5/{run}")
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/NO_MOD_basecalling.done")
+    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/NO_MOD_basecalling")
+    threads: config["guppy_gpu"]["cpu_threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_gpu][bin]} --input_path {input} --save_path $(dirname {output}) --config {config[guppy_gpu][hac_config]} --cpu_threads_per_caller {threads} -x {config[guppy_gpu][gpu_device]} --disable_pings --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --chunk_size {config[guppy_gpu][chunk_size]} --chunks_per_runner {config[guppy_gpu][chunks_per_runner]} --gpu_runners_per_device {config[guppy_gpu][runners_per_device]} --num_callers {config[guppy_gpu][num_callers]} && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+# Demultiplex.
+# Currently requires dummy target, but would be nice to have this be a "real" file target. However, it seems that files are filled incrementally, hence there is no check if the whole process was successful or not if checking only for file presence.
+# Since the 2019_GDB run did not have any barcodes, the demultiplexing step is not requried
+#rule guppy_gpu_demux:
+#    input: rules.guppy_gpu_basecall.output
+#    output: os.path.join(RESULTS_DIR, "basecalled/{run}/demux.done")
+#    log: os.path.join(RESULTS_DIR, "basecalled/{run}/demux")
+#    threads: config["guppy_barcoder"]["threads"]
+#    shell:
+#        """
+#        (date &&\
+#        {config[guppy_barcoder][bin]} --input_path $(dirname {input}) --save_path $(dirname {output}) -t {threads} --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --trim_barcodes && \
+#        touch {output} &&\
+#        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+#        """
+#rule guppy_gpu_demux_NO_MOD:
+#    input: rules.guppy_gpu_basecall_NO_MOD.output
+#    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/demux_NO_MOD.done")
+#    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/demux")
+#    threads: config["guppy_barcoder"]["threads"]
+#    shell:
+#        """
+#        (date &&\
+#        {config[guppy_barcoder][bin]} --input_path $(dirname {input}) --save_path $(dirname {output}) -t {threads} --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --trim_barcodes && \
+#        touch {output} &&\
+#        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+#        """
+
+# Merge multiple basecalled files, i.e., FASTQ files per barcode and run. Each multi-FAST5 file will produce a respective FASTQ file.
+# TODO: Simplify output path to `basecalled/{run}/{barcode}.fastq.gz`. This will require to change `$(dirname {output})` to `$(dirname {output})/{wildcards.barcode}`
+rule merge_individual_fastq_per_barcode:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}")
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/{run}.fastq.gz")
+    shell:
+        """
+        date
+        cat $(find $(dirname {output}) -name "*.fastq.gz" | sort) > {output}
+        touch {output}
+        date
+        """
+
+rule merge_individual_fastq_per_barcode_NO_MOD:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}")
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{run}.fastq.gz")
+    shell:
+        """
+        date
+        cat $(find $(dirname {output}) -name "*.fastq.gz" | sort) > {output}
+        touch {output}
+        date
+        """
+
+## Helper function to get the input in order to create a *single* fastq file for all runs with the *same* barcode
+## TODO: Adjust according to TODO-merge_individual_fastq_per_barcode
+#def get_all_fastq_for_barcode(wildcards):
+#    return expand(os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz"), run=RUNS, barcode=wildcards.barcode)
+#
+#def get_all_fastq_for_barcode_NO_MOD(wildcards):
+#    return expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz"), run=RUNS, barcode=wildcards.barcode)
+
+# Merge basecalled files (FASTQ) per barcode, i.e., combine reads from multiple runs according to their barcode.
+rule get_single_fastq_per_barcode:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}/{run}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/../merged/merged.fastq.gz")
+    shell:
+        """
+        date
+        cat {input} > {output}
+        date
+        """
+rule get_single_fastq_per_barcode_NO_MOD:
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{run}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/../merged/merged.fastq.gz")
+    shell:
+        """
+        date
+        cat {input} > {output}
+        date
+        """
+
+# Get basic statistics of the ONT data; over all runs
+# TODO: Simplify top of path: replace `lr` by `nanostat`
+# TODO: Write more generic: have patterned `input: os.path.join(RESULTS_DIR, "basecalled/{prefix}/{barcode}.fastq.gz")` and patterned `output: os.path.join(RESULTS_DIR, "qc/nanostat/{prefix}/{barcode}/{barcode}NanoStats.txt")
+#
+rule nanostat:
+    input: os.path.join(RESULTS_DIR, "basecalled/merged/merged.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "qc/lr/merged/merged_NanoStats.txt")
+    log: os.path.join(RESULTS_DIR, "qc/lr/merged/merged_NanoStats")
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        (date &&\
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix merged -n $(basename -s "" {output}) &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+rule nanostat_NO_MOD:
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/merged.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "qc/lr/merged/merged_NanoStats_NO_MOD.txt")
+    log: os.path.join(RESULTS_DIR, "qc/lr/merged/merged_NanoStats_NO_MOD")
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        (date &&\
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix merged -n $(basename -s "" {output}) &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+# TODO: Deprecated if TODO-nanostat is completed
+# Get basic statistics of the ONT data; per run
+rule nanostat_per_run:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}/{run}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "qc/lr/{run}/{run}NanoStats.txt")
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        date
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.run} -n $(basename -s "" {output})
+        date
+        """
+
+rule nanostat_per_run_NO_MOD:
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{run}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "qc/lr/{run}/{run}NanoStats_NO_MOD.txt")
+    conda: "../envs/nanostat.yaml"
+    shell:
+        """
+        date
+        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.run} -n $(basename -s "" {output})
+        date
+        """
+
+
diff --git a/test_troubleshooting_snakefiles/test_snakefile b/test_troubleshooting_snakefiles/test_snakefile
new file mode 100755
index 0000000..5c0e2fd
--- /dev/null
+++ b/test_troubleshooting_snakefiles/test_snakefile
@@ -0,0 +1,280 @@
+# For running only the BASECALLING part of the workflow for ONT data
+
+import os
+from tempfile import TemporaryDirectory
+
+configfile: "config/CONFIG.yaml"
+DATA_DIR = config["data_dir"]
+RESULTS_DIR = config["results_dir"]
+DB_DIR=config["db_dir"]
+RUNS=[config["runs"]["first"],
+    config["runs"]["second"]]
+#    config["runs"]["third"]]
+BARCODES=config["barcodes"]
+SAMPLES=["ONT3_MG_xx_Rashi_S11"]
+sr_sample=["ONT3_MG_xx_Rashi_S11"]
+# SAMPLES_ALL=config["samples"]
+REFERENCES=["igc", "hg38"]
+IGC_URI=config["igc"]["uri"]
+HG38_URI=config["hg38"]["uri"]
+ASSEMBLERS=config["assemblers"]
+MAPPERS=["bwa_mem", "minimap2"]
+
+# Rule all for running the assembly and annotation
+rule all:
+    input:
+        "basecall_merge_qc.done",
+        "basecall_merge_qc_NO_MOD.done",
+        "dummy_folders_created.done"
+
+######
+# WRAPPER RULES
+######
+# Added the `shell:` so as to avoid it from being autodetected as `localrule`. This is needed so that an email can be sent upon event changes for this rule.
+rule BASECALL_MERGE_QC:
+    input:
+#        expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode=BARCODES)
+        expand(os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}.fastq.gz"), run=RUNS, barcode=BARCODES)
+#        expand(os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats.txt"), barcode=BARCODES),
+#        expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats.txt"), run=RUNS, barcode=BARCODES)
+    output: "basecall_merge_qc.done"
+    shell:
+        """
+        touch {output}
+        """
+
+rule BASECALL_MERGE_QC_NO_MOD:
+    input:
+#        expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/{barcode}.fastq.gz"), barcode=BARCODES)
+         expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}.fastq.gz"), run=RUNS, barcode=BARCODES)
+ #       expand(os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD.txt"), barcode=BARCODES),
+ #       expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats_NO_MOD.txt"), run=RUNS, barcode=BARCODES)
+    output: "basecall_merge_qc_NO_MOD.done"
+    shell:
+        """
+        touch {output}
+        """
+
+rule DUMMY_FOLDER:
+    input:
+#         expand(os.path.join(RESULTS_DIR, "basecalled/{run}/dummy_folder.done"), run=RUNS),
+         expand(os.path.join(RESULTS_DIR, "basecalled{type}/{run}/dummy_folder.done"), run=RUNS, type=["", "_NO_MOD"])
+    output: "dummy_folders_created.done"
+    shell:
+        """
+        touch {output}
+        """      
+
+######
+# RULES
+######
+# Basecalling the multi-FAST5 files.
+# Currently requires dummy target, but would be nice to have this be a "real" file target. However, it seems that files are filled incrementally, hence there is no check if the whole process was successful or not if checking only for file presence.
+checkpoint guppy_gpu_basecall:
+    input: os.path.join(DATA_DIR, "multifast5/{run}")
+    output: directory(os.path.join(RESULTS_DIR, "basecalled/{run}"))
+    log: os.path.join(RESULTS_DIR, "basecalled/{run}/basecalling")
+    threads: config["guppy_gpu"]["cpu_threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_gpu][bin]} --input_path {input} --save_path $(dirname {output}) --config {config[guppy_gpu][config]} --cpu_threads_per_caller {threads} -x {config[guppy_gpu][gpu_device]} --disable_pings --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --chunk_size {config[guppy_gpu][chunk_size]} --chunks_per_runner {config[guppy_gpu][chunks_per_runner]} --gpu_runners_per_device {config[guppy_gpu][runners_per_device]} --num_callers {config[guppy_gpu][num_callers]} && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+checkpoint guppy_gpu_basecall_NO_MOD:
+    input: os.path.join(DATA_DIR, "multifast5/{run}")
+    output: directory(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}"))
+    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/NO_MOD_basecalling")
+    wildcard_constraints:
+#        run='S.*'
+#        run='S.*^\/'
+#        run='\w+'
+#        run="^no.*"
+    threads: config["guppy_gpu"]["cpu_threads"]
+    shell:
+        """
+        (date &&\
+        {config[guppy_gpu][bin]} --input_path {input} --save_path $(dirname {output}) --config {config[guppy_gpu][hac_config]} --cpu_threads_per_caller {threads} -x {config[guppy_gpu][gpu_device]} --disable_pings --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --chunk_size {config[guppy_gpu][chunk_size]} --chunks_per_runner {config[guppy_gpu][chunks_per_runner]} --gpu_runners_per_device {config[guppy_gpu][runners_per_device]} --num_callers {config[guppy_gpu][num_callers]} && \
+        touch {output} &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+## Demultiplex.
+## Currently requires dummy target, but would be nice to have this be a "real" file target. However, it seems that files are filled incrementally, hence there is no check if the whole process was successful or not if checking only for file presence.
+#rule guppy_gpu_demux:
+#    input: rules.guppy_gpu_basecall.output
+#    output: os.path.join(RESULTS_DIR, "basecalled/{run}/demux.done")
+#    log: os.path.join(RESULTS_DIR, "basecalled/{run}/demux")
+#    threads: config["guppy_barcoder"]["threads"]
+#    shell:
+#        """
+#        (date &&\
+#        {config[guppy_barcoder][bin]} --input_path $(dirname {input}) --save_path $(dirname {output}) -t {threads} --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --trim_barcodes && \
+#        touch {output} &&\
+#        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+#        """
+#rule guppy_gpu_demux_NO_MOD:
+#    input: rules.guppy_gpu_basecall_NO_MOD.output
+#    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/demux_NO_MOD.done")
+#    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/demux")
+#    threads: config["guppy_barcoder"]["threads"]
+#    shell:
+#        """
+#        (date &&\
+#        {config[guppy_barcoder][bin]} --input_path $(dirname {input}) --save_path $(dirname {output}) -t {threads} --compress_fastq --records_per_fastq {config[guppy_gpu][records_per_fastq]} --trim_barcodes && \
+#        touch {output} &&\
+#        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+#        """
+
+# Modifications (IMPORTANT)
+# Since 2019_GDB sample was "NOT" barcoded the demux folders are obsolete. 
+# The below section gathers all the fastq.gz files from basecalling into a "dummy" folder
+# The "dummy" folder will be used downstream similar to the "barcode" folder used in the 2018 analyses
+# The barcode in the CONFIG file will be given the name of this dummy folder
+rule intermediate_basecalling:
+    input: os.path.join(RESULTS_DIR, "basecalled/{run}/{i}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/no_nobarcode/{i}.fastq.gz")
+    log: os.path.join(RESULTS_DIR, "basecalled/{run}/no_barcode_{i}")
+    shell:
+        """
+        (date &&\ 
+        ln -s {input} {output}  &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+rule intermediate_NO_MOD_basecalling:
+    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{i}.fastq.gz")
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/no_nobarcode/{i}.fastq.gz")
+    log: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/no_barcode_{i}")
+    shell:
+        """
+        (date &&\
+        ln -s {input} {output}  &&\
+        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+        """
+
+def aggregate_dummy_basecalling(wildcards):
+    checkpoint_output = checkpoints.guppy_gpu_basecall.get(**wildcards).output[0]
+    return expand(os.path.join(RESULTS_DIR, "basecalled/{run}/no_nobarcode/{id}.fastq.gz"),
+        run=wildcards.run,
+        i=glob_wildcards(os.path.join(checkpoint_output, "{i}.fastq.gz")).i)
+
+
+def aggregate_dummy_basecalling_NO_MOD(wildcards):
+    dummy_output=checkpoints.guppy_gpu_basecall_NO_MOD(**wildcards).output[0]
+    return expand(os.path.join(RESULTS_DIR, "basecalled/{run}/no_nobarcode/{i}.fastq.gz"),
+        run=wildcards.run, 
+        i=glob_wildcards(os.path.join(dummy_output, "{i}.fastq.gz")).i)
+
+
+# Merge multiple basecalled files, i.e., FASTQ files per barcode and run. Each multi-FAST5 file will produce a respective FASTQ file.
+# TODO: Simplify output path to `basecalled/{run}/{barcode}.fastq.gz`. This will require to change `$(dirname {output})` to `$(dirname {output})/{wildcards.barcode}`
+rule merge_individual_fastq_per_barcode:
+    input: aggregate_dummy_basecalling
+    output: os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz")
+    shell:
+        """
+        date
+        cat $(find $(dirname {output}) -name "*.fastq.gz" | sort) > {output}
+        touch {output}
+        date
+        """
+
+rule merge_individual_fastq_per_barcode_NO_MOD:
+    input: aggregate_dummy_basecalling_NO_MOD
+    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz")
+    shell:
+        """
+        date
+        cat $(find $(dirname {output}) -name "*.fastq.gz" | sort) > {output}
+        touch {output}
+        date
+        """
+
+## Helper function to get the input in order to create a *single* fastq file for all runs with the *same* barcode
+## TODO: Adjust according to TODO-merge_individual_fastq_per_barcode
+#def get_all_fastq_for_barcode(wildcards):
+#    return expand(os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz"), run=RUNS, barcode=wildcards.barcode)
+#
+#def get_all_fastq_for_barcode_NO_MOD(wildcards):
+#    return expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz"), run=RUNS, barcode=wildcards.barcode)
+#
+## Merge basecalled files (FASTQ) per barcode, i.e., combine reads from multiple runs according to their barcode.
+#rule get_single_fastq_per_barcode:
+#    input: get_all_fastq_for_barcode
+#    output: os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz")
+#    shell:
+#        """
+#        date
+#        cat {input} > {output}
+#        date
+#        """
+#rule get_single_fastq_per_barcode_NO_MOD:
+#    input: get_all_fastq_for_barcode_NO_MOD
+#    output: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/{barcode}.fastq.gz")
+#    shell:
+#        """
+#        date
+#        cat {input} > {output}
+#        date
+#        """
+#
+## Get basic statistics of the ONT data; over all runs
+## TODO: Simplify top of path: replace `lr` by `nanostat`
+## TODO: Write more generic: have patterned `input: os.path.join(RESULTS_DIR, "basecalled/{prefix}/{barcode}.fastq.gz")` and patterned `output: os.path.join(RESULTS_DIR, "qc/nanostat/{prefix}/{barcode}/{barcode}NanoStats.txt")
+##
+#rule nanostat:
+#    input: rules.get_single_fastq_per_barcode.output
+#    output: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats.txt")
+#    log: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats")
+#    conda: "../envs/nanostat.yaml"
+#    shell:
+#        """
+#        (date &&\
+#        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output}) &&\
+#        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+#        """
+#
+#rule nanostat_NO_MOD:
+#    input: rules.get_single_fastq_per_barcode_NO_MOD.output
+#    output: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD.txt")
+#    log: os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD")
+#    conda: "../envs/nanostat.yaml"
+#    shell:
+#        """
+#        (date &&\
+#        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output}) &&\
+#        date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+#        """
+#
+## TODO: Deprecated if TODO-nanostat is completed
+## Get basic statistics of the ONT data; per run
+#rule nanostat_per_run:
+#    input: os.path.join(RESULTS_DIR, "basecalled/{run}/{barcode}/{barcode}.fastq.gz")
+#    output: os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats.txt")
+#    wildcard_constraints:
+#        run='S.*'
+#    conda: "../envs/nanostat.yaml"
+#    shell:
+#        """
+#        date
+#        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output})
+#        date
+#        """
+#
+#rule nanostat_per_run_NO_MOD:
+#    input: os.path.join(RESULTS_DIR, "basecalled_NO_MOD/{run}/{barcode}/{barcode}.fastq.gz")
+#    output: os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats_NO_MOD.txt")
+#    wildcard_constraints:
+#        run='S.*'
+#    conda: "../envs/nanostat.yaml"
+#    shell:
+#        """
+#        date
+#        NanoStat --fastq {input} --outdir $(dirname {output}) --prefix {wildcards.barcode} -n $(basename -s "" {output})
+#        date
+#        """
+#
+#
-- 
GitLab