init.smk for workflow and workflow_report

0171c350 · Valentina Galata · 3f9425a4 · 0171c350 · 0171c350 · 0171c350
Commit 0171c350 authored 4 years ago by Valentina Galata
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
 # Pipeline
-
-# snakemake -s workflow/Snakefile --configfile config/GDB/config.yaml --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline --cores 1 -rpn
+#
+# Example call: snakemake -s workflow/Snakefile --configfile config/GDB/config.yaml --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline --cores 1 -rpn

 ##############################
 # MODULES
 import os
-from pathlib import Path
-from tempfile import TemporaryDirectory
-
 from scripts.utils import find_fast5, assembler_pairs

 ##############################
@@ -15,48 +12,8 @@ from scripts.utils import find_fast5, assembler_pairs
 # can be overwritten by using --configfile <path to config> when calling snakemake
 # configfile: "config/config.yaml"

-# Paths
-SRC_DIR = srcdir("scripts")
-ENV_DIR = srcdir("envs")
-MOD_DIR = srcdir("../submodules")
-
-# default executable for snakmake
-shell.executable("bash")
-
-# working directory
-workdir:
-    config["work_dir"]
-
-##############################
-# DATA & TOOLS
-
-# TODO: config validation
-
-# DATA_DIR = config["data_dir"]
-RESULTS_DIR = config["results_dir"]
-DB_DIR = config["db_dir"]
-
-# Steps
-STEPS               = config['steps']
-# ANNOTATION_STEPS    = config["annotation_steps"]
-# ANALYSIS_STEPS      = config["analysis_steps"]
-
-# Input
-INPUT_G_FAST5 = find_fast5(config["data"]["metag"]["ont"]["files"], config["data"]["metag"]["ont"]["dirs"]) # TODO: consider a different approach
-INPUT_G_SR    = list(config["data"]["metag"]["sr"].values())
-INPUT_T_SR    = []
-if config["data"]["metat"]["sr"]["r1"] and config["data"]["metat"]["sr"]["r2"]:
-    INPUT_T_SR = list(config["data"]["metat"]["sr"].values())
-
-# Assemblers and read types
-META_TYPES      = ["metag", "metat"] if INPUT_T_SR else ["metag"]
-READ_TYPES      = list(config["assemblers"].keys()) # list of read types
-ASSEMBLERS      = [y for x in config["assemblers"].values() for y in x] # list of all assemblers
-READ_ASSEMBLERS = [y for x in [[(k, vv) for vv in v] for k, v in config["assemblers"].items()] for y in x] # list of (read type, assembler)
-READ_ASSEMBLER_PAIRS = assembler_pairs(READ_ASSEMBLERS)
-
-# File extensions of index files created by BWA
-BWA_IDX_EXT = ["amb", "ann", "bwt", "pac", "sa"]
+include:
+    "rules/init.smk"

 ##############################
 # TARGETS & RULES

--- a/workflow/rules/init.smk
+++ b/workflow/rules/init.smk
+# Paths
+SRC_DIR = srcdir("scripts")
+ENV_DIR = srcdir("envs")
+MOD_DIR = srcdir("../submodules")
+
+# default executable for snakemake
+shell.executable("bash")
+
+# working directory
+workdir:
+    config["work_dir"]
+
+# TODO: config validation
+
+# DATA_DIR = config["data_dir"]
+RESULTS_DIR = config["results_dir"]
+DB_DIR = config["db_dir"]
+
+# Steps
+STEPS               = config['steps']
+
+# Input
+INPUT_G_FAST5 = find_fast5(config["data"]["metag"]["ont"]["files"], config["data"]["metag"]["ont"]["dirs"]) # TODO: consider a different approach
+INPUT_G_SR    = list(config["data"]["metag"]["sr"].values())
+INPUT_T_SR    = []
+if config["data"]["metat"]["sr"]["r1"] and config["data"]["metat"]["sr"]["r2"]:
+    INPUT_T_SR = list(config["data"]["metat"]["sr"].values())
+
+# Assemblers and read types
+META_TYPES      = ["metag", "metat"] if INPUT_T_SR else ["metag"]
+READ_TYPES      = list(config["assemblers"].keys()) # list of read types
+ASSEMBLERS      = [y for x in config["assemblers"].values() for y in x] # list of all assemblers
+READ_ASSEMBLERS = [y for x in [[(k, vv) for vv in v] for k, v in config["assemblers"].items()] for y in x] # list of (read type, assembler)
+READ_ASSEMBLER_PAIRS = assembler_pairs(READ_ASSEMBLERS)
+
+# File extensions of index files created by BWA
+BWA_IDX_EXT = ["amb", "ann", "bwt", "pac", "sa"]
\ No newline at end of file
--- a/workflow_report/Snakefile
+++ b/workflow_report/Snakefile
-# snakemake -s workflow_report/Snakefile --configfile config/GDB/config.yaml --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline --cores 1 -rpn
-# References:
-#   rmarkdown: https://bookdown.org/yihui/rmarkdown/
+# Report pipeline
+#
+# Example call: snakemake -s workflow_report/Snakefile --configfile config/GDB/config.yaml --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline --cores 1 -rpn

-##################################################
-# Python modules
-##################################################
+##############################
+# MODULES
 import os
-
-from scripts.utils import assembler_pairs
+from scripts.utils import find_fast5, assembler_pairs

 ##############################
 # CONFIG
+# can be overwritten by using --configfile <path to config> when calling snakemake
+# configfile: "config/config.yaml"

-# Paths
-SRC_DIR = srcdir("scripts")
-ENV_DIR = srcdir("envs")
-
-# default executable for snakmake
-shell.executable("bash")
-
-# working directory
-workdir:
-    config["work_dir"]
+include:
+    "rules/init.smk"

 ##############################
-# DATA & TOOLS
-
-# TODO: config validation
+# TARGETS & RULES

-RESULTS_DIR = config["results_dir"]
-DB_DIR = config["db_dir"]
-
-# Input
-# INPUT_G_FAST5 = find_fast5(config["data"]["metag"]["ont"]["files"], config["data"]["metag"]["ont"]["dirs"]) # TODO: consider a different approach
-INPUT_G_SR    = list(config["data"]["metag"]["sr"].values())
-INPUT_T_SR    = []
-if config["data"]["metat"]["sr"]["r1"] and config["data"]["metat"]["sr"]["r2"]:
-    INPUT_T_SR = list(config["data"]["metat"]["sr"].values())
-
-# Assemblers and read types
-META_TYPES      = ["metag", "metat"] if INPUT_T_SR else ["metag"]
-READ_TYPES      = list(config["assemblers"].keys()) # list of read types
-ASSEMBLERS      = [y for x in config["assemblers"].values() for y in x] # list of all assemblers
-READ_ASSEMBLERS = [y for x in [[(k, vv) for vv in v] for k, v in config["assemblers"].items()] for y in x] # list of (read type, assembler)
-READ_ASSEMBLER_PAIRS = assembler_pairs(READ_ASSEMBLERS)
-
-##################################################
-# TARGETS
-##################################################
-# FIG_MMSEQ_UPSETR    = os.path.join(config["output"], config["fig_mmseq_upsetr"]["output"])
-# FIG_PARTIAL_GENES   = os.path.join(config["output"], config["fig_partial_genes"]["output"])
-# FIG_CRISPR          = os.path.join(config["output"], config["fig_crispr"]["output"])
-# FIG_PLASFLOW        = os.path.join(config["output"], config["fig_plasflow"]["output"])
-# FIG_RGI             = os.path.join(config["output"], config["fig_rgi"]["output"])
-
-##################################################
-# RULES
-##################################################
 # TODO: sub-workflow

 rule all:
    input:
        os.path.join(RESULTS_DIR, "report/report.html")
-#         FIG_MMSEQ_UPSETR,
-#         FIG_PARTIAL_GENES,
-#         FIG_NANOSTATS,
-#         FIG_CRISPR,
-#         FIG_PLASFLOW,
-#         FIG_RGI,
-
-# include:
-    # "rules/parse_data.smk"

 include:
    "rules/collect_data.smk"

--- a/workflow_report/rules/init.smk
+++ b/workflow_report/rules/init.smk
+../../workflow/rules/init.smk
\ No newline at end of file