Skip to content
Snippets Groups Projects
Commit 7c15b717 authored by Susheel Busi
Browse files

added header extraction rules to Snakefile

parent f54e24e7
No related branches found
No related tags found
2 merge requests: !78 "Add zymo workflow", !77 "Workflow: Zymo - Comparison to Reference Genomes"
This commit is part of merge request !78. Comments created here will be created in the context of that merge request.
......@@ -42,6 +42,6 @@ snakemake -s ${ONTP_SMK} -rp --jobs 10 --local-cores 1 \
--cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}" --unlock
# run the pipeline
snakemake -s ${ONTP_SMK} -rp --jobs 10 --local-cores 1 \
snakemake -s ${ONTP_SMK} -rpn --jobs 10 --local-cores 1 \
--configfile ${ONTP_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}"
......@@ -25,9 +25,9 @@ zymo_prodigal:
diamond_db:
partition: "batch"
quality: "normal"
runtime: "00-00:30:00"
runtime: "00-00:15:00"
analysis_diamond:
partition: "batch"
quality: "normal"
runtime: "00-2:00:00"
\ No newline at end of file
runtime: "00-00:15:00"
......@@ -62,7 +62,12 @@ rule all:
expand(os.path.join(RESULTS_DIR,"annotation/diamond/zymo.dmnd")),
expand(os.path.join(RESULTS_DIR, "analysis/diamond/{rtype_tool}.{ext}"),
rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
ext=["daa", "tsv"])
ext=["daa", "tsv"]),
expand(os.path.join(RESULTS_DIR, "analysis/headers/{combi}_headers.txt"),
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS] +
["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]),
expand(os.path.join(RESULTS_DIR, "analysis/headers/{rtype_tool}_zymo_headers.txt"),
rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
rule download_genomes:
......@@ -160,3 +165,37 @@ rule analysis_diamond:
"diamond blastp -q {input.faa} --db {input.db} --out {output.daa} -p {threads} --outfmt 100 && "
"diamond view --daa ${{daa%.*}} --max-target-seqs 1 -p {threads} --outfmt {params.outfmt} --out {output.tsv} && "
"date) &> {log}"
rule get_headers:
    # Extract sequence identifiers from the FASTA headers of a pairwise
    # CD-HIT result ({rtype1}_{tool1}__{rtype2}_{tool2}.faa).
    #
    # Pipeline performed by the shell command:
    #   1. grep keeps every line containing '>'.
    #   2. the first sed removes everything up to and including the last
    #      "__" on the line (the match is greedy).
    #   3. the second sed removes everything from the first space onwards.
    # The remaining text is written one entry per line to the output file.
    #
    # NOTE(review): the input is read from DATA_DIR while the output goes to
    # RESULTS_DIR - confirm the CD-HIT files really live under DATA_DIR.
    input:
        os.path.join(DATA_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa")
    output:
        os.path.join(RESULTS_DIR, "analysis/headers/{rtype1}_{tool1}__{rtype2}_{tool2}_headers.txt")
    log:
        out="logs/headers.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
        err="logs/headers.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
    wildcard_constraints:
        # Restrict each wildcard to the known read types / assemblers so the
        # "_" and "__" separators in the file name are parsed unambiguously.
        rtype1="|".join(READ_TYPES),
        rtype2="|".join(READ_TYPES),
        tool1="|".join(ASSEMBLERS),
        tool2="|".join(ASSEMBLERS)
    shell:
        # "_" and " " are ordinary regex characters, so they are written
        # without a backslash: "\_" and "\ " are invalid escape sequences in
        # a Python string literal and undefined escapes in POSIX regex.
        "(date && "
        "grep '>' {input} | sed 's/^.*__//' | sed 's/ .*//' > {output} && "
        "date) 2> {log.err} > {log.out}"
rule zymo_headers:
    # Write the first whitespace-separated field of every line of the
    # per-assembly DIAMOND table (analysis/diamond/{rtype}_{tool}.tsv) to
    # analysis/headers/{rtype}_{tool}_zymo_headers.txt.
    # `date` runs before and after the extraction; stdout and stderr of the
    # whole command group go to separate log files.
    wildcard_constraints:
        # Only accept the configured assemblers and read types.
        tool="|".join(ASSEMBLERS),
        rtype="|".join(READ_TYPES)
    log:
        err="logs/zymo_headers.{rtype}.{tool}.err.log",
        out="logs/zymo_headers.{rtype}.{tool}.out.log"
    input:
        os.path.join(RESULTS_DIR, "analysis/diamond/{rtype}_{tool}.tsv")
    output:
        os.path.join(RESULTS_DIR,"analysis/headers/{rtype}_{tool}_zymo_headers.txt")
    shell:
        # Doubled braces are Snakemake's escape for a literal "{" / "}" in
        # the awk program.
        "(date && awk '{{print $1}}' {input} > {output} && date) 2> {log.err} > {log.out}"
\ No newline at end of file
......@@ -3,37 +3,37 @@ channels:
- bioconda
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_llvm
- boost=1.70.0=py38h9de70de_1
- boost-cpp=1.70.0=h7b93d67_3
- bzip2=1.0.8=h516909a_2
- ca-certificates=2020.6.20=hecda079_0
- certifi=2020.6.20=py38h32f6830_0
- diamond=0.9.25=hfb76ee0_0
- icu=67.1=he1b5a44_0
- ld_impl_linux-64=2.34=h53a641e_5
- libblas=3.8.0=16_openblas
- libcblas=3.8.0=16_openblas
- libffi=3.2.1=he1b5a44_1007
- libgcc-ng=9.2.0=h24d8f2e_2
- libgfortran-ng=7.5.0=hdf63c60_6
- liblapack=3.8.0=16_openblas
- libopenblas=0.3.9=h5ec1e0e_0
- libstdcxx-ng=9.2.0=hdf63c60_2
- llvm-openmp=10.0.0=hc9558a2_0
- lz4-c=1.9.2=he1b5a44_1
- ncurses=6.1=hf484d3e_1002
- numpy=1.18.5=py38h8854b6b_0
- openssl=1.1.1g=h516909a_0
- pip=20.1.1=py_1
- python=3.8.3=cpython_he5300dc_0
- python_abi=3.8=1_cp38
- readline=8.0=hf8c457e_0
- setuptools=47.3.1=py38h32f6830_0
- sqlite=3.30.1=hcee41ef_0
- tk=8.6.10=hed695b0_0
- wheel=0.34.2=py_1
- xz=5.2.5=h516909a_0
- zlib=1.2.11=h516909a_1006
- zstd=1.4.4=h6597ccf_3
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- boost=1.70.0
- boost-cpp=1.70.0
- bzip2=1.0.8
- ca-certificates=2020.6.20
- certifi=2020.6.20
- diamond=0.9.25
- icu=67.1
- ld_impl_linux-64=2.34
- libblas=3.8.0
- libcblas=3.8.0
- libffi=3.2.1
- libgcc-ng=9.2.0
- libgfortran-ng=7.5.0
- liblapack=3.8.0
- libopenblas=0.3.9
- libstdcxx-ng=9.2.0
- llvm-openmp=10.0.0
- lz4-c=1.9.2
- ncurses=6.1
- numpy=1.18.5
- openssl=1.1.1g
- pip=20.1.1
- python=3.8.3
- python_abi=3.8
- readline=8.0
- setuptools=47.3.1
- sqlite=3.30.1
- tk=8.6.10
- wheel=0.34.2
- xz=5.2.5
- zlib=1.2.11
- zstd=1.4.4
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment