Comparing the headers in comparison of read-type and assembler against zymo

b150f5eb · Susheel Busi · 7c15b717 · b150f5eb · b150f5eb · b150f5eb
Commit b150f5eb authored 4 years ago by Susheel Busi
--- a/config/Zymo/sbatch.zymo.sh
+++ b/config/Zymo/sbatch.zymo.sh
@@ -42,6 +42,6 @@ snakemake -s ${ONTP_SMK} -rp --jobs 10 --local-cores 1 \
 --cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}" --unlock 
 
 # run the pipeline
-snakemake -s ${ONTP_SMK} -rpn --jobs 10 --local-cores 1 \
+snakemake -s ${ONTP_SMK} -rp --jobs 10 --local-cores 1 \
 --configfile ${ONTP_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
 --cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}"
--- a/config/Zymo/slurm.zymo.yaml
+++ b/config/Zymo/slurm.zymo.yaml
@@ -3,7 +3,7 @@ __default__:
  nodes: 1
  partition: "batch"
  quality: "normal"
-  runtime: "0-01:00:00"
+  runtime: "0-00:10:00"
  threads: 1 
  job-name: "ZYMO_REF.{rule}"

@@ -31,3 +31,12 @@ analysis_diamond:
  partition: "batch"
  quality: "normal"
  runtime: "00-00:15:00"
+
+get_headers:
+  runtime: "00-00:05:00"
+
+zymo_headers:
+  runtime: "00-00:05:00"
+
+comparison:
+  runtime: "00-00:15:00"
--- a/workflow_zymo/Snakefile
+++ b/workflow_zymo/Snakefile
@@ -67,7 +67,10 @@ rule all:
            combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS] +
            ["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]), 
        expand(os.path.join(RESULTS_DIR, "analysis/headers/{rtype_tool}_zymo_headers.txt"), 
-            rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]) 
+            rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
+        expand(os.path.join(RESULTS_DIR, "analysis/comparison/common_{combi}.txt"),
+            combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS] +
+            ["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]) 


 rule download_genomes:
@@ -198,4 +201,29 @@ rule zymo_headers:
    shell:
       "(date && "
       "awk '{{print $1}}' {input} > {output} && "
-       "date) 2> {log.err} > {log.out}"
\ No newline at end of file
+       "date) 2> {log.err} > {log.out}"
+
+rule comparison:
+    input:
+        in1=os.path.join(RESULTS_DIR,"analysis/headers/{rtype1}_{tool1}__{rtype2}_{tool2}_headers.txt"),  # header list for the pair in the requested order
+        in2=os.path.join(RESULTS_DIR,"analysis/headers/{rtype2}_{tool2}__{rtype1}_{tool1}_headers.txt")  # header list for the same pair with the order swapped
+    output:
+        out1=os.path.join(RESULTS_DIR, "analysis/comparison/common_{rtype1}_{tool1}__{rtype2}_{tool2}.txt"),  # lines present in both inputs (comm -12)
+        out2=os.path.join(RESULTS_DIR, "analysis/comparison/{rtype1}_{tool1}_uniq__{rtype2}_{tool2}.txt"),  # lines found only in in1 (comm -23)
+        out3=os.path.join(RESULTS_DIR, "analysis/comparison/{rtype2}_{tool2}_uniq__{rtype1}_{tool1}.txt")  # lines found only in in2 (comm -13)
+    log:
+        out="logs/comparison.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",  # receives stdout only
+        err="logs/comparison.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"  # receives stderr only
+    wildcard_constraints:
+        rtype1="|".join(READ_TYPES),  # each wildcard may only match one of the known read types / assemblers
+        rtype2="|".join(READ_TYPES),
+        tool1="|".join(ASSEMBLERS),
+        tool2="|".join(ASSEMBLERS)
+    shell:
+        """
+        (date &&\
+        comm -12 <(sort {input.in1} | uniq) <(sort {input.in2} | uniq) > {output.out1} &&\
+        comm -23 <(sort {input.in1} | uniq) <(sort {input.in2} | uniq) > {output.out2} &&\
+        comm -13 <(sort {input.in1} | uniq) <(sort {input.in2} | uniq) > {output.out3} &&\
+        date) 2> {log.err} > {log.out}
+        """
\ No newline at end of file