diff --git a/workflow/rules/analysis.smk b/workflow/rules/analysis.smk index 307db9d1fe7dc9b179622fde88ff65dc93f15bf2..d60d3af071a93b492f5881227752ba12304ea524 100644 --- a/workflow/rules/analysis.smk +++ b/workflow/rules/analysis.smk @@ -28,27 +28,29 @@ rule analysis_quast: # Proteins # for CD-HIT -rule analysis_bbmap_rename: +rule analysis_cdhit_rename: input: os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa") output: temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa")) - log: - "logs/analysis_bbmap_rename_{rtype}.{tool}.log" + # log: + # "logs/analysis_bbmap_rename_{rtype}.{tool}.log" wildcard_constraints: rtype="|".join(READ_TYPES), tool="|".join(ASSEMBLERS) - threads: - config["bbmap"]["threads"] - params: - # TODO: is this prefix okay? - prefix=lambda wildcards: "%s_%s" % (wildcards.rtype, wildcards.tool) - conda: - os.path.join(ENV_DIR, "bbmap.yaml") + threads: 1 + # config["bbmap"]["threads"] + # params: + # # TODO: is this prefix okay? + # prefix=lambda wildcards: "%s_%s" % (wildcards.rtype, wildcards.tool) + # conda: + # os.path.join(ENV_DIR, "bbmap.yaml") message: - "BBMAP: rename FASTA entries in {input}" + # "BBMAP: rename FASTA entries in {input}" + "CDHIT: rename FASTA entries in {input}" shell: - "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}" + # "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}" + "sed 's/^>/>{wildcards.rtype}__{wildcards.tool}__/' {input} > {output}" # https://github.com/weizhongli/cdhit/wiki/3.-User's-Guide#CDHIT2D rule analysis_cdhit: @@ -73,8 +75,8 @@ rule analysis_cdhit: "CD-HIT: {input}" shell: "(date && " - "cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.9 -T {threads} && " - "cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.9 -T {threads} && " + "cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.5 -T {threads} && " + "cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.5 -T {threads} && " "date) &> {log}" ##################################################