Skip to content
Snippets Groups Projects
Commit 214f07ad authored by Valentina Galata
Browse files

cdhit: changed renaming rule (issue #75), use '-s2 0.5' (issue #68)

parent ae21c8ce
No related branches found
No related tags found
No related merge requests found
......@@ -28,27 +28,29 @@ rule analysis_quast:
# Proteins
# for CD-HIT
rule analysis_bbmap_rename:
rule analysis_cdhit_rename:
input:
os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa")
output:
temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa"))
log:
"logs/analysis_bbmap_rename_{rtype}.{tool}.log"
# log:
# "logs/analysis_bbmap_rename_{rtype}.{tool}.log"
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
threads:
config["bbmap"]["threads"]
params:
# TODO: is this prefix okay?
prefix=lambda wildcards: "%s_%s" % (wildcards.rtype, wildcards.tool)
conda:
os.path.join(ENV_DIR, "bbmap.yaml")
threads: 1
# config["bbmap"]["threads"]
# params:
# # TODO: is this prefix okay?
# prefix=lambda wildcards: "%s_%s" % (wildcards.rtype, wildcards.tool)
# conda:
# os.path.join(ENV_DIR, "bbmap.yaml")
message:
"BBMAP: rename FASTA entries in {input}"
# "BBMAP: rename FASTA entries in {input}"
"CDHIT: rename FASTA entries in {input}"
shell:
"(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}"
# "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}"
"sed 's/^>/>{wildcards.rtype}__{wildcards.tool}__/' {input} > {output}"
# https://github.com/weizhongli/cdhit/wiki/3.-User's-Guide#CDHIT2D
rule analysis_cdhit:
......@@ -73,8 +75,8 @@ rule analysis_cdhit:
"CD-HIT: {input}"
shell:
"(date && "
"cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.9 -T {threads} && "
"cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.9 -T {threads} && "
"cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.5 -T {threads} && "
"cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -s2 0.5 -T {threads} && "
"date) &> {log}"
##################################################
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment