Skip to content
Snippets Groups Projects
Commit d9110373 authored by Yohan Jarosz
Browse files

remove de-duplicate step for MG data

parent 0d6c27d5
No related branches found
No related tags found
No related merge requests found
......@@ -11,32 +11,18 @@ rule PREPROCESSING_MG_GET_FILES:
prepare_input_files(input, 'MG')
rule PREPROCESSING_MG_DEDUPLICATE:
log:
P_LOG
input:
expand('{dir}/{raw}', raw=['MG.R1.fq', 'MG.R2.fq'], dir=P_OUT)
output:
expand('{dir}/{uniq}', uniq=['MG.R1.uniq.fq', 'MG.R2.uniq.fq'], dir=P_OUT)
benchmark:
"%s/benchmarks/PREPROCESSING_MG_DEDUPLICATE.json" % P_OUT
shell:
"""
fastuniq -i <(echo -e "{input[0]}\\n{input[1]}") -o {output[0]} -p {output[1]} >> {log} 2>&1
"""
rule PREPROCESSING_MG_TRIM:
log:
P_LOG
input:
expand('{dir}/{uniq}', uniq=['MG.R1.uniq.fq', 'MG.R2.uniq.fq'], dir=P_OUT),
expand('{dir}/{uniq}', uniq=['MG.R1.fq', 'MG.R2.fq'], dir=P_OUT),
DBPATH + "/adapters/adapters.done"
output:
expand('{dir}/{trim}', trim=[
'MG.R1.uniq.trimmed.fq',
'MG.SE1.uniq.trimmed.fq',
'MG.R2.uniq.trimmed.fq',
'MG.SE2.uniq.trimmed.fq'], dir=P_OUT)
'MG.R1.trimmed.fq',
'MG.SE1.trimmed.fq',
'MG.R2.trimmed.fq',
'MG.SE2.trimmed.fq'], dir=P_OUT)
shell:
"""
java -jar {config[trimmomatic][jarfile]} PE -threads {THREADS} {input[0]} {input[1]} {output} \
......@@ -56,10 +42,10 @@ rule PREPROCESSING_MG_CAT_TRIMMED_SE:
P_LOG
input:
expand('{dir}/{trim}', trim=[
'MG.SE1.uniq.trimmed.fq',
'MG.SE2.uniq.trimmed.fq'], dir=P_OUT)
'MG.SE1.trimmed.fq',
'MG.SE2.trimmed.fq'], dir=P_OUT)
output:
expand('{dir}/{cat}', cat='MG.SE.uniq.trimmed.fq', dir=P_OUT)
expand('{dir}/{cat}', cat='MG.SE.trimmed.fq', dir=P_OUT)
shell:
"cat {input[0]} {input[1]} > {output}"
benchmark:
......@@ -70,18 +56,18 @@ rule PREPROCESSING_MG_FILTER_HG:
P_LOG
input:
expand('{dir}/{trim}', trim=[
'MG.R1.uniq.trimmed.fq',
'MG.R2.uniq.trimmed.fq',
'MG.SE.uniq.trimmed.fq'], dir=P_OUT),
'MG.R1.trimmed.fq',
'MG.R2.trimmed.fq',
'MG.SE.trimmed.fq'], dir=P_OUT),
'{dir}/{ref}.fa'.format(dir=DBPATH + "/human", ref=config['human_filtering']['filter']),
expand(
"{p}/human/{filter}.{ext}", filter=config["human_filtering"]["filter"],
ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa'], p=DBPATH)
output:
expand('{dir}/{filter}', filter=expand([
'MG.R1.uniq.trimmed.{f}.fq',
'MG.R2.uniq.trimmed.{f}.fq',
'MG.SE.uniq.trimmed.{f}.fq'], f=config['human_filtering']['filter']), dir=P_OUT)
'MG.R1.trimmed.{f}.fq',
'MG.R2.trimmed.{f}.fq',
'MG.SE.trimmed.{f}.fq'], f=config['human_filtering']['filter']), dir=P_OUT)
benchmark:
"%s/benchmarks/PREPROCESSING_MG_FILTER_HG.json" % P_OUT
shell:
......
......@@ -28,12 +28,12 @@ def preprocessing_output_files():
def preprocessed_mg(target, dir=P_OUT):
if config['preprocessing_filtering']:
# with human filtering ON
return '{dir}/MG.{target}.uniq.trimmed.{f}.fq'.format(
return '{dir}/MG.{target}.trimmed.{f}.fq'.format(
f=config['human_filtering']['filter'],
dir=dir,
target=target)
# with human filtering OFF
return '{dir}/MG.{target}.uniq.trimmed.fq'.format(dir=dir, target=target)
return '{dir}/MG.{target}.trimmed.fq'.format(dir=dir, target=target)
def preprocessed_mt(target, dir=P_OUT):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment