Skip to content
Snippets Groups Projects
Commit 6bce664c authored by Yohan Jarosz
Browse files

Merge branch 'htstat_move' into test

parents 6554a483 3738e9a6
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,10 @@ build
KronaTools-2.5.tar
KronaTools-2.5/
db
conf/userconfig.imp.json
conf/gaia_PREPROCESSING_modules.sh
env/
run_IMP_A02-20150318.sh
build_gz/
run_tests.sh
*~
build_bz2/
run_A01.sh
{
"threads": 12,
"memory_total_gb": 48,
"memory_per_core_gb": 4,
"datadir": "TEST_DATA/raw",
"sample": "test",
"outputdir": "build",
"db_path": "db",
"preprocessing_filtering": false,
"trimmomatic": {
"adapter": "TruSeq2",
"jarfile": "/mnt/nfs/projects/ecosystem_biology/local_tools/IMP/dependencies/trimmomatic-0.32/dist/jar/trimmomatic-0.32.jar"
},
"human_filtering": {
"filter": "chr21"
},
"sortmerna": {
"scripts_path": "/mnt/nfs/projects/ecosystem_biology/local_tools/sortmerna-1.99-beta/scripts"
},
"prokka": {
"databases": [
"cm/Bacteria.i1i",
"genus/Staphylococcus.phr",
"hmm/CLUSTERS.hmm.h3f",
"kingdom/Archaea/sprot.phr"
]
}
}
#!/bin/bash -l
# Environment setup for the workflow: loads the required modules, activates
# the IMP virtualenv and extends PATH / PERL5LIB with locally installed tools.
#source this file before execution of snakefile
module load Python
source /mnt/nfs/projects/ecosystem_biology/local_tools/IMP/bin/activate
module load MEGAHIT
module load BWA
module load SAMtools
module load BEDTools
module load OpenBLAS
module load Boost/1.53.0-ictce-5.3.0
export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/idba-1.1.1.icc/bin
module load CAP3
#symbolic links for prokka db
module load prokka
export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/tabix-0.2.6
export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/gkno_launcher/tools/freebayes/bin
export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/vcftools/bin
export PERL5LIB=$PERL5LIB:/mnt/nfs/projects/ecosystem_biology/local_tools/vcftools/perl
export PATH=$PATH:/mnt/nfs/projects/ecosystem_biology/local_tools/Platypus/Platypus_0.7.9.1
module load R
# Name the CRAN mirror explicitly: a non-interactive Rscript cannot prompt for
# one, so install.packages() aborts unless a repository is already configured.
# NOTE(review): this still (re)installs beanplot every time the file is sourced.
Rscript -e "install.packages('beanplot', repos='https://cloud.r-project.org')"
module list
#The Boost C++ Libraries were successfully built!
#
#The following directory should be added to compiler include paths:
#
# /mnt/src_nfs1/projects/ecosystem_biology/local_tools/IMP/dependencies/boost_1_54_0
#
#The following directory should be added to linker library paths:
#
# /mnt/src_nfs1/projects/ecosystem_biology/local_tools/IMP/dependencies/boost_1_54_0/stage/lib
#
......@@ -494,6 +494,22 @@ def analysis_plot_files_output():
"assembly_stats.html",
"assembly_stats.txt"], dir='%s/results' % AN_OUT)
def analysis_stats_files_output():
    """
    List the quality-statistics files expected below P_OUT.

    Every file-name pattern is expanded over both stat directories
    ('stats', 'stats_after_preprocessing'), both read types ('MG', 'MT'),
    both values of n and all three extensions.
    """
    prefix = '{dir}/{stat_flag}/{rtype}/'
    leaves = (
        'cycle_composition_{n}.{ext}',
        'cycle_quality_{n}.{ext}',
        'cycle_quality_box_{n}.{ext}',
        'info.tab',
        'lane_tile_quality_{n}.{ext}',
        'quality_QQ.{ext}',
        'reads_length.{ext}',
        'reads_quality.{ext}',
    )
    patterns = [prefix + leaf for leaf in leaves]
    # Keyword order is left untouched in case the ordering of the expanded
    # list matters to callers.
    return expand(
        patterns,
        n=['1', '2'],
        ext=['gnuplot', 'png', 'tab'],
        dir=P_OUT,
        stat_flag=['stats', 'stats_after_preprocessing'],
        rtype=['MG', 'MT'],
    )
rule ANALYSIS_PLOT:
log:
AN_LOG
......@@ -550,4 +566,100 @@ rule ANALYSIS_KRONA_PLOT_MG:
ktImportText -o {output[1]} {output[0]}
"""
# Quality statistics for the MG.R1.fq / MG.R2.fq pair found in P_OUT; results
# are written to P_OUT/stats/MG.
rule ANALYSIS_MG_QUALITY_STATS:
# Both read files, resolved relative to P_OUT.
input:
expand('{dir}/{raw}', raw=['MG.R1.fq', 'MG.R2.fq'], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats/MG, so patterns without {n} (or without any placeholder besides
# {dir}, like info.tab) occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab',
'{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats/MG' % P_OUT)
# NOTE(review): the benchmark file keeps the PREPROCESSING_ prefix although the
# rule is named ANALYSIS_ — confirm this is intentional.
benchmark:
"%s/benchmarks/PREPROCESSING_MG_QUALITY_STATS.json" % P_OUT
# P_LOG is defined outside this block; both commands below append to it.
log:
P_LOG
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats/MG -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats/MG >> {log} 2>&1
"""
# Quality statistics for the MG.R1/R2 '.uniq.trimmed.fq' pair found in P_OUT;
# results are written to P_OUT/stats_after_preprocessing/MG.
rule ANALYSIS_MG_PREPROCESSED_QUALITY_STATS:
# P_LOG is defined outside this block; both shell commands append to it.
log:
P_LOG
# Both deduplicated-and-trimmed read files (per their file names), resolved
# relative to P_OUT.
input:
expand('{dir}/{trim}', trim=[
'MG.R1.uniq.trimmed.fq',
'MG.R2.uniq.trimmed.fq'], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats_after_preprocessing/MG, so patterns without {n} (or like
# info.tab, with neither) occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab', '{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats_after_preprocessing/MG' % P_OUT)
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats_after_preprocessing/MG -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats_after_preprocessing/MG >> {log} 2>&1
"""
# NOTE(review): the benchmark file keeps the PREPROCESSING_ prefix although the
# rule is named ANALYSIS_ — confirm this is intentional.
benchmark:
"%s/benchmarks/PREPROCESSING_MG_PREPROCESSED_QUALITY_STATS.json" % P_OUT
benchmark:
"%s/benchmarks/PREPROCESSING_MG_PREPROCESSED_QUALITY_STATS.json" % P_OUT
# Quality statistics for the MT.R1.fq / MT.R2.fq pair found in P_OUT; results
# are written to P_OUT/stats/MT.
rule ANALYSIS_MT_QUALITY_STATS:
# P_LOG is defined outside this block; both shell commands append to it.
log:
P_LOG
# NOTE(review): the benchmark file keeps the PREPROCESSING_ prefix although the
# rule is named ANALYSIS_ — confirm this is intentional.
benchmark:
"%s/benchmarks/PREPROCESSING_MT_QUALITY_STATS.json" % P_OUT
# Both read files, resolved relative to P_OUT.
input:
expand('{dir}/{raw}', raw=['MT.R1.fq', 'MT.R2.fq'], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats/MT, so patterns without {n} (or like info.tab, with neither)
# occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab', '{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats/MT' % P_OUT)
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats/MT -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats/MT >> {log} 2>&1
"""
# Quality statistics for the MT.R1/R2 '.trimmed.fq' pair found in P_OUT;
# results are written to P_OUT/stats_after_preprocessing/MT.
rule ANALYSIS_MT_PREPROCESSED_QUALITY_STATS:
# P_LOG is defined outside this block; both shell commands append to it.
log:
P_LOG
# NOTE(review): the benchmark file keeps the PREPROCESSING_ prefix although the
# rule is named ANALYSIS_ — confirm this is intentional.
benchmark:
"%s/benchmarks/PREPROCESSING_MT_PREPROCESSED_QUALITY_STATS.json" % P_OUT
# Both trimmed read files, resolved relative to P_OUT.
input:
expand('{dir}/{trim}', trim=[
'MT.R1.trimmed.fq',
'MT.R2.trimmed.fq',
], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats_after_preprocessing/MT, so patterns without {n} (or like
# info.tab, with neither) occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab', '{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats_after_preprocessing/MT' % P_OUT)
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats_after_preprocessing/MT -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats_after_preprocessing/MT >> {log} 2>&1
"""
#rule ANALYSIS_KRONA_PLOT_MT:
......@@ -43,7 +43,8 @@ def analysis_output_files():
#####################
"MGMT.vizbin.with-contig-names.points"], dir=AN_OUT)
plots = analysis_plot_files_output()
return all + plots
stats = analysis_stats_files_output()
return all + plots + stats
# master command
rule ANALYSIS:
......
......@@ -82,7 +82,9 @@ rule ASSEMBLY_MT_MEGAHIT_2:
"""
MAX_READ_LEN=$(cat {input} | sed -n '1~4s/^@/>/p;2~4p' | \
awk '$0 ~ \">\" {{c=0\"\t\"; }} $0 !~ \">\" {{c+=length($0); max=(max>c)?max:c;}} END {{print max}}')
echo "Max read length: $MAX_READ_LEN"
MEMBYTES=$(({MEMTOTAL}*1000*1000*1000))
echo "Available memory in bytes: $MEMBYTES"
megahit -o {params.outdir} --cpu-only -m $MEMBYTES --mem-flag 1 -l $MAX_READ_LEN \
--input-cmd "cat {input}" -t {THREADS} --continue >> {log} 2>&1
"""
......
......@@ -11,29 +11,6 @@ rule PREPROCESSING_MG_GET_FILES:
prepare_input_files(input, 'MG')
# Quality statistics for the MG.R1.fq / MG.R2.fq pair found in P_OUT; results
# are written to P_OUT/stats/MG.
rule PREPROCESSING_MG_QUALITY_STATS:
# Both read files, resolved relative to P_OUT.
input:
expand('{dir}/{raw}', raw=['MG.R1.fq', 'MG.R2.fq'], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats/MG, so patterns without {n} (or like info.tab, with neither)
# occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab',
'{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats/MG' % P_OUT)
# Benchmark file is placed under P_OUT/benchmarks.
benchmark:
"%s/benchmarks/PREPROCESSING_MG_QUALITY_STATS.json" % P_OUT
# P_LOG is defined outside this block; both commands below append to it.
log:
P_LOG
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats/MG -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats/MG >> {log} 2>&1
"""
rule PREPROCESSING_MG_DEDUPLICATE:
log:
......@@ -49,7 +26,6 @@ rule PREPROCESSING_MG_DEDUPLICATE:
fastuniq -i <(echo -e "{input[0]}\\n{input[1]}") -o {output[0]} -p {output[1]} >> {log} 2>&1
"""
rule PREPROCESSING_MG_TRIM:
log:
P_LOG
......@@ -84,32 +60,6 @@ rule PREPROCESSING_MG_CAT_TRIMMED_SE:
benchmark:
"%s/benchmarks/PREPROCESSING_MG_CAT_TRIMMED_SE.json" % P_OUT
# Quality statistics for the MG.R1/R2 '.uniq.trimmed.fq' pair found in P_OUT;
# results are written to P_OUT/stats_after_preprocessing/MG.
rule PREPROCESSING_MG_PREPROCESSED_QUALITY_STATS:
# P_LOG is defined outside this block; both shell commands append to it.
log:
P_LOG
# Both deduplicated-and-trimmed read files (per their file names), resolved
# relative to P_OUT.
input:
expand('{dir}/{trim}', trim=[
'MG.R1.uniq.trimmed.fq',
'MG.R2.uniq.trimmed.fq'], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats_after_preprocessing/MG, so patterns without {n} (or like
# info.tab, with neither) occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab', '{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats_after_preprocessing/MG' % P_OUT)
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats_after_preprocessing/MG -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats_after_preprocessing/MG >> {log} 2>&1
"""
# Benchmark file is placed under P_OUT/benchmarks.
benchmark:
"%s/benchmarks/PREPROCESSING_MG_PREPROCESSED_QUALITY_STATS.json" % P_OUT
rule PREPROCESSING_MG_FILTER_HG:
log:
P_LOG
......
......@@ -8,28 +8,6 @@ rule PREPROCESSING_MT_GET_FILES:
run:
prepare_input_files(input, 'MT')
# Quality statistics for the MT.R1.fq / MT.R2.fq pair found in P_OUT; results
# are written to P_OUT/stats/MT.
rule PREPROCESSING_MT_QUALITY_STATS:
# P_LOG is defined outside this block; both shell commands append to it.
log:
P_LOG
# Benchmark file is placed under P_OUT/benchmarks.
benchmark:
"%s/benchmarks/PREPROCESSING_MT_QUALITY_STATS.json" % P_OUT
# Both read files, resolved relative to P_OUT.
input:
expand('{dir}/{raw}', raw=['MT.R1.fq', 'MT.R2.fq'], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats/MT, so patterns without {n} (or like info.tab, with neither)
# occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab', '{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats/MT' % P_OUT)
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats/MT -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats/MT >> {log} 2>&1
"""
rule PREPROCESSING_MT_TRIM:
log:
P_LOG
......@@ -66,31 +44,6 @@ rule PREPROCESSING_MT_CAT_TRIMMED_SE:
"cat {input[0]} {input[1]} > {output}"
# Quality statistics for the MT.R1/R2 '.trimmed.fq' pair found in P_OUT;
# results are written to P_OUT/stats_after_preprocessing/MT.
rule PREPROCESSING_MT_PREPROCESSED_QUALITY_STATS:
# P_LOG is defined outside this block; both shell commands append to it.
log:
P_LOG
# Benchmark file is placed under P_OUT/benchmarks.
benchmark:
"%s/benchmarks/PREPROCESSING_MT_PREPROCESSED_QUALITY_STATS.json" % P_OUT
# Both trimmed read files, resolved relative to P_OUT.
input:
expand('{dir}/{trim}', trim=[
'MT.R1.trimmed.fq',
'MT.R2.trimmed.fq',
], dir=P_OUT)
# Each pattern is expanded over every n and every ext with dir fixed to
# P_OUT/stats_after_preprocessing/MT, so patterns without {n} (or like
# info.tab, with neither) occur more than once in the resulting list.
output:
expand(['{dir}/cycle_composition_{n}.{ext}',
'{dir}/cycle_quality_{n}.{ext}',
'{dir}/cycle_quality_box_{n}.{ext}',
'{dir}/info.tab', '{dir}/lane_tile_quality_{n}.{ext}',
'{dir}/quality_QQ.{ext}',
'{dir}/reads_length.{ext}',
'{dir}/reads_quality.{ext}'
], n=['1', '2'], ext=['gnuplot', 'png', 'tab'], dir='%s/stats_after_preprocessing/MT' % P_OUT)
# Runs ht2-stat on the inputs (THREADS is passed via -t), then ht2-stat-draw.pl
# on the same output directory; stdout and stderr of both go to the log.
shell:
"""
ht2-stat --encode=sanger -q -P -t {THREADS} -o {P_OUT}/stats_after_preprocessing/MT -i {input} >> {log} 2>&1
ht2-stat-draw.pl --dir {P_OUT}/stats_after_preprocessing/MT >> {log} 2>&1
"""
rule PREPROCESSING_MT_FILTER_RRNA:
log:
......
......@@ -15,21 +15,7 @@ def preprocessing_output_files():
"""
Dynamically generate output files names based on parameters
"""
stats = expand([
'{dir}/{stat_flag}/{rtype}/cycle_composition_{n}.{ext}',
'{dir}/{stat_flag}/{rtype}/cycle_quality_{n}.{ext}',
'{dir}/{stat_flag}/{rtype}/cycle_quality_box_{n}.{ext}',
'{dir}/{stat_flag}/{rtype}/info.tab',
'{dir}/{stat_flag}/{rtype}/lane_tile_quality_{n}.{ext}',
'{dir}/{stat_flag}/{rtype}/quality_QQ.{ext}',
'{dir}/{stat_flag}/{rtype}/reads_length.{ext}',
'{dir}/{stat_flag}/{rtype}/reads_quality.{ext}'],
n=['1', '2'],
ext=['gnuplot', 'png', 'tab'],
dir=P_OUT,
stat_flag=['stats', 'stats_after_preprocessing'],
rtype=['MG', 'MT'])
return stats + [preprocessed_mt('R1'), preprocessed_mt('R2'), preprocessed_mt('SE')] + [preprocessed_mg('R1'), preprocessed_mg('R2'), preprocessed_mg('SE')]
return [preprocessed_mt('R1'), preprocessed_mt('R2'), preprocessed_mt('SE')] + [preprocessed_mg('R1'), preprocessed_mg('R2'), preprocessed_mg('SE')]
def preprocessed_mg(target):
......@@ -75,6 +61,7 @@ def prepare_input_files(input, rtype):
p, fname = os.path.split(inp)
outfilename = os.path.join(P_OUT, tmp)
print(inp, '=>', outfilename)
import bz2
# ungunzip
if os.path.splitext(fname)[-1] in ['.gz', '.gzip']:
with open(outfilename, 'wb') as whandle, gzip.open(inp, 'rb') as rhandle:
......
# Installs the Python packages used by the workflow into the IMP virtualenv.
#load 3.3?
#module load Python
#~/.local/bin/virtualenv-3.2 /mnt/nfs/projects/ecosystem_biology/local_tools/IMP
# Activate the virtualenv first so the installs below land inside it
# (the script name is "activate"; the previous spelling "actviate" does not exist).
source /mnt/nfs/projects/ecosystem_biology/local_tools/IMP/bin/activate
#pip install -U pip
#pip install -U setuptools
pip install snakemake
pip install numpy
easy_install scipy
easy_install scikit-learn
#easy_install numpy??
#export PYTHONPATH=env/lib/python3.2/site-packages/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment