Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
IMP
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
IMP-dev
IMP
Commits
91ca23eb
Commit
91ca23eb
authored
9 years ago
by
Yohan Jarosz
Browse files
Options
Downloads
Patches
Plain Diff
add ht2-stat draw script
parent
2246a4df
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
docker/Dockerfile
+1
-1
1 addition, 1 deletion
docker/Dockerfile
lib/ht2-stat-draw.pl
+519
-0
519 additions, 0 deletions
lib/ht2-stat-draw.pl
with
520 additions
and
1 deletion
docker/Dockerfile
+
1
−
1
View file @
91ca23eb
...
@@ -23,7 +23,7 @@ RUN apt-get update \
...
@@ -23,7 +23,7 @@ RUN apt-get update \
&&
apt-get
install
-yq
libboost-filesystem1.54-dev libboost-program-options1.54-dev libboost-regex1.54-dev libboost-system1.54-dev libboost-thread1.54-dev libboost-system1.54-dev
\
&&
apt-get
install
-yq
libboost-filesystem1.54-dev libboost-program-options1.54-dev libboost-regex1.54-dev libboost-system1.54-dev libboost-thread1.54-dev libboost-system1.54-dev
\
&&
apt-get
install
-yq
bioperl
=
1.6.923-1
tabix
=
0.2.6-2 zlib1g zlib1g-dev samtools gnuplot python3
\
&&
apt-get
install
-yq
bioperl
=
1.6.923-1
tabix
=
0.2.6-2 zlib1g zlib1g-dev samtools gnuplot python3
\
&&
apt-get
install
-yq
r-base r-base-dev
\
&&
apt-get
install
-yq
r-base r-base-dev
\
&&
apt-get
install
-yq
libncurses5 libncurses5-dev
sudo
libatlas-base-dev python2.7 gfortran python-dev
libchart-clicker-perl
\
&&
apt-get
install
-yq
libncurses5 libncurses5-dev
sudo
libatlas-base-dev python2.7 gfortran python-dev
\
&&
update-alternatives
--set
java /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java
&&
update-alternatives
--set
java /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java
...
...
This diff is collapsed.
Click to expand it.
lib/ht2-stat-draw.pl
0 → 100755
+
519
−
0
View file @
91ca23eb
#!/usr/bin/perl
# HTQC - a high-throughput sequencing quality control toolkit
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use
strict
;
use
warnings
;
use
File::
Path
;
use
File::Spec::
Functions
;
use
Getopt::
Long
;
# output file name base
my
$CYC_QUAL_1
=
'
cycle_quality_1
';
my
$CYC_QUAL_2
=
'
cycle_quality_2
';
my
$CYC_QUAL_1_BOX
=
'
cycle_quality_box_1
';
my
$CYC_QUAL_2_BOX
=
'
cycle_quality_box_2
';
my
$READS_QUAL
=
'
reads_quality
';
my
$LANE_TILE_QUAL_1
=
'
lane_tile_quality_1
';
my
$LANE_TILE_QUAL_2
=
'
lane_tile_quality_2
';
my
$CYC_COMP_1
=
'
cycle_composition_1
';
my
$CYC_COMP_2
=
'
cycle_composition_2
';
my
$UMASK_LEN
=
'
reads_length
';
my
$QUAL_QQ
=
'
quality_QQ
';
my
$dir_data
;
my
$WIDTH
=
600
;
my
$HEIGHT
=
400
;
my
$SZ_TITLE
=
12
;
my
$SZ_TEXT
=
9
;
my
$FORMAT
=
'
png
';
GetOptions
(
'
dir=s
'
=>
\
$dir_data
,
'
format=s
'
=>
\
$FORMAT
,
'
width=s
'
=>
\
$WIDTH
,
'
height=s
'
=>
\
$HEIGHT
,
'
title-size=s
'
=>
\
$SZ_TITLE
,
'
text-size=s
'
=>
\
$SZ_TEXT
,
'
help
'
=>
\
&show_help
);
#
# validate dir
#
die
"
data directory not specified
"
if
!
defined
$dir_data
;
die
"
data directory not exist
"
if
!-
d
$dir_data
;
my
$file_info
=
catfile
$dir_data
,
'
info.tab
';
die
"
info file not exist
"
if
!-
f
$file_info
;
#
# get info
#
open
INFO
,
'
<
',
$file_info
or
die
"
failed to open info file '
$file_info
': $!
";
my
$PAIRED
;
my
$LENGTH
;
my
$QUAL_FROM
;
my
$QUAL_TO
;
my
$MASK
;
while
(
<
INFO
>
)
{
chomp
;
my
(
$key
,
$value
)
=
split
/\t/
;
if
(
$key
eq
'
paired
'
)
{
$PAIRED
=
$value
eq
'
T
'
?
1
:
0
;
}
elsif
(
$key
eq
'
length
'
)
{
$LENGTH
=
$value
;
}
elsif
(
$key
eq
'
quality range
'
)
{
$value
=~
/^(\d+)-(\d+)$/
or
die
"
failed to parse quality range: '
$value
'
";
$QUAL_FROM
=
$
1
;
$QUAL_TO
=
$
2
;
}
elsif
(
$key
eq
'
mask
'
)
{
$MASK
=
$value
eq
'
T
'
?
1
:
0
;
}
}
close
INFO
;
#
# test the existence of gnuplot
#
my
$HAS_GNUPLOT
=
1
;
`
gnuplot -V
`;
if
(
$?
!=
0
)
{
print
STDERR
<<INFO_GNUPLOT_NOTFOUND;
Gnuplot is not found in your system. Plot scripts are generated but are not
rendered.
Please install Gnuplot using the package management system of your Unix
distribution, or visit Gnuplot website: http://sourceforge.net/projects/gnuplot
INFO_GNUPLOT_NOTFOUND
$HAS_GNUPLOT
=
0
;
}
#
# write gnuplot scripts
#
plot_cycle_quality_heatmap
(
$CYC_QUAL_1
,
1
);
plot_cycle_quality_heatmap
(
$CYC_QUAL_2
,
2
)
if
$PAIRED
;
plot_cycle_quality_box
(
$CYC_QUAL_1_BOX
,
1
);
plot_cycle_quality_box
(
$CYC_QUAL_2_BOX
,
2
)
if
$PAIRED
;
plot_reads_quality
(
$READS_QUAL
);
plot_lane_tile_quality
(
$LANE_TILE_QUAL_1
,
1
);
plot_lane_tile_quality
(
$LANE_TILE_QUAL_2
,
2
)
if
$PAIRED
;
plot_cycle_composition
(
$CYC_COMP_1
,
1
);
plot_cycle_composition
(
$CYC_COMP_2
,
2
)
if
$PAIRED
;
plot_reads_length
(
$UMASK_LEN
);
plot_quality_qq
(
$QUAL_QQ
)
if
$PAIRED
;
#
# subs
#
# gnuplot scripts
sub
plot_cycle_quality_heatmap
{
my
$base
=
shift
;
my
$side
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
my
$file_dat
=
catfile
$dir_data
,
$base
.
'
.dat
';
# read table
# write data file for gnuplot
open
my
$h_tab
,
'
<
',
$file_tab
or
die
"
failed to open '
$file_tab
' for read: $!
";
open
my
$h_dat
,
'
>
',
$file_dat
or
die
"
failed to open '
$file_dat
' for write: $!
";
my
@qual_list
;
my
$line
=
0
;
while
(
<
$h_tab
>
)
{
chomp
;
if
(
$line
==
0
)
{
(
undef
,
@qual_list
)
=
split
/\t/
;
}
else
{
my
(
$cyc
,
@values
)
=
split
/\t/
;
for
(
my
$i
=
0
;
$i
<
@values
;
$i
++
)
{
print
$h_dat
join
(
"
\t
",
$cyc
,
$qual_list
[
$i
],
$values
[
$i
]
),
"
\n
";
}
}
}
continue
{
$line
++
}
close
$h_tab
;
close
$h_dat
;
# write script
my
$x_min
=
-
0.5
;
my
$x_max
=
$LENGTH
+
0.5
;
my
$y_min
=
$QUAL_FROM
-
0.5
;
my
$y_max
=
$QUAL_TO
+
0.5
;
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<HEREDOC;
set title "Cycle Quality of Reads $side" font "*,$SZ_TITLE"
unset key
set terminal $FORMAT size $WIDTH,$HEIGHT
set output "$file_chart"
set palette rgbformulae 21,22,23
set cblabel "Num reads" font "*,$SZ_TEXT"
set cbtics font "*,$SZ_TEXT"
set xrange [$x_min:$x_max]
set yrange [$y_min:$y_max]
set xlabel "Cycle" font "*,$SZ_TEXT"
set ylabel "Quality" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
plot '$file_dat' using 1:2:3 with image
HEREDOC
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
sub
plot_cycle_quality_box
{
my
$base
=
shift
;
my
$side
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
# write script
my
$x_min
=
-
0.5
;
my
$x_max
=
$LENGTH
+
0.5
;
my
$y_min
=
$QUAL_FROM
-
0.5
;
my
$y_max
=
$QUAL_TO
+
0.5
;
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<HEREDOC;
set title "Cycle Quality of Reads $side" font "*,$SZ_TITLE"
unset key
set terminal $FORMAT size $WIDTH,$HEIGHT
set output "$file_chart"
set xrange [$x_min:$x_max]
set yrange [$y_min:$y_max]
set xlabel "Cycle" font "*,$SZ_TEXT"
set ylabel "Quality" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set boxwidth 0.8 relative
plot '$file_tab' using 1:3:2:6:5 with candlesticks lc rgbcolor '#00007f',\\
'$file_tab' using 1:7 with lines lc rgbcolor 'red'
HEREDOC
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
sub
plot_reads_quality
{
my
$base
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
# write script
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<HEREDOC;
set title "Reads Quality" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set key font "*,$SZ_TEXT"
set xlabel "Quality" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set output "$file_chart"
HEREDOC
print
$h_plot
"
plot '
$file_tab
' using 1:2 lc rgbcolor '#ff0000' title 'read 1 quality' with lines,
\\\n
";
print
$h_plot
"
'
$file_tab
' using 1:3 lc rgbcolor '#7f0000' title 'read 1 quality accum' with lines
";
if
(
$PAIRED
)
{
print
$h_plot
"
,
\\\n
";
print
$h_plot
"
'
$file_tab
' using 1:4 lc rgbcolor '#0000ff' title 'read 2 quality' with lines,
\\\n
";
print
$h_plot
"
'
$file_tab
' using 1:5 lc rgbcolor '#00007f' title 'read 2 quality accum' with lines
\n
";
}
else
{
print
$h_plot
"
\n
";
}
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
sub
plot_lane_tile_quality
{
my
$base
=
shift
;
my
$side
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
my
$file_dat
=
catfile
$dir_data
,
$base
.
'
.dat
';
# read ticks
# create dat file
my
@titles
;
my
@ticks_list
;
open
my
$h_tab
,
'
<
',
$file_tab
or
die
"
failed to open '
$file_tab
' for read: $!
";
open
my
$h_dat
,
'
>
',
$file_dat
or
die
"
failed to open '
$file_dat
' for write: $!
";
my
$n
=
0
;
while
(
<
$h_tab
>
)
{
chomp
;
if
(
$n
==
0
)
{
(
undef
,
@titles
)
=
split
/\t/
;
}
else
{
my
(
$lane
,
$tile
,
@data
)
=
split
/\t/
;
push
@ticks_list
,
"
\"
lane
$lane
tile
$tile
\"
$n
";
print
$h_dat
join
(
"
\t
",
$n
,
@data
),
"
\n
";
}
}
continue
{
$n
++
}
close
$h_tab
;
close
$h_dat
;
my
$ticks_str
=
join
'
,
',
@ticks_list
;
# write script
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<HEREDOC;
set title "Lane-Tile Quality for Reads $side" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set key outside font "*,$SZ_TEXT"
set xlabel "Tiles" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics rotate 90 ($ticks_str) font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set style fill solid
set output "$file_chart"
plot '$file_dat' using 1:(\$5+\$4+\$3+\$2):(1) with boxes lc rgbcolor "blue" title "quality > 30", \\
'$file_dat' using 1:(\$4+\$3+\$2):(1) with boxes lc rgbcolor "green" title "20 < quality < 30", \\
'$file_dat' using 1:(\$3+\$2):(1) with boxes lc rgbcolor "red" title "10 < quality < 20", \\
'$file_dat' using 1:2:(1) with boxes lc rgbcolor "black" title "quality < 10"
HEREDOC
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
sub
plot_cycle_composition
{
my
$base
=
shift
;
my
$side
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
my
$x_min
=
-
0.5
;
my
$x_max
=
$LENGTH
+
0.5
;
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<heredoc;
set title "Cycle Composition for Read $side" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set style fill solid
set key outside font "*,$SZ_TEXT"
set xrange [$x_min:$x_max]
set xlabel "Cycle" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set output "$file_chart"
heredoc
print
$h_plot
"
plot
";
if
(
$MASK
)
{
print
$h_plot
<<heredoc;
'$file_tab' using 1:(\$7+\$6+\$5+\$4+\$3+\$2) with boxes lc rgbcolor "yellow" title "MASK", \\
heredoc
}
print
$h_plot
<<heredoc;
'$file_tab' using 1:(\$6+\$5+\$4+\$3+\$2) with boxes lc rgbcolor "grey" title "N", \\
'$file_tab' using 1:(\$5+\$4+\$3+\$2) with boxes lc rgbcolor "black" title "C", \\
'$file_tab' using 1:(\$4+\$3+\$2) with boxes lc rgbcolor "blue" title "G", \\
'$file_tab' using 1:(\$3+\$2) with boxes lc rgbcolor "red" title "T", \\
'$file_tab' using 1:2 with boxes lc rgbcolor "green" title "A"
heredoc
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
sub
plot_reads_length
{
my
$base
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<heredoc;
set title "Reads Length" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set key font "*,$SZ_TEXT"
set xlabel "Length" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set output "$file_chart"
heredoc
print
$h_plot
"
plot '
$file_tab
' using 1:2 lc rgbcolor '#ff0000' title 'read 1 length' with lines,
\\\n
";
print
$h_plot
"
'
$file_tab
' using 1:3 lc rgbcolor '#7f0000' title 'read 1 length accum' with lines
";
if
(
$PAIRED
)
{
print
$h_plot
"
,
\\\n
";
print
$h_plot
"
'
$file_tab
' using 1:4 lc rgbcolor '#0000ff' title 'read 2 length' with lines,
\\\n
";
print
$h_plot
"
'
$file_tab
' using 1:5 lc rgbcolor '#00007f' title 'read 2 length accum' with lines
\n
";
}
else
{
print
$h_plot
"
\n
";
}
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
sub
plot_quality_qq
{
my
$base
=
shift
;
my
$file_tab
=
catfile
$dir_data
,
$base
.
'
.tab
';
my
$file_plot
=
catfile
$dir_data
,
$base
.
'
.gnuplot
';
my
$file_chart
=
catfile
$dir_data
,
$base
.
'
.
'
.
$FORMAT
;
my
$label_x
=
$QUAL_FROM
+
3
;
my
$label_y
=
$QUAL_TO
-
3
;
# get pearson r value
open
my
$h_tab
,
'
<
',
$file_tab
or
die
"
failed to open '
$file_tab
' for read: $!
";
my
$tab_header
=
<
$h_tab
>
;
chomp
$tab_header
;
$tab_header
=~
/# pearson correlation: (\S+)/
or
die
"
failed to parse pearson correlation from '
$tab_header
'
";
my
$pearson_text
=
$
1
;
close
$h_tab
;
# write gnuplot script
open
my
$h_plot
,
'
>
',
$file_plot
or
die
$!
;
print
$h_plot
<<heredoc;
set title "Read Pair Quality QQ plot" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set output "$file_chart"
unset key
set xrange [$QUAL_FROM:$QUAL_TO]
set yrange [$QUAL_FROM:$QUAL_TO]
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set label 'r = $pearson_text' at $label_x,$label_y font "*,$SZ_TEXT"
plot '$file_tab' with lines
heredoc
close
$h_plot
;
# run gnuplot
if
(
$HAS_GNUPLOT
)
{
system
(
'
gnuplot
',
$file_plot
)
==
0
or
die
"
gnuplot failed
";
}
}
# help document
sub
show_help
{
print
<<heredoc;
$0 - render ht_stat outputs
Usage:
ht_stat_draw.pl --dir STAT_DIR
Options:
--dir the output directory of ht_stat.
--format picture format. [$FORMAT]
--width picture width. [$WIDTH]
--height picture height. [$HEIGHT]
--title-size font size for picture titles. [$SZ_TITLE]
--text-size font size for axis ticks, legends, etc.. [$SZ_TEXT]
--help show help
heredoc
exit
(
0
);
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment