Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • IMP/IMP
  • IMP-dev/IMP
2 results
Show changes
Showing
with 5373 additions and 850 deletions
source diff could not be displayed: it is too large. Options to address this: view the blob.
"""Pythonic command-line interface parser that will make you smile.
* http://docopt.org
* Repository and issue-tracker: https://github.com/docopt/docopt
* Licensed under terms of MIT license (see LICENSE-MIT)
* Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com
"""
import sys
import re
__all__ = ['docopt']
__version__ = '0.6.1'
class DocoptLanguageError(Exception):

    """Raised when the developer-written usage message cannot be parsed."""
class DocoptExit(SystemExit):

    """Exit raised when the user's arguments do not fit the usage pattern.

    The exit payload is the given message followed by the class-level
    ``usage`` text, with surrounding whitespace stripped.
    """

    # Usage text appended to every message; assigned at class level by callers.
    usage = ''

    def __init__(self, message=''):
        text = '\n'.join((message, self.usage)).strip()
        SystemExit.__init__(self, text)
class Pattern(object):

    """Common base of all pattern-tree nodes.

    Equality and hashing are both derived from ``repr(self)``, so two nodes
    compare equal exactly when their representations are identical.
    """

    def __eq__(self, other):
        return repr(self) == repr(other)

    def __hash__(self):
        return hash(repr(self))

    def fix(self):
        """Run both fix-up passes on this tree and return ``self``."""
        self.fix_identities()
        self.fix_repeating_arguments()
        return self

    def fix_identities(self, uniq=None):
        """Make pattern-tree tips point to same object if they are equal."""
        if not hasattr(self, 'children'):
            return self
        if uniq is None:
            # One representative object per distinct leaf of the whole tree.
            uniq = list(set(self.flat()))
        for index, node in enumerate(self.children):
            if hasattr(node, 'children'):
                node.fix_identities(uniq)
                continue
            assert node in uniq
            self.children[index] = uniq[uniq.index(node)]

    def fix_repeating_arguments(self):
        """Fix elements that should accumulate/increment values."""
        for alternative in transform(self).children:
            case = list(alternative.children)
            # Elements occurring more than once within a single alternative.
            repeated = [leaf for leaf in case if case.count(leaf) > 1]
            for e in repeated:
                # Exact type checks on purpose: Command subclasses Argument
                # but must be counted, not collected into a list.
                if type(e) is Argument or type(e) is Option and e.argcount:
                    if e.value is None:
                        e.value = []
                    elif type(e.value) is not list:
                        e.value = e.value.split()
                if type(e) is Command or type(e) is Option and e.argcount == 0:
                    e.value = 0
        return self
def transform(pattern):
    """Expand pattern into an (almost) equivalent one, but with single Either.

    Example: ((-a | -b) (-c | -d)) => (-a -c | -a -d | -b -c | -b -d)
    Quirks: [-a] => (-a), (-a...) => (-a -a)

    """
    branch_types = [Required, Optional, OptionsShortcut, Either, OneOrMore]
    flattened = []
    pending = [[pattern]]
    while pending:
        current = pending.pop(0)
        # First branch node of the group, if the group still contains one.
        branch = next(
            (node for node in current if type(node) in branch_types), None)
        if branch is None:
            # Only leaves remain: this group is one finished alternative.
            flattened.append(current)
            continue
        current.remove(branch)
        if type(branch) is Either:
            # One new group per alternative of the Either.
            for option in branch.children:
                pending.append([option] + current)
        elif type(branch) is OneOrMore:
            # Repetition is approximated by listing the children twice.
            pending.append(branch.children * 2 + current)
        else:
            pending.append(branch.children + current)
    return Either(*[Required(*group) for group in flattened])
class LeafPattern(Pattern):

    """Leaf/terminal node of a pattern tree."""

    def __init__(self, name, value=None):
        self.name = name
        self.value = value

    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.value)

    def flat(self, *types):
        """Return ``[self]`` if no types are given or ours is listed, else ``[]``."""
        if not types or type(self) in types:
            return [self]
        return []

    def match(self, left, collected=None):
        """Try to consume one element of ``left`` for this leaf.

        Returns ``(matched, remaining, collected)``. Relies on the
        ``single_match`` method, which this class does not define itself.
        """
        if collected is None:
            collected = []
        pos, match = self.single_match(left)
        if match is None:
            return False, left, collected
        remaining = left[:pos] + left[pos + 1:]
        previous = [a for a in collected if a.name == self.name]
        if type(self.value) not in (int, list):
            return True, remaining, collected + [match]
        # An int value counts occurrences; a list value accumulates them.
        if type(self.value) is int:
            increment = 1
        elif type(match.value) is str:
            increment = [match.value]
        else:
            increment = match.value
        if not previous:
            match.value = increment
            return True, remaining, collected + [match]
        previous[0].value += increment
        return True, remaining, collected
class BranchPattern(Pattern):

    """Branch/inner node of a pattern tree."""

    def __init__(self, *children):
        self.children = list(children)

    def __repr__(self):
        inner = ', '.join(repr(child) for child in self.children)
        return '%s(%s)' % (type(self).__name__, inner)

    def flat(self, *types):
        """Return this node if its type is requested, else its children's flat lists joined."""
        if type(self) in types:
            return [self]
        collected = []
        for child in self.children:
            collected = collected + child.flat(*types)
        return collected
class Argument(LeafPattern):

    """Positional-argument leaf of the pattern tree."""

    def single_match(self, left):
        """Find the first plain ``Argument`` in ``left``.

        Returns ``(index, Argument(self.name, its value))``, or
        ``(None, None)`` when ``left`` holds no element whose exact type is
        ``Argument``.
        """
        for n, pattern in enumerate(left):
            if type(pattern) is Argument:
                return n, Argument(self.name, pattern.value)
        return None, None

    @classmethod
    def parse(class_, source):
        """Build an instance from text containing ``<name>`` and an optional
        ``[default: ...]`` marker.

        Raises IndexError when ``source`` contains no ``<...>`` token.
        """
        # Raw strings: `\S` and `\[` are not valid str escapes and trigger a
        # warning on recent Python versions when written in a plain literal.
        name = re.findall(r'(<\S*?>)', source)[0]
        value = re.findall(r'\[default: (.*)\]', source, flags=re.I)
        return class_(name, value[0] if value else None)
class Command(Argument):

    """Literal command word; its value is a boolean flag (default False)."""

    def __init__(self, name, value=False):
        self.name = name
        self.value = value

    def single_match(self, left):
        """Match only if the FIRST plain Argument in ``left`` equals our name.

        Returns ``(index, Command(name, True))`` on success and
        ``(None, None)`` otherwise; later positionals are never inspected.
        """
        first = next(
            ((n, item) for n, item in enumerate(left)
             if type(item) is Argument),
            None)
        if first is not None and first[1].value == self.name:
            return first[0], Command(self.name, True)
        return None, None
class Option(LeafPattern):

    """Option leaf with an optional short form, long form and argument count."""

    def __init__(self, short=None, long=None, argcount=0, value=False):
        assert argcount in (0, 1)
        self.short, self.long, self.argcount = short, long, argcount
        # An option that takes an argument and was given no value defaults
        # to None rather than False.
        self.value = None if value is False and argcount else value

    @classmethod
    def parse(class_, option_description):
        """Build an Option from one line of an "options:" section.

        The option spellings are separated from the free-text description
        by TWO spaces, so that ``-o FILE  Output file`` keeps ``FILE`` with
        the option while the description is split off.
        """
        short, long, argcount, value = None, None, 0, False
        # Split on a double space: a single space would cut `-h, --help`
        # after `-h,` and `-o FILE` after `-o`, dropping the long name and
        # the argument respectively.
        options, _, description = option_description.strip().partition('  ')
        options = options.replace(',', ' ').replace('=', ' ')
        for s in options.split():
            if s.startswith('--'):
                long = s
            elif s.startswith('-'):
                short = s
            else:
                # Any non-dash word is the option's argument placeholder.
                argcount = 1
        if argcount:
            matched = re.findall(r'\[default: (.*)\]', description, flags=re.I)
            value = matched[0] if matched else None
        return class_(short, long, argcount, value)

    def single_match(self, left):
        """Return ``(index, element)`` for the first element of ``left``
        with the same name, or ``(None, None)``."""
        for n, pattern in enumerate(left):
            if self.name == pattern.name:
                return n, pattern
        return None, None

    @property
    def name(self):
        """The long form when present, otherwise the short form."""
        return self.long or self.short

    def __repr__(self):
        return 'Option(%r, %r, %r, %r)' % (self.short, self.long,
                                           self.argcount, self.value)
class Required(BranchPattern):

    """Branch whose children must all match, in order."""

    def match(self, left, collected=None):
        """Return ``(True, remaining, collected)`` if every child matches;
        otherwise ``(False, left, collected)`` with the inputs untouched."""
        if collected is None:
            collected = []
        remaining, gathered = left, collected
        for child in self.children:
            ok, remaining, gathered = child.match(remaining, gathered)
            if not ok:
                return False, left, collected
        return True, remaining, gathered
class Optional(BranchPattern):

    """Branch that always matches, consuming whatever its children can."""

    def match(self, left, collected=None):
        if collected is None:
            collected = []
        for child in self.children:
            # The child's own success flag is deliberately ignored.
            _, left, collected = child.match(left, collected)
        return True, left, collected
class OptionsShortcut(Optional):

    """Placeholder node standing for the ``[options]`` shortcut in a usage pattern."""
class OneOrMore(BranchPattern):

    """Branch that matches its single child one or more times."""

    def match(self, left, collected=None):
        """Apply the only child repeatedly until it stops matching or stops
        consuming input.

        Returns ``(True, remaining, collected)`` when the child matched at
        least once, otherwise ``(False, left, collected)`` unchanged.
        """
        assert len(self.children) == 1
        collected = [] if collected is None else collected
        l = left
        c = collected
        l_ = None  # remaining input as it was after the previous iteration
        matched = True
        times = 0  # number of successful child matches
        while matched:
            # could it be that something didn't match but changed l or c?
            matched, l, c = self.children[0].match(l, c)
            times += 1 if matched else 0
            # No progress since the last round: stop to avoid looping forever.
            if l_ == l:
                break
            l_ = l
        if times >= 1:
            return True, l, c
        return False, left, collected
class Either(BranchPattern):

    """Branch that matches when any one of its children matches."""

    def match(self, left, collected=None):
        """Try every child against the same input and return the successful
        outcome that leaves the fewest elements unconsumed (the first such
        outcome on ties); ``(False, left, collected)`` if none succeeds."""
        if collected is None:
            collected = []
        attempts = [child.match(left, collected) for child in self.children]
        successes = [attempt for attempt in attempts if attempt[0]]
        if not successes:
            return False, left, collected
        return min(successes, key=lambda attempt: len(attempt[1]))
class Tokens(list):

    """List of tokens with a cursor-style API and an attached error class."""

    def __init__(self, source, error=DocoptExit):
        """Fill the list from ``source``.

        ``source`` is split on whitespace when it has a ``split`` method,
        otherwise it is taken as an iterable of ready-made tokens.
        ``error`` is the exception class parsers raise for this stream.
        """
        self += source.split() if hasattr(source, 'split') else source
        self.error = error

    @staticmethod
    def from_pattern(source):
        """Tokenize a usage pattern; errors are DocoptLanguageError."""
        # Pad brackets, pipes and ellipses with spaces so they split off.
        source = re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source)
        # Raw string: `\s` / `\S` are not valid str escapes in a plain
        # literal. The capture group keeps `<...>` tokens in one piece.
        source = [s for s in re.split(r'\s+|(\S*<.*?>)', source) if s]
        return Tokens(source, error=DocoptLanguageError)

    def move(self):
        """Remove and return the first token, or None when empty."""
        return self.pop(0) if len(self) else None

    def current(self):
        """Return the first token without removing it, or None when empty."""
        return self[0] if len(self) else None
def parse_long(tokens, options):
    """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;

    Consume one long option (and its argument, if it takes one) from
    ``tokens`` and return it as a one-element list. An option that is not
    yet in ``options`` is appended to it. Raises ``tokens.error`` on an
    ambiguous prefix, a missing argument, or an unexpected argument.
    """
    long, eq, value = tokens.move().partition('=')
    assert long.startswith('--')
    # No '=' part at all means "no inline value", as opposed to '--opt='.
    value = None if eq == value == '' else value
    similar = [o for o in options if o.long == long]
    # Prefix matching is only attempted when the error class is DocoptExit.
    if tokens.error is DocoptExit and similar == []:  # if no exact match
        similar = [o for o in options if o.long and o.long.startswith(long)]
    if len(similar) > 1:  # might be simply specified ambiguously 2+ times?
        raise tokens.error('%s is not a unique prefix: %s?' %
                           (long, ', '.join(o.long for o in similar)))
    elif len(similar) < 1:
        # Unknown option: register it, inferring argcount from the '='.
        argcount = 1 if eq == '=' else 0
        o = Option(None, long, argcount)
        options.append(o)
        if tokens.error is DocoptExit:
            o = Option(None, long, argcount, value if argcount else True)
    else:
        # Work on a copy so the registered option keeps its own value.
        o = Option(similar[0].short, similar[0].long,
                   similar[0].argcount, similar[0].value)
        if o.argcount == 0:
            if value is not None:
                raise tokens.error('%s must not have an argument' % o.long)
        else:
            if value is None:
                # The argument must be the next token.
                if tokens.current() in [None, '--']:
                    raise tokens.error('%s requires argument' % o.long)
                value = tokens.move()
        if tokens.error is DocoptExit:
            o.value = value if value is not None else True
    return [o]
def parse_shorts(tokens, options):
    """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;

    Consume one token of stacked short options (e.g. ``-abc``) from
    ``tokens`` and return the parsed options as a list. An option that is
    not yet in ``options`` is appended to it. Raises ``tokens.error`` on an
    ambiguous short name or a missing argument.
    """
    token = tokens.move()
    assert token.startswith('-') and not token.startswith('--')
    left = token.lstrip('-')  # characters of the stack still to process
    parsed = []
    while left != '':
        short, left = '-' + left[0], left[1:]
        similar = [o for o in options if o.short == short]
        if len(similar) > 1:
            raise tokens.error('%s is specified ambiguously %d times' %
                               (short, len(similar)))
        elif len(similar) < 1:
            # Unknown short option: register it as a flag without argument.
            o = Option(short, None, 0)
            options.append(o)
            if tokens.error is DocoptExit:
                o = Option(short, None, 0, True)
        else:  # why copying is necessary here?
            o = Option(short, similar[0].long,
                       similar[0].argcount, similar[0].value)
            value = None
            if o.argcount != 0:
                if left == '':
                    # Nothing left in the stack: argument is the next token.
                    if tokens.current() in [None, '--']:
                        raise tokens.error('%s requires argument' % short)
                    value = tokens.move()
                else:
                    # The rest of the stack is the argument (e.g. -ofile).
                    value = left
                    left = ''
            if tokens.error is DocoptExit:
                o.value = value if value is not None else True
        parsed.append(o)
    return parsed
def parse_pattern(source, options):
    """Parse a formal usage string into a single ``Required`` tree.

    Raises the token stream's error class if tokens remain after the
    top-level expression has been parsed.
    """
    stream = Tokens.from_pattern(source)
    parsed = parse_expr(stream, options)
    if stream.current() is None:
        return Required(*parsed)
    raise stream.error('unexpected ending: %r' % ' '.join(stream))
def parse_expr(tokens, options):
    """expr ::= seq ( '|' seq )* ;"""
    def grouped(items):
        # A multi-element sequence becomes one Required node; shorter
        # sequences are used as they are.
        return [Required(*items)] if len(items) > 1 else items

    first = parse_seq(tokens, options)
    if tokens.current() != '|':
        return first
    alternatives = grouped(first)
    while tokens.current() == '|':
        tokens.move()
        alternatives += grouped(parse_seq(tokens, options))
    if len(alternatives) > 1:
        return [Either(*alternatives)]
    return alternatives
def parse_seq(tokens, options):
    """seq ::= ( atom [ '...' ] )* ;"""
    terminators = (None, ']', ')', '|')
    sequence = []
    while tokens.current() not in terminators:
        atom = parse_atom(tokens, options)
        if tokens.current() == '...':
            # A trailing ellipsis wraps the atom in a repetition node.
            tokens.move()
            atom = [OneOrMore(*atom)]
        sequence.extend(atom)
    return sequence
def parse_atom(tokens, options):
    """atom ::= '(' expr ')' | '[' expr ']' | 'options'
             | long | shorts | argument | command ;
    """
    token = tokens.current()
    if token in '([':
        tokens.move()
        closer, node_type = {'(': (')', Required),
                             '[': (']', Optional)}[token]
        node = node_type(*parse_expr(tokens, options))
        if tokens.move() != closer:
            raise tokens.error("unmatched '%s'" % token)
        return [node]
    if token == 'options':
        tokens.move()
        return [OptionsShortcut()]
    if token.startswith('--') and token != '--':
        return parse_long(tokens, options)
    if token.startswith('-') and token not in ('-', '--'):
        return parse_shorts(tokens, options)
    # <angle-bracketed> or ALL-CAPS words are positional arguments;
    # anything else is a literal command.
    if token.startswith('<') and token.endswith('>') or token.isupper():
        return [Argument(tokens.move())]
    return [Command(tokens.move())]
def parse_argv(tokens, options, options_first=False):
    """Parse command-line argument vector.

    If options_first:
        argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ;
    else:
        argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ;

    """
    parsed = []
    while True:
        head = tokens.current()
        if head is None:
            return parsed
        if head == '--':
            # Everything from '--' onwards (inclusive) is positional.
            return parsed + [Argument(None, v) for v in tokens]
        if head.startswith('--'):
            parsed += parse_long(tokens, options)
        elif head.startswith('-') and head != '-':
            parsed += parse_shorts(tokens, options)
        elif options_first:
            # First positional ends option parsing; the rest is positional.
            return parsed + [Argument(None, v) for v in tokens]
        else:
            parsed.append(Argument(None, tokens.move()))
def parse_defaults(doc):
    """Return the Option objects described in every "options:" section of ``doc``."""
    defaults = []
    for s in parse_section('options:', doc):
        # FIXME corner case "bla: options: --foo"
        _, _, s = s.partition(':')  # get rid of "options:"
        # Raw string: `\S` is not a valid str escape in a plain literal.
        # Splitting on "newline, indent, dash-word" yields alternating
        # (option start, rest of description) pieces after the first item.
        split = re.split(r'\n[ \t]*(-\S+?)', '\n' + s)[1:]
        split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])]
        options = [Option.parse(s) for s in split if s.startswith('-')]
        defaults += options
    return defaults
def parse_section(name, source):
    """Return every section of ``source`` whose first line contains ``name``.

    A section is that line plus all directly following lines that start
    with a space or tab. Matching is case-insensitive; each result is
    stripped of surrounding whitespace.
    """
    regex = '^([^\n]*' + name + '[^\n]*\n?(?:[ \t].*?(?:\n|$))*)'
    found = re.findall(regex, source, re.IGNORECASE | re.MULTILINE)
    return [section.strip() for section in found]
def formal_usage(section):
    """Turn a "usage:" section into one pattern of alternatives.

    The first word after the colon is taken as the program name; each
    later occurrence of it starts a new ``|``-separated alternative.
    """
    words = section.partition(':')[2].split()  # drop "usage:"
    pieces = []
    for word in words[1:]:
        pieces.append(') | (' if word == words[0] else word)
    return '( ' + ' '.join(pieces) + ' )'
def extras(help, version, options, doc):
    """Handle the built-in help and version options.

    Prints ``doc`` and exits when ``help`` is truthy and a set ``-h`` or
    ``--help`` option is present; prints ``version`` and exits when
    ``version`` is truthy and a set ``--version`` option is present.
    """
    def is_set(names):
        return any(o.name in names and o.value for o in options)

    if help and is_set(('-h', '--help')):
        print(doc.strip("\n"))
        sys.exit()
    if version and is_set(('--version',)):
        print(version)
        sys.exit()
class Dict(dict):

    """dict whose repr lists its items sorted, one ``key: value`` per line."""

    def __repr__(self):
        rows = ['%r: %r' % (key, value) for key, value in sorted(self.items())]
        return '{' + ',\n '.join(rows) + '}'
def docopt(doc, argv=None, help=True, version=None, options_first=False):
    """Parse `argv` based on command-line interface described in `doc`.

    `docopt` creates your command-line interface based on its
    description that you pass as `doc`. Such description can contain
    --options, <positional-argument>, commands, which could be
    [optional], (required), (mutually | exclusive) or repeated...

    Parameters
    ----------
    doc : str
        Description of your command-line interface.
    argv : list of str, optional
        Argument vector to be parsed. sys.argv[1:] is used if not
        provided.
    help : bool (default: True)
        Set to False to disable automatic help on -h or --help
        options.
    version : any object
        If passed, the object will be printed if --version is in
        `argv`.
    options_first : bool (default: False)
        Set to True to require options precede positional arguments,
        i.e. to forbid options and positional arguments intermix.

    Returns
    -------
    args : dict
        A dictionary, where keys are names of command-line elements
        such as e.g. "--verbose" and "<path>", and values are the
        parsed values of those elements.

    Raises
    ------
    DocoptLanguageError
        If `doc` contains no "usage:" section or more than one.
    DocoptExit
        If `argv` does not match the usage pattern.

    Example
    -------
    >>> from docopt import docopt
    >>> doc = '''
    ... Usage:
    ...     my_program tcp <host> <port> [--timeout=<seconds>]
    ...     my_program serial <port> [--baud=<n>] [--timeout=<seconds>]
    ...     my_program (-h | --help | --version)
    ...
    ... Options:
    ...     -h, --help  Show this screen and exit.
    ...     --baud=<n>  Baudrate [default: 9600]
    ... '''
    >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30']
    >>> docopt(doc, argv)
    {'--baud': '9600',
     '--help': False,
     '--timeout': '30',
     '--version': False,
     '<host>': '127.0.0.1',
     '<port>': '80',
     'serial': False,
     'tcp': True}

    See also
    --------
    * For video introduction see http://docopt.org
    * Full documentation is available in README.rst as well as online
      at https://github.com/docopt/docopt#readme

    """
    argv = sys.argv[1:] if argv is None else argv

    # Exactly one "usage:" section is required.
    usage_sections = parse_section('usage:', doc)
    if len(usage_sections) == 0:
        raise DocoptLanguageError('"usage:" (case-insensitive) not found.')
    if len(usage_sections) > 1:
        raise DocoptLanguageError('More than one "usage:" (case-insensitive).')
    # Stored on the class so every DocoptExit raised later carries the usage.
    DocoptExit.usage = usage_sections[0]

    options = parse_defaults(doc)
    pattern = parse_pattern(formal_usage(DocoptExit.usage), options)
    # [default] syntax for argument is disabled
    #for a in pattern.flat(Argument):
    #    same_name = [d for d in arguments if d.name == a.name]
    #    if same_name:
    #        a.value = same_name[0].value
    # argv is parsed against a copy so options unknown to the doc do not
    # leak into `options`.
    argv = parse_argv(Tokens(argv), list(options), options_first)
    pattern_options = set(pattern.flat(Option))
    # Each [options] shortcut expands to the documented options that are
    # not already spelled out in the usage pattern.
    for options_shortcut in pattern.flat(OptionsShortcut):
        doc_options = parse_defaults(doc)
        options_shortcut.children = list(set(doc_options) - pattern_options)
        #if any_options:
        #    options_shortcut.children += [Option(o.short, o.long, o.argcount)
        #                    for o in argv if type(o) is Option]
    # May print help/version and exit before any matching is attempted.
    extras(help, version, argv, doc)
    matched, left, collected = pattern.fix().match(argv)
    if matched and left == []:  # better error message if left?
        # Pattern leaves supply defaults; collected matches override them.
        return Dict((a.name, a.value) for a in (pattern.flat() + collected))
    raise DocoptExit()
#!/usr/bin/perl
# HTQC - a high-throughput sequencing quality control toolkit
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
use warnings;
use File::Path;
use File::Spec::Functions;
use Getopt::Long;
# output file name base (each plot derives its .tab/.dat/.gnuplot/chart
# file names from one of these)
my $CYC_QUAL_1       = 'cycle_quality_1';
my $CYC_QUAL_2       = 'cycle_quality_2';
my $CYC_QUAL_1_BOX   = 'cycle_quality_box_1';
my $CYC_QUAL_2_BOX   = 'cycle_quality_box_2';
my $READS_QUAL       = 'reads_quality';
my $LANE_TILE_QUAL_1 = 'lane_tile_quality_1';
my $LANE_TILE_QUAL_2 = 'lane_tile_quality_2';
my $CYC_COMP_1       = 'cycle_composition_1';
my $CYC_COMP_2       = 'cycle_composition_2';
my $UMASK_LEN        = 'reads_length';
my $QUAL_QQ          = 'quality_QQ';

# command-line settings and their defaults
my $dir_data;
my $WIDTH    = 600;
my $HEIGHT   = 400;
my $SZ_TITLE = 12;
my $SZ_TEXT  = 9;
my $FORMAT   = 'png';

# GetOptions returns false when the command line could not be parsed
# (it has already printed a warning by then); stop instead of rendering
# with settings the user did not intend.
GetOptions(
    'dir=s'        => \$dir_data,
    'format=s'     => \$FORMAT,
    'width=s'      => \$WIDTH,
    'height=s'     => \$HEIGHT,
    'title-size=s' => \$SZ_TITLE,
    'text-size=s'  => \$SZ_TEXT,
    'help'         => \&show_help
) or die "invalid command-line options, use --help for usage\n";
#
# validate dir
#
die "data directory not specified" if !defined $dir_data;
die "data directory not exist"     if !-d $dir_data;
my $file_info = catfile $dir_data, 'info.tab';
die "info file not exist" if !-f $file_info;

#
# get info
#
# info.tab is read as tab-separated "key<TAB>value" lines; only the keys
# handled below are used.
open my $h_info, '<', $file_info
  or die "failed to open info file '$file_info': $!";
my $PAIRED;
my $LENGTH;
my $QUAL_FROM;
my $QUAL_TO;
my $MASK;
while ( my $row = <$h_info> ) {
    chomp $row;
    my ( $key, $value ) = split /\t/, $row;

    # blank lines yield no key at all; a key without a tab yields no value
    next if !defined $key;
    $value = '' if !defined $value;

    if ( $key eq 'paired' ) {
        $PAIRED = $value eq 'T' ? 1 : 0;
    }
    elsif ( $key eq 'length' ) {
        $LENGTH = $value;
    }
    elsif ( $key eq 'quality range' ) {
        $value =~ /^(\d+)-(\d+)$/
          or die "failed to parse quality range: '$value'";
        $QUAL_FROM = $1;
        $QUAL_TO   = $2;
    }
    elsif ( $key eq 'mask' ) {
        $MASK = $value eq 'T' ? 1 : 0;
    }
}
close $h_info;
#
# test the existence of gnuplot
#
# Run "gnuplot -V" only for its exit status; a non-zero status means the
# scripts are still written but never rendered.
qx{gnuplot -V};
my $HAS_GNUPLOT = ( $? == 0 ) ? 1 : 0;
if ( !$HAS_GNUPLOT ) {
    print STDERR <<INFO_GNUPLOT_NOTFOUND;
Gnuplot is not found in your system. Plot scripts are generated but are not
rendered.
Please install Gnuplot using the package management system of your Unix
distribution, or visit Gnuplot website: http://sourceforge.net/projects/gnuplot
INFO_GNUPLOT_NOTFOUND
}
#
# write gnuplot scripts
#
# Each job is [ paired-only flag, plotting sub, arguments... ]; jobs run in
# the order listed and paired-only jobs are skipped for single-end data.
my @plot_jobs = (
    [ 0, \&plot_cycle_quality_heatmap, $CYC_QUAL_1,       1 ],
    [ 1, \&plot_cycle_quality_heatmap, $CYC_QUAL_2,       2 ],
    [ 0, \&plot_cycle_quality_box,     $CYC_QUAL_1_BOX,   1 ],
    [ 1, \&plot_cycle_quality_box,     $CYC_QUAL_2_BOX,   2 ],
    [ 0, \&plot_reads_quality,         $READS_QUAL ],
    [ 0, \&plot_lane_tile_quality,     $LANE_TILE_QUAL_1, 1 ],
    [ 1, \&plot_lane_tile_quality,     $LANE_TILE_QUAL_2, 2 ],
    [ 0, \&plot_cycle_composition,     $CYC_COMP_1,       1 ],
    [ 1, \&plot_cycle_composition,     $CYC_COMP_2,       2 ],
    [ 0, \&plot_reads_length,          $UMASK_LEN ],
    [ 1, \&plot_quality_qq,            $QUAL_QQ ],
);
for my $job (@plot_jobs) {
    my ( $paired_only, $plotter, @args ) = @$job;
    next if $paired_only && !$PAIRED;
    $plotter->(@args);
}
#
# subs
#
# gnuplot scripts
# Write the gnuplot data file and script for the per-cycle quality heatmap
# of one read side, then render it when gnuplot is available.
#   $base - file name base inside the data directory
#   $side - read number shown in the plot title
sub plot_cycle_quality_heatmap {
    my ( $base, $side ) = @_;

    my ( $file_tab, $file_plot, $file_dat ) =
      map { catfile( $dir_data, $base . $_ ) } qw(.tab .gnuplot .dat);
    my $file_chart = catfile( $dir_data, $base . '.' . $FORMAT );

    # convert the table (header row of qualities, one row per cycle) into
    # "cycle<TAB>quality<TAB>value" triples for gnuplot
    open my $h_tab, '<', $file_tab
      or die "failed to open '$file_tab' for read: $!";
    open my $h_dat, '>', $file_dat
      or die "failed to open '$file_dat' for write: $!";

    my @qual_list;
    if ( defined( my $header = <$h_tab> ) ) {
        chomp $header;
        ( undef, @qual_list ) = split /\t/, $header;
    }
    while ( defined( my $row = <$h_tab> ) ) {
        chomp $row;
        my ( $cyc, @values ) = split /\t/, $row;
        for my $i ( 0 .. $#values ) {
            print $h_dat join( "\t", $cyc, $qual_list[$i], $values[$i] ),
              "\n";
        }
    }
    close $h_tab;
    close $h_dat;

    # axis ranges padded by half a unit on each end
    my ( $x_min, $x_max ) = ( -0.5, $LENGTH + 0.5 );
    my ( $y_min, $y_max ) = ( $QUAL_FROM - 0.5, $QUAL_TO + 0.5 );

    # write script
    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<HEREDOC;
set title "Cycle Quality of Reads $side" font "*,$SZ_TITLE"
unset key
set terminal $FORMAT size $WIDTH,$HEIGHT
set output "$file_chart"
set palette rgbformulae 21,22,23
set cblabel "Num reads" font "*,$SZ_TEXT"
set cbtics font "*,$SZ_TEXT"
set xrange [$x_min:$x_max]
set yrange [$y_min:$y_max]
set xlabel "Cycle" font "*,$SZ_TEXT"
set ylabel "Quality" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
plot '$file_dat' using 1:2:3 with image
HEREDOC
    close $h_plot;

    # run gnuplot
    return if !$HAS_GNUPLOT;
    system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
}
# Write the gnuplot script for the per-cycle quality candlestick chart of
# one read side (plotted straight from the .tab file), then render it when
# gnuplot is available.
#   $base - file name base inside the data directory
#   $side - read number shown in the plot title
sub plot_cycle_quality_box {
    my ( $base, $side ) = @_;

    my $file_tab   = catfile( $dir_data, $base . '.tab' );
    my $file_plot  = catfile( $dir_data, $base . '.gnuplot' );
    my $file_chart = catfile( $dir_data, $base . '.' . $FORMAT );

    # axis ranges padded by half a unit on each end
    my ( $x_min, $x_max ) = ( -0.5, $LENGTH + 0.5 );
    my ( $y_min, $y_max ) = ( $QUAL_FROM - 0.5, $QUAL_TO + 0.5 );

    # write script
    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<HEREDOC;
set title "Cycle Quality of Reads $side" font "*,$SZ_TITLE"
unset key
set terminal $FORMAT size $WIDTH,$HEIGHT
set output "$file_chart"
set xrange [$x_min:$x_max]
set yrange [$y_min:$y_max]
set xlabel "Cycle" font "*,$SZ_TEXT"
set ylabel "Quality" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set boxwidth 0.8 relative
plot '$file_tab' using 1:3:2:6:5 with candlesticks lc rgbcolor '#00007f',\\
'$file_tab' using 1:7 with lines lc rgbcolor 'red'
HEREDOC
    close $h_plot;

    # run gnuplot
    return if !$HAS_GNUPLOT;
    system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
}
# Write the gnuplot script for the reads-quality line chart (read 2 series
# are added only for paired data), then render it when gnuplot is
# available.
#   $base - file name base inside the data directory
sub plot_reads_quality {
    my ($base) = @_;

    my $file_tab   = catfile( $dir_data, $base . '.tab' );
    my $file_plot  = catfile( $dir_data, $base . '.gnuplot' );
    my $file_chart = catfile( $dir_data, $base . '.' . $FORMAT );

    # write script
    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<HEREDOC;
set title "Reads Quality" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set key font "*,$SZ_TEXT"
set xlabel "Quality" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set output "$file_chart"
HEREDOC

    # one entry per plotted series; they are emitted as a single plot
    # command joined with gnuplot line continuations
    my @series = (
"plot '$file_tab' using 1:2 lc rgbcolor '#ff0000' title 'read 1 quality' with lines",
" '$file_tab' using 1:3 lc rgbcolor '#7f0000' title 'read 1 quality accum' with lines",
    );
    if ($PAIRED) {
        push @series,
" '$file_tab' using 1:4 lc rgbcolor '#0000ff' title 'read 2 quality' with lines",
" '$file_tab' using 1:5 lc rgbcolor '#00007f' title 'read 2 quality accum' with lines";
    }
    print $h_plot join( ",\\\n", @series ), "\n";
    close $h_plot;

    # run gnuplot
    return if !$HAS_GNUPLOT;
    system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
}
# Write the gnuplot data file and script for the stacked lane/tile quality
# bar chart of one read side, then render it when gnuplot is available.
#   $base - file name base inside the data directory
#   $side - read number shown in the plot title
sub plot_lane_tile_quality {
    my ( $base, $side ) = @_;

    my ( $file_tab, $file_plot, $file_dat ) =
      map { catfile( $dir_data, $base . $_ ) } qw(.tab .gnuplot .dat);
    my $file_chart = catfile( $dir_data, $base . '.' . $FORMAT );

    # read ticks
    # create dat file
    open my $h_tab, '<', $file_tab
      or die "failed to open '$file_tab' for read: $!";
    open my $h_dat, '>', $file_dat
      or die "failed to open '$file_dat' for write: $!";

    # the header row only carries column titles, which the plot does not use
    my $header = <$h_tab>;

    # data rows are numbered from 1; the number is both the x position in
    # the dat file and the position of the row's tick label
    my @ticks_list;
    my $row_no = 0;
    while ( defined( my $row = <$h_tab> ) ) {
        chomp $row;
        $row_no++;
        my ( $lane, $tile, @data ) = split /\t/, $row;
        push @ticks_list, qq{"lane $lane tile $tile" $row_no};
        print $h_dat join( "\t", $row_no, @data ), "\n";
    }
    close $h_tab;
    close $h_dat;
    my $ticks_str = join ', ', @ticks_list;

    # write script
    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<HEREDOC;
set title "Lane-Tile Quality for Reads $side" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set key outside font "*,$SZ_TEXT"
set xlabel "Tiles" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics rotate 90 ($ticks_str) font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set style fill solid
set output "$file_chart"
plot '$file_dat' using 1:(\$5+\$4+\$3+\$2):(1) with boxes lc rgbcolor "blue" title "quality > 30", \\
'$file_dat' using 1:(\$4+\$3+\$2):(1) with boxes lc rgbcolor "green" title "20 < quality < 30", \\
'$file_dat' using 1:(\$3+\$2):(1) with boxes lc rgbcolor "red" title "10 < quality < 20", \\
'$file_dat' using 1:2:(1) with boxes lc rgbcolor "black" title "quality < 10"
HEREDOC
    close $h_plot;

    # run gnuplot
    return if !$HAS_GNUPLOT;
    system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
}
# Write the gnuplot script for the per-cycle base composition chart of one
# read side (stacked boxes drawn from the .tab file), then render it when
# gnuplot is available.
#   $base - file name base inside the data directory
#   $side - read number shown in the plot title
sub plot_cycle_composition {
    my $base = shift;
    my $side = shift;
    my $file_tab = catfile $dir_data, $base . '.tab';
    my $file_plot = catfile $dir_data, $base . '.gnuplot';
    my $file_chart = catfile $dir_data, $base . '.' . $FORMAT;
    # x range padded by half a unit on each end
    my $x_min = -0.5;
    my $x_max = $LENGTH + 0.5;
    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<heredoc;
set title "Cycle Composition for Read $side" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set style fill solid
set key outside font "*,$SZ_TEXT"
set xrange [$x_min:$x_max]
set xlabel "Cycle" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set output "$file_chart"
heredoc
    # the plot command is assembled in pieces: the keyword, an optional
    # MASK series, then the fixed series
    print $h_plot "plot";
    # the extra MASK series is emitted only when info.tab reported mask = T
    if ($MASK) {
        print $h_plot <<heredoc;
'$file_tab' using 1:(\$7+\$6+\$5+\$4+\$3+\$2) with boxes lc rgbcolor "yellow" title "MASK", \\
heredoc
    }
    # each series sums one column fewer than the previous one, so the
    # boxes drawn later cover the lower part of the earlier ones
    print $h_plot <<heredoc;
'$file_tab' using 1:(\$6+\$5+\$4+\$3+\$2) with boxes lc rgbcolor "grey" title "N", \\
'$file_tab' using 1:(\$5+\$4+\$3+\$2) with boxes lc rgbcolor "black" title "C", \\
'$file_tab' using 1:(\$4+\$3+\$2) with boxes lc rgbcolor "blue" title "G", \\
'$file_tab' using 1:(\$3+\$2) with boxes lc rgbcolor "red" title "T", \\
'$file_tab' using 1:2 with boxes lc rgbcolor "green" title "A"
heredoc
    close $h_plot;
    # run gnuplot
    if ($HAS_GNUPLOT) {
        system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
    }
}
# Write the gnuplot script for the reads-length line chart (read 2 series
# are added only for paired data), then render it when gnuplot is
# available.
#   $base - file name base inside the data directory
sub plot_reads_length {
    my ($base) = @_;

    my $file_tab   = catfile( $dir_data, $base . '.tab' );
    my $file_plot  = catfile( $dir_data, $base . '.gnuplot' );
    my $file_chart = catfile( $dir_data, $base . '.' . $FORMAT );

    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<heredoc;
set title "Reads Length" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set key font "*,$SZ_TEXT"
set xlabel "Length" font "*,$SZ_TEXT"
set ylabel "Num reads" font "*,$SZ_TEXT"
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set output "$file_chart"
heredoc

    # one entry per plotted series; they are emitted as a single plot
    # command joined with gnuplot line continuations
    my @series = (
"plot '$file_tab' using 1:2 lc rgbcolor '#ff0000' title 'read 1 length' with lines",
" '$file_tab' using 1:3 lc rgbcolor '#7f0000' title 'read 1 length accum' with lines",
    );
    if ($PAIRED) {
        push @series,
" '$file_tab' using 1:4 lc rgbcolor '#0000ff' title 'read 2 length' with lines",
" '$file_tab' using 1:5 lc rgbcolor '#00007f' title 'read 2 length accum' with lines";
    }
    print $h_plot join( ",\\\n", @series ), "\n";
    close $h_plot;

    # run gnuplot
    return if !$HAS_GNUPLOT;
    system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
}
# Write the gnuplot script for the read-pair quality QQ plot, labelled
# with the Pearson correlation taken from the first line of the .tab file,
# then render it when gnuplot is available.
#   $base - file name base inside the data directory
# Dies when the .tab file is empty or its first line carries no
# "# pearson correlation: <value>" entry.
sub plot_quality_qq {
    my $base = shift;
    my $file_tab = catfile $dir_data, $base . '.tab';
    my $file_plot = catfile $dir_data, $base . '.gnuplot';
    my $file_chart = catfile $dir_data, $base . '.' . $FORMAT;

    # label position: 3 units inside the top-left corner of the plot area
    my $label_x = $QUAL_FROM + 3;
    my $label_y = $QUAL_TO - 3;

    # get pearson r value
    open my $h_tab, '<', $file_tab
      or die "failed to open '$file_tab' for read: $!";
    my $tab_header = <$h_tab>;
    close $h_tab;

    # an empty file yields undef; report it instead of running chomp and
    # the regex on an undefined value
    die "failed to parse pearson correlation: '$file_tab' is empty"
      if !defined $tab_header;
    chomp $tab_header;
    $tab_header =~ /# pearson correlation: (\S+)/
      or die "failed to parse pearson correlation from '$tab_header'";
    my $pearson_text = $1;

    # write gnuplot script
    open my $h_plot, '>', $file_plot or die $!;
    print $h_plot <<heredoc;
set title "Read Pair Quality QQ plot" font "*,$SZ_TITLE"
set terminal $FORMAT size $WIDTH,$HEIGHT
set output "$file_chart"
unset key
set xrange [$QUAL_FROM:$QUAL_TO]
set yrange [$QUAL_FROM:$QUAL_TO]
set xtics font "*,$SZ_TEXT"
set ytics font "*,$SZ_TEXT"
set label 'r = $pearson_text' at $label_x,$label_y font "*,$SZ_TEXT"
plot '$file_tab' with lines
heredoc
    close $h_plot;

    # run gnuplot
    if ($HAS_GNUPLOT) {
        system( 'gnuplot', $file_plot ) == 0 or die "gnuplot failed";
    }
}
# help document
# Print the usage text (showing the current value of each setting in
# brackets) to standard output and exit with status 0.
sub show_help {
    my $usage = <<heredoc;
$0 - render ht_stat outputs
Usage:
ht_stat_draw.pl --dir STAT_DIR
Options:
--dir the output directory of ht_stat.
--format picture format. [$FORMAT]
--width picture width. [$WIDTH]
--height picture height. [$HEIGHT]
--title-size font size for picture titles. [$SZ_TITLE]
--text-size font size for axis ticks, legends, etc.. [$SZ_TEXT]
--help show help
heredoc
    print $usage;
    exit(0);
}
This diff is collapsed.
// Module-level variable initialised to null; it is neither assigned nor read
// in the visible part of this file.
var koopa = null;
// template
/**
 * Return the HTML skeleton of a carousel (indicator list plus inner listbox).
 *
 * NOTE(review): the string is single-quoted, so "${selector}" is returned
 * literally and the `selector` argument is never used. Confirm whether the
 * placeholder is meant for a later templating step or whether a template
 * literal was intended.
 */
function carousel_tmpl(selector){
    return '<div id="${selector}" class="carousel slide" data-interval="false"><ol class="carousel-indicators"></ol><div class="carousel-inner" role="listbox"></div></div>';
}
/**
 * Return the HTML of a centred carousel (a 6-column block between two
 * 3-column spacers) with previous/next controls.
 *
 * NOTE(review): the string is single-quoted, so "${selector}" is returned
 * literally and the `selector` argument is never used. Confirm whether the
 * placeholder is meant for a later templating step or whether a template
 * literal was intended.
 */
function simple_carousel(selector){
    return '<div class="col-sm-3 col-md-3"></div><div class="col-sm-6 col-md-6"><div id="${selector}" class="carousel slide" data-interval="false"><ol class="carousel-indicators"></ol><div class="carousel-inner" role="listbox"></div></div><a class="left carousel-control" role="button" data-slide="prev"><span class="glyphicon glyphicon-chevron-left" aria-hidden="true"></span><span class="sr-only">Previous</span></a><a class="right carousel-control" role="button" data-slide="next"><span class="glyphicon glyphicon-chevron-right" aria-hidden="true"></span><span class="sr-only">Next</span></a></div><div class="col-sm-3 col-md-3"></div>';
}
// define functions
// format time
String.prototype.toHHMMSS = function () {
var sec_num = parseInt(this, 10); // don't forget the second param
var hours = Math.floor(sec_num / 3600);
......@@ -25,6 +11,8 @@ String.prototype.toHHMMSS = function () {
var time = hours+'h:'+minutes+'m:'+seconds +"s";
return time;
}
// time chart rendering
function renderTimeChart(data, title, key, selector, total_runtime){
var converted = [];
for(item in data){
......@@ -71,6 +59,9 @@ function renderTimeChart(data, title, key, selector, total_runtime){
});
}
// bar chart rendering
function renderBarChart(data, title, key, selector, total_runtime){
var converted = [];
var total = 0.0;
......@@ -167,83 +158,50 @@ function renderFiles(data, selector, total_runtime){
}
function renderConfiguration(data, selector){
var child = null, val = null, params = null;
var general_config = data['general'];
child = $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>General</span></ul></li>");
for(var gconf in general_config){
if(gconf == 'raws'){
val = general_config[gconf]['Metagenomics'];
child.append("<li class='list-group-item'><b>Metagenomics</b> : " + val + "</li>");
val = general_config[gconf]['Metatranscriptomics'];
child.append("<li class='list-group-item'><b>Metatranscriptomics</b> : " + val + "</li>");
} else {
val = general_config[gconf];
child.append("<li class='list-group-item'><b>" + gconf + "</b> : " + val + "</li>");
}
}
var child = null, val = null, params = null, p = null;
// rendering version information
child = $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>Version</span></ul></li>");
child.append("<li class='list-group-item'><b>" + data.IMP_VERSION + "</b></li>");
$(selector).append(child);
for(item in data){
if(item != 'general'){
child = $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>" + item + "</span></ul></li>");
params = data[item];
for(p in params){
val = params[p];
child.append("<li class='list-group-item'><b>" + p + "</b> : " + val + "</li>");
// rendering raws
child = $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>Data</span></ul></li>");
child.append("<li class='list-group-item'><b>Metagenomics</b> : " + data.MG + "</li>");
child.append("<li class='list-group-item'><b>Metatranscriptomics</b> : " + data.MT + "</li>");
$(selector).append(child);
// rendering non-params
child = $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>General parameters</span></ul></li>");
for(var gconf in data){
if(gconf != 'MG' && gconf != 'MT'){
val = data[gconf];
if(typeof val !== 'object'){
child.append("<li class='list-group-item'><b>" + gconf + "</b> : " + val + "</li>");
}
$(selector).append(child);
}
}
}
$(selector).append(child);
/**
 * Append a media list with one thumbnail per entry of `files` to `selector`.
 * Each image source is `path + '/' + file name`.
 */
function renderStats(selector, path, files){
    var list = $("<ul class='media-list'></ul>");
    for(var i in files){
        var source = path + '/' + files[i];
        list.append("<li class='media'><img class='img-responsive img-thumbnail' src='" + source + "'></img></li>");
    }
    $(selector).append(list);
}
// rendering nested params
for(var gconf in data){
if(gconf != 'raws'){
val = data[gconf];
if(typeof val === 'object'){
child = $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>" + gconf + "</span></ul></li>");
for(p in val){
child.append("<li class='list-group-item'><b>" + p + "</b> : " + val[p] + "</li>");
}
$(selector).append(child);
}
/**
 * Build a carousel from the markup returned by carousel_tmpl(selector), add
 * one slide per entry of `files` (image source `path + '/' + file name`),
 * mark the slide at index 0 as active and append the carousel to the element
 * whose id is `selector + "-wrapper"`.
 */
function renderStatsCarousel(selector, path, files){
    var widget = $(carousel_tmpl(selector));
    var inner = $(widget).find(".carousel-inner");
    for(var i in files){
        var slide = $("<div class='item'><img class='img-responsive img-thumbnail' src='" + (path + '/' + files[i]) + "'></img></div>");
        // for...in yields string keys; the loose comparison matches "0".
        if(i == 0){
            slide.addClass("active");
        }
        inner.append(slide);
    }
    $('#' + selector + "-wrapper").append(widget);
}
/**
 * Same as the stats carousel, but the markup comes from
 * simple_carousel(selector): one slide per entry of `files`, the slide at
 * index 0 active, the result appended to `#<selector>-wrapper`.
 */
function renderSimpleCarousel(selector, path, files){
    var widget = $(simple_carousel(selector));
    var inner = $(widget).find(".carousel-inner");
    for(var i in files){
        var slide = $("<div class='item'><img class='img-responsive img-thumbnail' src='" + (path + '/' + files[i]) + "'></img></div>");
        // for...in yields string keys; the loose comparison matches "0".
        if(i == 0){
            slide.addClass("active");
        }
        inner.append(slide);
    }
    $('#' + selector + "-wrapper").append(widget);
}
// Trigger functions
// configuration
if(IMP_CONFIG){
renderConfiguration(IMP_CONFIG, '#configuration');
}
// runtime statistics
if(typeof IMP_STATS !== 'undefined') {
//renderTimeChart(IMP_STATS.rules, '% runtime per task ', 'mean-runtime', 'time-charts-mean', IMP_STATS.total_runtime);
......@@ -251,115 +209,116 @@ if(typeof IMP_STATS !== 'undefined') {
renderBarChart(IMP_STATS.rules, '% runtime per task ', 'mean-runtime', '#bar-chart', IMP_STATS.total_runtime);
} else {
$('#imp-stats').remove();
$('#imp-stats-alert').show();
//renderTimeChart(data['rules'], 'Max runtime', 'max-runtime', 'time-charts-max');
}
// ht_stats
// Render every carousel from small tables instead of repeating the call
// with the same file list; call order and arguments are unchanged.
(function(){
    // Plots shown for every raw / preprocessed read set.
    var qcPlots = [
        "cycle_composition_2.png",
        "cycle_quality_1.png",
        "cycle_quality_2.png",
        "cycle_quality_box_1.png",
        "cycle_quality_box_2.png",
        "lane_tile_quality_1.png",
        "lane_tile_quality_2.png",
        "quality_QQ.png",
        "reads_length.png",
        "reads_quality.png"
    ];
    // [carousel id, directory holding the plots]
    var qcCarousels = [
        ["carousel-mg-raw", "Preprocessing/stats/MG"],
        ["carousel-mg-preprocessed", "Preprocessing/stats_after_preprocessing/MG"],
        ["carousel-mt-raw", "Preprocessing/stats/MT"],
        ["carousel-mt-preprocessed", "Preprocessing/stats_after_preprocessing/MT"]
    ];
    qcCarousels.forEach(function(entry){
        // Hand each call its own copy of the list, as the literals did.
        renderStatsCarousel(entry[0], entry[1], qcPlots.slice());
    });

    // [carousel id, plots] for the analysis results.
    var resultCarousels = [
        ['carousel-assembly', [
            'IMP-vizbin_length.png',
            'IMP-vizbin_length_GC.png'
        ]],
        ['carousel-mapping', [
            'IMP-reads_density.png',
            'IMP-rpkm_density.png',
            'IMP-coverage_density.png',
            'IMP-depth_density.png',
            'IMP-vizbin_length_MGcov.png',
            'IMP-vizbin_length_MTcov.png',
            'IMP-vizbin_length_MGdepth.png',
            'IMP-vizbin_length_MTdepth.png'
        ]],
        ['carousel-ration', [
            'IMP-vizbin_length_depthRatio.png',
            'IMP-vizbin_length_rpkmRatio.png'
        ]],
        ['carousel-variant', [
            'IMP-var_count.png',
            'IMP-var_density.png',
            'IMP-vizbin_length_MGvardens.png',
            'IMP-vizbin_length_MTvardens.png'
        ]]
    ];
    resultCarousels.forEach(function(entry){
        renderSimpleCarousel(entry[0], 'Analysis/results', entry[1]);
    });
})();
// hide sections
//$("section.row").hide();
// $("section.row").prev("h2").click(function(){
// $(this).find(".glyphicon").toggleClass("glyphicon-plus glyphicon-minus");
// $(this).next("section").toggle();
// });
//carousel
$('.left.carousel-control').click(function(){
$('.carousel').carousel('next');
// BUTTONS
$('#but-raw').on('change', function(){
var val = $('input[name="raw-data"]:checked').val();
$("#ifr-raw-stat").attr('src', val);
});
$('.right.carousel-control').click(function(){
$('.carousel').carousel('prev');
$('#but-preprocess').on('change', function(){
var val = $('input[name="preprocess-data"]:checked').val();
$("#ifr-preprocess-stat").attr('src', val);
});
// show first tab
$('#tabs li:first > a').tab('show');
$('#but-annot').on('change', function(){
var val = $('input[name="annot-data"]:checked').val();
$("#ifr-kronaplot").attr('src', val);
});
$('#but-assembly').on('change', function(){
var val = $('input[name="assembly-data"]:checked').val();
if(val.endsWith('png')){
$("#assembly-wrapper").replaceWith("<div id='assembly-wrapper' class='row'><img class='img-responsive' src='" + val + "'/></div>");
} else {
$("#assembly-wrapper").replaceWith("<div id='assembly-wrapper' class='row'><iframe style='position: absolute; width: 100%;height: 100%; border: none' src='" + val + "'></iframe></div>");
}
if(val.indexOf('vizbin') > -1){
$('.vizbin-img').show();
$('.metaquast-p').hide();
} else {
$('.vizbin-img').hide();
$('.metaquast-p').show();
}
});
$('#but-mapping').on('change', function(){
var val = $('input[name="mapping-data"]:checked').val();
$("#mapping-wrapper").attr('src', val);
if(val.indexOf('vizbin') > -1){
$('.vizbin-img').show();
} else {
$('.vizbin-img').hide();
}
});
$('#but-ratio').on('change', function(){
var val = $('input[name="ratio-data"]:checked').val();
$("#ratio-wrapper").attr('src', val);
});
$('#but-variant').on('change', function(){
var val = $('input[name="variant-data"]:checked').val();
$("#variant-wrapper").attr('src', val);
});
// set last tab in cookie
$('#tabs a[data-toggle="tab"]').on('shown.bs.tab', function (e) {
Cookies('tab', $(e.target).attr('href'));
})
// show last opened tab or first one
if(Cookies('tab')){
$('#tabs li > a[href=' + Cookies('tab') + ']').tab('show')
} else {
$('#tabs li:first > a').tab('show');
}
// load kronaplot if people click on tab. Make a lot of errors if loaded from start.
$('#annottab').on('show.bs.tab', function (e) {
if($("#MG-kronaplot").attr('src') === undefined){
$("#ifr-kronaplot").attr('src', 'Analysis/results/MG.gene_kegg_krona.html');
}
});
//load maxbin results
$.get('Binning/MaxBin/maxbin_res.summary', function(data) {
// start the table
var html = '<table>';
// split into lines
var rows = data.split("\n");
var headers = false;
var col;
var colstart = '<td>';
var colend = '</td>';
// parse lines
rows.forEach( function getvalues(localrow) {
var columns = localrow.split("\t");
if(columns.length > 1){
html += "<tr>";
if(!headers){
colstart = '<th>';
colend = '</th>';
headers = true;
} else {
colstart = '<td>';
colend = '</td>';
}
for(var i in columns){
col = columns[i];
html += colstart + col + colend;
}
html += "</tr>";
}
})
html += "</table>";
$('#maxbin-results').append(html);
//load html tables
$(".table-result").each(function(idx, node) {
var ident = node.id;
//console.log(ident);
$("#" + ident).load("MGMT/results/" + ident + ".html", function() {
//console.log(ident);
});
});
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<link rel="stylesheet" href="Report/lib/bootstrap.min.css">
<script src="Report/lib/d3.min.js"></script>
<script src="Report/lib/jquery-2.1.1.min.js"></script>
<script src="Report/lib/jquery.cookie.min.js"></script>
<script src="Report/data.js"></script>
<script src="Report/lib/bootstrap.min.js"></script>
<style>
body {
padding-top: 70px;
}
.bar {
fill: steelblue;
}
.axis text {
font: 10px sans-serif;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.img-result-container {
width: 350px;
float: left;
}
.table-result {
float: left;
margin: 10px;
}
.table-result table,td {
border: 1px solid steelblue;
padding: 5px;
text-align: center;
}
.imp-logo {
height: 150px;
width: 150px;
background: url(Report/lib/IMP_logo.png);
background-size: 100% 100%;
background-repeat: no-repeat;
}
.vizbin-logo {
width: 220px;
}
</style>
</head>
<body>
<div id="block-stats" class="container-fluid">
<div>
<div class="col-xs-2 col-md-2"><div class='imp-logo'></div>
</div>
<div id="tabs" role="tabpanel">
<ul id="tabnav" class="nav nav-tabs navbar-nav navbar-fixed-top navbar-inverse" role="tablist">
<a class="navbar-brand" href="#"> IMP <small>Integrated Meta-omic Pipeline </small></a>
<li><a href="#raw" aria-controls="raw" role="tab" data-toggle="tab">Raw</a></li>
<li><a href="#preprocess" aria-controls="preprocess" role="tab" data-toggle="tab">Preprocess</a></li>
<li><a href="#assembly" aria-controls="assembly" role="tab" data-toggle="tab">Assembly</a></li>
<li><a href="#mapping" aria-controls="mapping" role="tab" data-toggle="tab">Mapping</a></li>
<li><a id="annottab" href="#annotation" aria-controls="annotation" role="tab" data-toggle="tab">Annotation</a></li>
<li><a id="binningtab" href="#binning" aria-controls="binning" role="tab" data-toggle="tab">Binning</a></li>
<li><a href="#variants" aria-controls="variants" role="tab" data-toggle="tab">Variants</a></li>
<li><a href="#overview" aria-controls="overview" role="tab" data-toggle="tab">Workflow overview</a></li>
<li><a href="#configurations" aria-controls="configurations" role="tab" data-toggle="tab">Configuration</a></li>
<li><a href="#references" aria-controls="references" role="tab" data-toggle="tab">References</a></li>
<p class="navbar-text navbar-right"></p>
</ul>
</div>
<div id="wholecontent" class="container">
<div class="tab-content">
<!-- RAW STATISTICS -->
<div role="tabpanel" class="tab-pane fade" id="raw">
<section id="raw-section" class="row">
<h2><small>Raw data QC</small></h2>
<div id="but-raw" class="btn-group" data-toggle="buttons" name="raw-data">
<label class="btn btn-primary">
<input id="but-raw-mgr1" type="radio" value="Analysis/stats/mg/mg.r1.fq_fastqc.html" class="btn btn-default" name="raw-data">Metagenomic pair-1 FASTQC report
</label>
<label class="btn btn-primary">
<input id="but-raw-mgr2" type="radio" value="Analysis/stats/mg/mg.r2.fq_fastqc.html" class="btn btn-default" name="raw-data">Metagenomic pair-2 FASTQC report
</label>
</div>
<div class="row">
<iframe id="ifr-raw-stat" src="Analysis/stats/mg/mg.r1.fq_fastqc.html" style="position: absolute; width: 100%;height: 100%; border: none"></iframe>
</div>
</section>
</div>
<!-- PREPROCESS STATISTICS -->
<div role="tabpanel" class="tab-pane fade active" id="preprocess">
<section id="preprocess-section" class="row">
<h2><small>Preprocessed data QC</small></h2>
<div id="but-preprocess" class="btn-group" data-toggle="buttons" name="preprocess-data">
<label class="btn btn-primary">
<input id="but-preprocess-mgr1" type="radio" value="Analysis/stats/mg/mg.r1_preprocessed_fastqc.html" class="btn btn-default" name="preprocess-data">Metagenomic pair-1 FASTQC report
</label>
<label class="btn btn-primary">
<input id="but-preprocess-mgr2" type="radio" value="Analysis/stats/mg/mg.r2_preprocessed_fastqc.html" class="btn btn-default" name="preprocess-data">Metagenomic pair-2 FASTQC report
</label>
<label class="btn btn-primary">
<input id="but-preprocess-mgse" type="radio" value="Analysis/stats/mg/mg.se_preprocessed_fastqc.html" class="btn btn-default" name="preprocess-data">Metagenomic single-end FASTQC report
</label>
</div>
<div class="row">
<iframe id="ifr-preprocess-stat" src="Analysis/stats/mg/mg.r1_preprocessed_fastqc.html" style="position: absolute; width: 100%;height: 100%; border: none"></iframe>
</div>
</section>
</div>
<!-- FILTERING -->
<section id="filtering-section" class="row">
<div class="row">
<div class="table-result" id="mg_read_stats"></div>
<div class="table-result" id="mt_read_stats"></div>
</div>
</section>
<!-- ASSEMBLY -->
<div role="tabpanel" class="tab-pane fade" id="assembly">
<section id="assembly-section" class="row">
<h2><small>Assembly visualization, QC and taxonomic annotation</small></h2>
<div id="but-assembly" class="row btn-group" data-toggle="buttons" name="assembly-data">
<label class="btn btn-primary active">
<input id="but-assembly1" type="radio" value="Analysis/results/IMP-vizbin_length.png" class="btn btn-default" name="assembly-data" checked>VizBin + contig length
</label>
<label class="btn btn-primary">
<input id="but-assembly2" type="radio" value="Analysis/results/IMP-vizbin_length_GC.png" class="btn btn-default" name="assembly-data">VizBin + GC %
</label>
<label class="btn btn-primary">
<input id="but-assembly3" type="radio" value="Analysis/results/quast/summary/report.html" class="btn btn-default" name="assembly-data">MetaQUAST report
<div class="metaquast-p" style="display:none">
<p class="text-center lead">Sub-report made with <a href="http://bioinf.spbau.ru/metaquast">MetaQUAST</a></p>
</div>
</label>
</div>
<div class="row">
<div id='assembly-wrapper' class='row'><img class='img-responsive' src='Analysis/results/IMP-vizbin_length.png'/></div>
</div>
<div class="vizbin-img row">
<p class="lead text-center">
In order to perform a human augmented binning of the contigs, please import the following
<a href="Analysis/mg.vizbin.filtered.fa">fasta file</a> to
</p>
</div>
<div class="vizbin-img row">
<div style="text-align:center;">
<a href="http://claczny.github.io/VizBin/"><img src='Report/lib/vizbin_logo.png'/></a>
</div>
</div>
</section>
</div>
<!-- MAPPING -->
<div role="tabpanel" class="tab-pane fade" id="mapping">
<section id="mapping-section" class="row">
<h2><small>Read mapping visualizations</small></h2>
<div id="but-mapping" class="row btn-group" data-toggle="buttons" name="mapping-data">
<!-- <label class="btn btn-primary active">
<input id="but-mapping1" type="radio" value="Analysis/results/IMP-rpkm_density.png" class="btn btn-default" name="mapping-data" checked>RPKM density beanplot
</label> -->
<label class="btn btn-primary">
<input id="but-mapping2" type="radio" value="Analysis/results/IMP-MG_coverage_density.png" class="btn btn-default" name="mapping-data">Coverage and depth density beanplot
</label>
<label class="btn btn-primary">
<input id="but-mapping3" type="radio" value="Analysis/results/IMP-MG_reads_density.png" class="btn btn-default" name="mapping-data">Reads density beanplot
</label>
<label class="btn btn-primary">
<input id="but-mapping4" type="radio" value="Analysis/results/IMP-MG_vizbin_length_cov.png" class="btn btn-default" name="mapping-data">VizBin + metagenomic coverage
</label>
<!-- <label class="btn btn-primary">
<input id="but-mapping5" type="radio" value="Analysis/results/IMP-vizbin_length_mgcov.png" class="btn btn-default" name="mapping-data">VizBin + metatranscriptomic coverage
</label> -->
<label class="btn btn-primary">
<input id="but-mapping6" type="radio" value="Analysis/results/IMP-MG_vizbin_length_depth.png" class="btn btn-default" name="mapping-data">VizBin + metagenomic depth of coverage
</label>
<!-- <label class="btn btn-primary">
<input id="but-mapping7" type="radio" value="Analysis/results/IMP-vizbin_length_mgdepth.png" class="btn btn-default" name="mapping-data">VizBin + metatranscriptomic depth of coverage
</label> -->
</div>
<div class="row">
<img id='mapping-wrapper' class='img-responsive' src='Analysis/results/IMP-MG_reads_density.png'/>
</div>
<div class="vizbin-img row">
<p class="lead text-center">
In order to perform a human augmented binning of the contigs, please import the following
<a href="Analysis/mg.vizbin.filtered.fa">fasta file</a> to
</p>
</div>
<div class="vizbin-img row">
<div style="text-align:center;">
<a href="http://claczny.github.io/VizBin/"><img src='Report/lib/vizbin_logo.png'/></a>
</div>
</div>
</section>
</div>
<!-- ANNOTATION -->
<div role="tabpanel" class="tab-pane fade" id="annotation">
<section id="annotation-section" class="row">
<h1><small>KEGG Ontology based Functional Krona Chart</small></h1>
<div id="but-annot" class="btn-group" data-toggle="buttons" name="annot-data">
<label class="btn btn-primary">
<input id="but-annot-mg" type="radio" value="Analysis/results/mg.gene_kegg_krona.html" class="btn btn-default" name="annot-data">Metatranscriptome
</label>
</div>
<div class="row">
<iframe id="ifr-kronaplot" style="position: absolute; width: 80%;height: 80%; border: none"></iframe>
</div>
</section>
</div>
<!-- <div role="tabpanel" class="tab-pane fade" id="annotationmt">
<section id="annotation-section" class="row">
<h1>mt annotation <small>Kronaplot</small></h1>
<div class="row">
<iframe id="mt-kronaplot" src="Analysis/results/mt.gene_kegg_krona.html" style="position: absolute; width: 80%;height: 80%; border: none"></iframe>
</div>
</section>
</div> -->
<!-- BINNING -->
<div role="tabpanel" class="tab-pane fade" id="binning">
<section id="binning-section" class="row">
<h1><small>MaxBin results</small></h1>
<div id="but-binning" class="row btn-group" data-toggle="buttons" name="binning-data">
<label class="btn btn-primary active">
<input id="but-binning1" type="radio" value="Binning/MaxBin/IMP-MaxBin-vizbin_length_bundance.png" class="btn btn-default" name="binning-data" checked>MaxBin bins
</label>
<label class="btn btn-primary">
<input id="but-binning2" type="radio" value="Binning/MaxBin/IMP-MaxBin-vizbin_length_completeness_abundance.png" class="btn btn-default" name="binning-data">MaxBin bin completeness
</label>
</div>
<div class="row">
<img id='binning-wrapper' class='img-responsive' src='Binning/MaxBin/IMP-MaxBin-vizbin_length_bundance.png'/>
</div>
<h4>
MaxBin bin information
</h4>
<div id="maxbin-results" class="row"></div>
<p class="lead">
Powered by <a href="https://microbiomejournal.biomedcentral.com/articles/10.1186/2049-2618-2-26">MaxBin</a> and <a href="http://claczny.github.io/VizBin/">VizBin</a>.
</p>
</section>
</div>
<!-- VARIANTS -->
<div role="tabpanel" class="tab-pane fade" id="variants">
<section id="variants-section" class="row">
<h2><small>Variants </small></h2>
<div id="but-variant" class="row btn-group" data-toggle="buttons" name="variant-data">
<label class="btn btn-primary active">
<input id="but-variant1" type="radio" value="Analysis/results/IMP-MG_var_count.png" class="btn btn-default" name="variant-data" checked>Variant count and density beanplot
</label>
<!-- <label class="btn btn-primary">
<input id="but-variant2" type="radio" value="Analysis/results/IMP-var_density.png" class="btn btn-default" name="variant-data">Variant density beanplot
</label> -->
<label class="btn btn-primary">
<input id="but-variant4" type="radio" value="Analysis/results/IMP-MG_vizbin_length_vardens.png" class="btn btn-default" name="variant-data">VizBin + metagenomic variant density
</label>
</div>
<div class="row">
<img id='variant-wrapper' class='img-responsive' src='Analysis/results/IMP-MG_var_count.png'/>
</div>
<div class="row">
<p class="lead vizbin-img text-center" style="display:none">
In order to perform a human augmented binning of the contigs, please import the following
<a href="Analysis/mg.vizbin.filtered.fa">fasta file</a> to
</p>
</div>
<div class="vizbin-img row" style="display:none">
<div style="text-align:center;">
<a href="http://claczny.github.io/VizBin/"><img src='Report/lib/vizbin_logo.png'/></a>
</div>
</div>
</section>
</div>
<!-- IMP STATS -->
<div role="tabpanel" class="tab-pane fade" id="overview">
<section class="row">
<section id="impstatistics-section">
<h2><small>Statistics </small></h2>
<div id="imp-stats" class="row">
<div id="basic-info"></div>
<div id="time-charts-mean"></div>
<div id="bar-chart"></div>
</div>
</section>
<!-- DIAGRAMS -->
<span id="diagrams"></span>
<section id="diagrams-section">
<h2><small>Workflow diagram </small></h2>
<div id="imp-diagram" class="row">
<p>The workflow is also available in <a href="Report/workflow.pdf">PDF</a>.</p>
<img src="Report/workflow.png"/>
</div>
</section>
</section>
</div>
<!-- IMP CONFIG -->
<div role="tabpanel" class="tab-pane fade" id="configurations">
<section id="config-section" class="row">
<h2><small>Configuration </small></h2>
<div>
IMP has run with the configuration file saved at: <a href="Report/config.json">config.json</a>.
<ul id="configuration" class="list-group">
</ul>
</div>
</section>
<!-- IMP LOG -->
<span id="log"></span>
<section id="log-section" class="row">
<h2><small>Log </small></h2>
<div>
<p>The log files register all commands that have been run by IMP.</p>
</div>
<div>
<p>The logs are accessible here, one per IMP step:
<ul>
<li><a href="Preprocessing/Preprocessing.log">Preprocessing</a></li>
<li><a href="Assembly/Assembly.log">Assembly</a></li>
<li><a href="Analysis/Analysis.log">Analysis</a></li>
<li><a href="Util/Util.log">Util</a></li>
</ul>
</p>
</div>
</section>
<section id="files-section" class="row">
<h2><small>Result files </small></h2>
<p>All results files can be found in the <a href=".">root directory</a> of this report.</p>
</section>
<br/><br/>
</div>
<!-- REFERENCES -->
<div role="tabpanel" class="tab-pane fade" id="references">
<section id="references-section" class="row">
<h2><small>Please cite the pre-print article on <a href="http://biorxiv.org/">http://biorxiv.org</a>.</small></h2>
<div>
<p class="lead">
<strong>IMP: a pipeline for reproducible integrated metagenomic and metatranscriptomic analyses</strong><br/>
<small>Shaman Narayanasamy†, Yohan Jarosz†, Emilie E.L. Muller, Cédric C. Laczny, Malte Herold, Anne Kaysen, Anna Heintz-Buschart, Nicolás Pinel, Patrick May, and Paul Wilmes*</small>
</p>
</div>
</section>
</div>
</div>
</div>
</div>
</div>
<script src="Report/lib/imp.js"></script>
</body>
</html>
// format time
/**
 * Interpret the string as a whole number of seconds (base 10) and format it
 * as "<hours>h:<minutes>m:<seconds>s"; each part below 10 gets a leading zero.
 */
String.prototype.toHHMMSS = function () {
    // Prefix values below ten with "0"; anything else is passed through.
    function pad(part) {
        return part < 10 ? "0" + part : part;
    }
    var totalSeconds = parseInt(this, 10); // explicit radix
    var h = Math.floor(totalSeconds / 3600);
    var afterHours = totalSeconds - (h * 3600);
    var m = Math.floor(afterHours / 60);
    var s = afterHours - (m * 60);
    return pad(h) + 'h:' + pad(m) + 'm:' + pad(s) + "s";
};
// time chart rendering
/**
 * Draw a d3pie chart with one slice per key of `data`.
 *
 * @param data          object keyed by label; the slice value is data[label][key]
 * @param title         text of the chart header
 * @param key           name of the property of each entry used as slice value
 * @param selector      target handed to d3pie as its first argument
 * @param total_runtime number of seconds, shown formatted in the subtitle
 */
function renderTimeChart(data, title, key, selector, total_runtime){
    var converted = [];
    // Declared locally: the loop variable used to be an implicit global,
    // which throws in strict mode and leaks `item` onto the global object.
    var item;
    for(item in data){
        converted.push({
            label: item,
            value: data[item][key]
        });
    }
    var pie = new d3pie(selector, {
        header: {
            title: {
                text: title
            },
            subtitle: {
                text: (total_runtime).toString().toHHMMSS() + " total."
            },
            location: "pie-center"
        },
        data: {
            //sortOrder: "value-asc",
            content: converted
        },
        size: {
            canvasHeight: 500,
            canvasWidth: 500,
            pieInnerRadius: "80%",
            pieOuterRadius: null
        },
        labels: {
            outer: {
                format: "label",
                hideWhenLessThanPercentage: 0,
                pieDistance: 30
            },
            inner: {
                format: "percentage",
                hideWhenLessThanPercentage: 4
            },
            lines: {
                enabled: true,
                style: "curved"
            }
        }
    });
}
// bar chart rendering
/**
 * Draw a horizontal bar chart (d3 v3 API) with one bar per key of `data`,
 * sorted by decreasing value, each annotated with its share of the total.
 *
 * @param data          object keyed by label; the bar value is data[label][key]
 * @param title         not used by this function
 * @param key           name of the property of each entry used as bar value
 * @param selector      d3 selector of the element receiving the <svg>
 * @param total_runtime not used by this function
 */
function renderBarChart(data, title, key, selector, total_runtime){
    var converted = [];
    var total = 0.0;
    // Declared locally: the loop variable used to be an implicit global,
    // which throws in strict mode and leaks `item` onto the global object.
    var item;
    for(item in data){
        converted.push({
            label: item,
            value: data[item][key]
        });
        total += data[item][key];
    }
    // Largest value first.
    converted.sort(function(a, b){
        return b.value - a.value;
    });
    // Share of the total as a whole percentage; shares rounding to 0 are
    // reported as "<1%".
    function percent(value){
        var v = (value * 100 / total).toFixed(0);
        if(v == 0){
            return "<1%";
        }
        return v + "%";
    }
    var margin = {top: 30, right: 50, bottom: 10, left: 400},
        width = 960 - margin.left - margin.right,
        height = 500 - margin.top - margin.bottom;
    var x = d3.scale.linear()
        .range([0, width]);
    var y = d3.scale.ordinal()
        .rangeRoundBands([0, height], .2);
    var xAxis = d3.svg.axis()
        .scale(x)
        .orient("top");
    var svg = d3.select(selector).append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
    x.domain(d3.extent(converted, function(d) { return d.value; })).nice();
    y.domain(converted.map(function(d) { return d.label; }));
    // One group per entry, positioned on its band.
    var bar = svg.selectAll(".bar")
        .data(converted)
        .enter().append("g")
        .attr("transform", function(d){
            return "translate(" + x(Math.min(0, d.value)) + "," + y(d.label) + ")";
        });
    bar.append("rect")
        .attr("class", "bar")
        .attr("width", function(d) { return Math.abs(x(d.value) - x(0)); })
        .attr("height", y.rangeBand());
    // Entry label, right-aligned to the left of the bar.
    bar.append("text")
        .attr("x", -6)
        .attr("y", y.rangeBand())
        .attr("dy", ".35em")
        .style("text-anchor", "end")
        .text(function(d) { return d.label; });
    // Percentage, right-aligned just past the right edge of the plot.
    bar.append("text")
        .attr("x", function(d){
            return width + 12;
        })
        .attr("dy", ".35em")
        .style("text-anchor", "end")
        .text(function(d) { return percent(d.value); });
    // axis
    svg.append("g")
        .attr("class", "x axis")
        .call(xAxis);
    svg.append("g")
        .attr("class", "y axis")
        .append("line")
        .attr("x1", x(0))
        .attr("x2", x(0))
        .attr("y2", height);
    // The never-called inner helper `type(d)` was removed as dead code.
}
/**
 * Write the run summary (formatted total runtime and number of keys in
 * `data`) into #basic-info. `selector` is not used.
 */
function renderFiles(data, selector, total_runtime){
    var runtimeLabel = (total_runtime).toString().toHHMMSS();
    var fileCount = Object.keys(data).length;
    var summary = "IMP has run in <span class='label label-success'>" + runtimeLabel +
        "</span> and <span class='label label-success'>" + fileCount + " files were generated</span>";
    $('#basic-info').html(summary);
}
/**
 * Render the configuration object `data` as list groups appended to
 * `selector`, in this order: the version, the MG / MT inputs, every other
 * non-object value ("General parameters"), then one group per object value.
 */
function renderConfiguration(data, selector){
    // New group item carrying `title` as its label.
    function group(title){
        return $("<li class='list-group-item'><ul class='list-group'><span class='label label-info'>" + title + "</span></ul></li>");
    }
    // Markup of a "name : value" row.
    function row(name, value){
        return "<li class='list-group-item'><b>" + name + "</b> : " + value + "</li>";
    }
    var target = $(selector);
    var name, inner, value;

    // version information
    var versionGroup = group("Version");
    versionGroup.append("<li class='list-group-item'><b>" + data.IMP_VERSION + "</b></li>");
    target.append(versionGroup);

    // input data
    var dataGroup = group("Data");
    dataGroup.append(row("Metagenomics", data.MG));
    dataGroup.append(row("Metatranscriptomics", data.MT));
    target.append(dataGroup);

    // top-level values that are not objects, MG and MT excepted
    var generalGroup = group("General parameters");
    for(name in data){
        if(name == 'MG' || name == 'MT'){
            continue;
        }
        value = data[name];
        if(typeof value !== 'object'){
            generalGroup.append(row(name, value));
        }
    }
    target.append(generalGroup);

    // one group per nested object; a key named 'raws' is ignored
    for(name in data){
        value = data[name];
        if(name == 'raws' || typeof value !== 'object'){
            continue;
        }
        var nestedGroup = group(name);
        for(inner in value){
            nestedGroup.append(row(inner, value[inner]));
        }
        target.append(nestedGroup);
    }
}
// configuration
// Probe with typeof, like the statistics check below: a bare `IMP_CONFIG`
// throws a ReferenceError (and aborts the rest of this script) when the
// data script did not define it.
if(typeof IMP_CONFIG !== 'undefined' && IMP_CONFIG){
    renderConfiguration(IMP_CONFIG, '#configuration');
}
// runtime statistics
if(typeof IMP_STATS !== 'undefined') {
    //renderTimeChart(IMP_STATS.rules, '% runtime per task ', 'mean-runtime', 'time-charts-mean', IMP_STATS.total_runtime);
    renderFiles(IMP_STATS.files, '#files-per-samples', IMP_STATS.total_runtime);
    renderBarChart(IMP_STATS.rules, '% runtime per task ', 'mean-runtime', '#bar-chart', IMP_STATS.total_runtime);
} else {
    // No statistics available: drop the placeholder block.
    $('#imp-stats').remove();
}
// BUTTONS
// Each button group reacts to `change` by showing the resource named in the
// value of its checked radio input.
(function(){
    // Value of the checked radio input of the group called `group`.
    function picked(group){
        return $('input[name="' + group + '"]:checked').val();
    }

    // Groups that only point the `src` of a target element at the selection:
    // [button group, radio name, target element]
    var srcSwitches = [
        ['#but-raw', 'raw-data', '#ifr-raw-stat'],
        ['#but-preprocess', 'preprocess-data', '#ifr-preprocess-stat'],
        ['#but-annot', 'annot-data', '#ifr-kronaplot'],
        ['#but-ratio', 'ratio-data', '#ratio-wrapper'],
        ['#but-variant', 'variant-data', '#variant-wrapper']
    ];
    srcSwitches.forEach(function(spec){
        $(spec[0]).on('change', function(){
            var choice = picked(spec[1]);
            $(spec[2]).attr('src', choice);
        });
    });

    // Assembly: images are shown inline, anything else in an iframe.
    $('#but-assembly').on('change', function(){
        var choice = picked('assembly-data');
        var wrapper = $("#assembly-wrapper");
        if(!choice.endsWith('png')){
            wrapper.replaceWith("<div id='assembly-wrapper' class='row'><iframe style='position: absolute; width: 100%;height: 100%; border: none' src='" + choice + "'></iframe></div>");
        } else {
            wrapper.replaceWith("<div id='assembly-wrapper' class='row'><img class='img-responsive' src='" + choice + "'/></div>");
        }
        // VizBin hint for VizBin plots, MetaQUAST credit otherwise.
        if(choice.indexOf('vizbin') === -1){
            $('.vizbin-img').hide();
            $('.metaquast-p').show();
        } else {
            $('.vizbin-img').show();
            $('.metaquast-p').hide();
        }
    });

    // Mapping: swap the image and show the VizBin hint only for VizBin plots.
    $('#but-mapping').on('change', function(){
        var choice = picked('mapping-data');
        $("#mapping-wrapper").attr('src', choice);
        if(choice.indexOf('vizbin') === -1){
            $('.vizbin-img').hide();
        } else {
            $('.vizbin-img').show();
        }
    });
})();
// set last tab in cookie
// NOTE(review): relies on a global `Cookies(name[, value])` function; confirm
// that the cookie library loaded by the page provides it.
$('#tabs a[data-toggle="tab"]').on('shown.bs.tab', function (e) {
    Cookies('tab', $(e.target).attr('href'));
});
// load kronaplot if people click on tab. Make a lot of errors if loaded from start.
// Bound BEFORE the initial tab is shown, so that restoring the annotation
// tab from the cookie also triggers the load.
$('#annottab').on('show.bs.tab', function (e) {
    // Test the iframe that is actually filled below; the id checked before
    // did not match it, so the plot was reset on every visit of the tab.
    if($("#ifr-kronaplot").attr('src') === undefined){
        $("#ifr-kronaplot").attr('src', 'Analysis/results/mg.gene_kegg_krona.html');
    }
});
// show last opened tab or first one
(function(){
    var saved = Cookies('tab');
    var link = $();
    if(saved){
        // Compare href values directly instead of splicing the cookie value
        // into a selector string (unquoted "#..." is not a valid attribute
        // selector value and the cookie content is not trusted).
        link = $('#tabs li > a').filter(function(){
            return $(this).attr('href') === saved;
        });
    }
    // Nothing saved, or the saved tab no longer exists: open the first one.
    if(link.length === 0){
        link = $('#tabs li:first > a');
    }
    link.tab('show');
})();
//load maxbin results
// Fetch the tab-separated MaxBin summary and append it to #maxbin-results as
// an HTML table. The first line with more than one column becomes the header
// row; lines with a single column are skipped.
$.get('Binning/MaxBin/maxbin_res.summary', function(data) {
    var markup = '<table>';
    var headerDone = false;
    data.split("\n").forEach(function (line) {
        var cells = line.split("\t");
        if(cells.length <= 1){
            return;
        }
        // Header cells for the first kept line, plain cells afterwards.
        var tag = headerDone ? 'td' : 'th';
        headerDone = true;
        markup += "<tr>";
        for(var c in cells){
            markup += '<' + tag + '>' + cells[c] + '</' + tag + '>';
        }
        markup += "</tr>";
    });
    markup += "</table>";
    $('#maxbin-results').append(markup);
});
// Binning: show the image named by the checked radio input.
$('#but-binning').on('change', function(){
    var choice = $('input[name="binning-data"]:checked').val();
    var image = $("#binning-wrapper");
    image.attr('src', choice);
});
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
lib/vizbin_logo.png

28.8 KiB

This diff is collapsed.
# Snakemake rule: write a contig-length table and a GC-content table for the
# merged assembly named by ASS.
rule assembly_contig_length:
# Merged assembly FASTA.
input:
"Assembly/%s.assembly.merged.fa" % ASS
# output[0]: stdout of fastaNamesSizes.pl; output[1]: columns 1-2 of the
# get_GC_content.pl result with every '>' removed.
output:
"Analysis/%s.assembly.length.txt" % ASS,
"Analysis/%s.assembly.gc_content.txt" % ASS
# The GC script writes to a temporary file created with mktemp
# (--tmpdir={TMPDIR}); that file is deleted once output[1] has been derived.
shell:
"""
echo "[x] LENGTH `date +"%Y/%m/%d %H:%M:%S"`"
echo "Obtaining contig lengths"
perl {SRCDIR}/fastaNamesSizes.pl {input} > {output[0]}
echo "Obtaining GC content"
TMP_GC=$(mktemp --tmpdir={TMPDIR} -t "gc_out_XXXXXX.txt")
perl {SRCDIR}/get_GC_content.pl {input} $TMP_GC
# Th program above provides a file gc_out.txt. This command cleans the output
echo "Clean up output"
cut -f1,2 $TMP_GC | sed -e 's/>//g'> {output[1]}
echo "Remove intermediate files"
rm $TMP_GC
"""
This diff is collapsed.
# Snakemake rule: write a contig-length table and a GC-content table for the
# merged assembly selected by the `type` wildcard.
rule call_contig_length:
# Merged assembly FASTA of the requested type.
input:
"Assembly/{type}.assembly.merged.fa"
# output[0]: stdout of fastaNamesSizes.pl; output[1]: columns 1-2 of the
# get_GC_content.pl result with every '>' removed.
output:
"Analysis/{type}.assembly-{type}.length.txt",
"Analysis/{type}.assembly-{type}.gc_content.txt",
# The GC script writes to a temporary file created with mktemp
# (--tmpdir={TMPDIR}); that file is deleted once output[1] has been derived.
shell:
"""
echo "[x] LENGTH `date +"%Y/%m/%d %H:%M:%S"`"
echo "Obtaining contig lengths"
perl {SRCDIR}/fastaNamesSizes.pl {input} > {output[0]}
echo "Obtaining GC content"
TMP_GC=$(mktemp --tmpdir={TMPDIR} -t "gc_out_XXXXXX.txt")
perl {SRCDIR}/get_GC_content.pl {input} $TMP_GC
# Th program above provides a file gc_out.txt. This command cleans the output
echo "Clean up output"
cut -f1,2 $TMP_GC | sed -e 's/>//g'> {output[1]}
echo "Remove intermediate files"
rm $TMP_GC
"""
This diff is collapsed.
This diff is collapsed.