Skip to content
Snippets Groups Projects
Commit 39fffc97 authored by nkindlon's avatar nkindlon
Browse files

Added stranded info to jaccard and merge help, fixed stranded jaccard bug

parent e2be0789
No related branches found
No related tags found
No related merge requests found
......@@ -64,6 +64,15 @@ void jaccard_help(void) {
cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl;
cerr << "\t\tthat are on the same strand." << endl;
cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-S\t" << "Force merge for one specific strand only." << endl;
cerr << "\t\t" << "Follow with + or - to force merge from only" << endl;
cerr << "\t\t" << "the forward or reverse strand, respectively." << endl;
cerr << "\t\t" << "- By default, merging is done without respect to strand." << endl << endl;
cerr << "Notes: " << endl;
cerr << "\t(1) Input files must be sorted by chrom, then start position."
<< endl << endl;
......
......@@ -53,13 +53,13 @@ void merge_help(void) {
cerr << "Options: " << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl;
cerr << "\t\tthat are the same strand." << endl;
cerr << "\t\tthat are on the same strand." << endl;
cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-S\t" << "Force mergeing for a _specific_ strand. That is, only merge features" << endl;
cerr << "\t\tthat from a specific strabd." << endl;
cerr << "\t\t- For example, -S + will or -S -" << endl;
cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-S\t" << "Force merge for one specific strand only." << endl;
cerr << "\t\t" << "Follow with + or - to force merge from only" << endl;
cerr << "\t\t" << "the forward or reverse strand, respectively." << endl;
cerr << "\t\t" << "- By default, merging is done without respect to strand." << endl << endl;
cerr << "\t-d\t" << "Maximum distance between features allowed for features" << endl; cerr << "\t\tto be merged." << endl; cerr << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl;
cerr << "\t\t- (INTEGER)" << endl;
......
......@@ -16,3 +16,94 @@ ContextJaccard::ContextJaccard() {
ContextJaccard::~ContextJaccard() {
}
bool ContextJaccard::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
{
_argc = argc;
_argv = argv;
_skipFirstArgs = skipFirstArgs;
if (_argc < 2) {
setShowHelp(true);
return false;
}
setProgram(_programNames[argv[0]]);
_argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
}
else if (strcmp(_argv[_i], "-s") == 0) {
if (!handle_s()) return false;
}
else if (strcmp(_argv[_i], "-S") == 0) {
if (!handle_S()) return false;
}
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
bool ContextJaccard::isValidState()
{
if (!ContextIntersect::isValidState()) {
return false;
}
// Tests for stranded merge
//
if (_desiredStrand != FileRecordMergeMgr::ANY_STRAND) { // requested stranded merge
for (int i=0; i < getNumInputFiles(); i++) {
// make sure file has strand.
if (!getFile(i)->recordsHaveStrand()) {
_errorMsg = "\n***** ERROR: stranded merge requested, but input file ";
_errorMsg += getInputFileName(i);
_errorMsg += " does not have strands. *****";
return false;
}
//make sure file is not VCF.
if (getFile(1)->getFileType() == FileRecordTypeChecker::VCF_FILE_TYPE) {
_errorMsg = "\n***** ERROR: stranded merge not supported for VCF file ";
_errorMsg += getInputFileName(i);
_errorMsg += ". *****";
return false;
}
}
}
//column operations not allowed with BAM input
if (hasColumnOpsMethods() &&
getFile(0)->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
_errorMsg = "\n***** ERROR: stranded merge not supported for VCF files. *****";
return false;
}
return true;
}
bool ContextJaccard::handle_s() {
setSameStrand(true);
_desiredStrand = FileRecordMergeMgr::SAME_STRAND_EITHER;
markUsed(_i - _skipFirstArgs);
return true;
}
bool ContextJaccard::handle_S() {
if ((_i+1) < _argc) {
bool validChar = false;
if (_argv[_i+1][0] == '+') {
_desiredStrand = FileRecordMergeMgr::SAME_STRAND_FORWARD;
validChar = true;
} else if (_argv[_i+1][0] == '-') {
validChar = true;
_desiredStrand = FileRecordMergeMgr::SAME_STRAND_REVERSE;
}
if (validChar) {
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
setSameStrand(true);
return true;
}
}
_errorMsg = "\n***** ERROR: -S option must be followed by + or -. *****";
return false;
}
......@@ -14,10 +14,12 @@ class ContextJaccard : public ContextIntersect {
public:
ContextJaccard();
~ContextJaccard();
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
virtual bool isValidState();
private:
bool handle_s();
bool handle_S();
};
#endif /* CONTEXTJACCARD_H_ */
chr1 10 50 a1f 2 +
chr1 20 60 b1r 4 -
chr1 25 70 c1q 8 .
chr1 30 75 d1q 16 .
chr1 40 80 e1f 32 +
chr1 45 90 f1r 64 -
chr2 10 50 a2q 2 .
chr2 20 40 b2f 4 +
chr2 25 50 c2r 8 -
chr2 30 60 d2f 16 +
chr2 35 65 e2q 32 .
chr2 39 80 f2r 64 -
\ No newline at end of file
chr1 10 50 2a1r 2 -
chr1 40 70 2b1q 4 .
chr1 60 100 2c1f 8 +
chr2 15 40 2d2f 16 +
chr2 30 100 2e2r 32 -
......@@ -100,3 +100,50 @@ echo \
$BT jaccard -a a.bam -b three_blocks_match.bam > obs
check exp obs
rm exp obs
###########################################################
# Test jaccard with mixed strand files
###########################################################
echo " jaccard.t10...\c"
echo \
"intersection union-intersection jaccard n_intersections
145 180 0.805556 2" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed > obs
check obs exp
rm obs exp
###########################################################
# Test jaccard with mixed strand files, -s option
# (match strand, either forward or reverse)
###########################################################
echo " jaccard.t11...\c"
echo \
"intersection union-intersection jaccard n_intersections
120 290 0.413793 4" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -s > obs
check obs exp
rm obs exp
###########################################################
# Test jaccard with mixed strand files, -S + option
# (match strand, forward only)
###########################################################
echo " jaccard.t12...\c"
echo \
"intersection union-intersection jaccard n_intersections
40 135 0.296296 2" >exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -S + > obs
check obs exp
rm obs exp
###########################################################
# Test jaccard with mixed strand files, -S - option
# (match strand, reverse only)
###########################################################
echo " jaccard.t13...\c"
echo \
"intersection union-intersection jaccard n_intersections
80 155 0.516129 2" > exp
$BT jaccard -a aMixedStrands.bed -b bMixedStrands.bed -S - > obs
check obs exp
rm obs exp
BT=${BT-../../bin/bedtools}
check()
{
if diff $1 $2; then
echo ok
else
echo fail
fi
}
###########################################################
# Test a basic self intersection
###########################################################
echo " jaccard.t01...\c"
echo \
"intersection union-intersection jaccard n_intersections
110 110 1 2" > exp
$BT jaccard -a a.bed -b a.bed > obs
check obs exp
rm obs exp
echo " jaccard.t02...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 140 0.0714286 1" > exp
$BT jaccard -a a.bed -b b.bed > obs
check obs exp
rm obs exp
echo " jaccard.t03...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 200 0.05 1" > exp
$BT jaccard -a a.bed -b c.bed > obs
check obs exp
rm obs exp
echo " jaccard.t04...\c"
echo \
"intersection union-intersection jaccard n_intersections
0 210 0 0" > exp
$BT jaccard -a a.bed -b d.bed > obs
check obs exp
rm obs exp
###########################################################
# Test stdin
###########################################################
echo " jaccard.t05...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 140 0.0714286 1" > exp
cat a.bed | $BT jaccard -a - -b b.bed > obs
check obs exp
rm obs exp
###########################################################
# Test symmetry
###########################################################
echo " jaccard.t06...\c"
$BT jaccard -a a.bed -b b.bed > obs1
$BT jaccard -a b.bed -b a.bed > obs2
check obs1 obs2
rm obs1 obs2
###########################################################
# Test partially matching blocks without -split option.
###########################################################
echo " jaccard.t07...\c"
echo \
"intersection union-intersection jaccard n_intersections
10 50 0.2 1" > exp
$BT jaccard -a three_blocks_match.bed -b e.bed > obs
check obs exp
rm obs exp
###########################################################
# Test partially matching blocks with -split option.
###########################################################
echo " jaccard.t08...\c"
echo \
"intersection union-intersection jaccard n_intersections
5 55 0.0909091 1" > exp
$BT jaccard -a three_blocks_match.bed -b e.bed -split > obs
check obs exp
rm obs exp
intersection union-intersection jaccard n_intersections
10 150 0.0666667 1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment