diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index 582370dd071b4f0211625721c52604ea65caf6b0..072c8c27c4970f2df9f07d7c070fa41bb35d5259 100644 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -65,7 +65,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print */ BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit, bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, - float overlapFraction, bool noHit, bool writeCount, bool forceStrand, + float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand, bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam) { _bedAFile = bedAFile; @@ -78,7 +78,8 @@ BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit, _writeAllOverlap = writeAllOverlap; _writeCount = writeCount; _overlapFraction = overlapFraction; - _forceStrand = forceStrand; + _sameStrand = sameStrand; + _diffStrand = diffStrand; _reciprocal = reciprocal; _obeySplits = obeySplits; _bamInput = bamInput; @@ -113,7 +114,7 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) { printable = false; // collect and report the sufficient hits - _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _forceStrand); + _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); hitsFound = processHits(a, hits, printable); return hitsFound; @@ -177,11 +178,11 @@ bool BedIntersect::FindOneOrMoreOverlap(const BED &a) { bool overlapsFound; if (_reciprocal == false) { overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, - _forceStrand, _overlapFraction); + _sameStrand, _diffStrand, _overlapFraction); } else { overlapsFound = _bedB->FindOneOrMoreReciprocalOverlapsPerBin(a.chrom, a.start, a.end, a.strand, - _forceStrand, _overlapFraction); + _sameStrand, _diffStrand, _overlapFraction); } return overlapsFound; } diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h index 095e4818c6d1c7840c92c5b922cad52d34de12fd..e410693426c64d4275e9528f1b81bec7aed81f7d 100644 --- a/src/intersectBed/intersectBed.h +++ b/src/intersectBed/intersectBed.h @@ -36,7 +36,7 @@ public: // constructor BedIntersect(string bedAFile, string bedBFile, bool anyHit, bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, - float overlapFraction, bool noHit, bool writeCount, bool forceStrand, + float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand, bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam); // destructor @@ -55,7 +55,8 @@ private: bool _writeOverlap; bool _writeAllOverlap; - bool _forceStrand; + bool _sameStrand; + bool _diffStrand; bool _reciprocal; float _overlapFraction; diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index e6d35cd5c03b339cc5f811323ab82bd7918f2368..91d0dd445e5f8d8f4919e61d3eeac27dcd4deb80 100644 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -47,7 +47,8 @@ int main(int argc, char* argv[]) { bool writeAllOverlap = false; bool haveFraction = false; bool reciprocalFraction = false; - bool forceStrand = false; + bool sameStrand = false; + bool diffStrand = false; bool obeySplits = false; bool inputIsBam = false; bool outputIsBam = true; @@ -130,7 +131,10 @@ int main(int argc, char* argv[]) { noHit = true; } else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; + sameStrand = true; + } + else if (PARAMETER_CHECK("-S", 2, parameterLength)) { + diffStrand = true; } else if (PARAMETER_CHECK("-split", 6, parameterLength)) { obeySplits = true; @@ -195,11 +199,15 @@ int main(int argc, char* argv[]) { showHelp = true; } + if (sameStrand && diffStrand) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; + showHelp = true; + } if (!showHelp) { BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap, - writeAllOverlap, overlapFraction, noHit, writeCount, forceStrand, + writeAllOverlap, overlapFraction, noHit, writeCount, sameStrand, diffStrand, reciprocalFraction, obeySplits, inputIsBam, outputIsBam, uncompressedBam); delete bi; return 0; @@ -263,8 +271,12 @@ void ShowHelp(void) { cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl; cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl; - cerr << "\t-s\t" << "Force strandedness. That is, only report hits in B that" << endl; - cerr << "\t\toverlap A on the same strand." << endl; + cerr << "\t-s\t" << "Require same strandedness. That is, only report hits in B that" << endl; + cerr << "\t\toverlap A on the _same_ strand." << endl; + cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; + + cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in B that" << endl; + cerr << "\t\toverlap A on the _opposite_ strand." << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl; diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index cdb715675310ae8ec70dc8deb71a386c40c26c26..c6c9afd72784bb9ab2604ecd6680831e0ea6b820 100644 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -198,7 +198,7 @@ BedLineStatus BedFile::GetNextBed(BED &bed, int &lineNum) { void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, - string strand, vector<BED> &hits, bool forceStrand) { + string strand, vector<BED> &hits, bool sameStrand, bool diffStrand) { BIN startBin, endBin; startBin = (start >> _binFirstShift); @@ -220,10 +220,18 @@ void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, for (; bedItr != bedEnd; ++bedItr) { // do we have sufficient overlap? if (overlaps(bedItr->start, bedItr->end, start, end) > 0) { - // skip the hit if not on the same strand (and we care) - if (forceStrand == false) hits.push_back(*bedItr); - else if ( (forceStrand == true) && (strand == bedItr->strand)) { - hits.push_back(*bedItr); + + bool strands_are_same = (strand == bedItr->strand); + + // test for necessary strandedness + if ( (sameStrand == false && diffStrand == false) + || + (sameStrand == true && strands_are_same == true) + || + (diffStrand == true && strands_are_same == false) + ) + { + hits.push_back(*bedItr); } } } @@ -235,7 +243,7 @@ void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, - bool forceStrand, float overlapFraction) { + bool sameStrand, bool diffStrand, float overlapFraction) { BIN startBin, endBin; startBin = (start >> _binFirstShift); @@ -264,9 +272,17 @@ bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end // do we have sufficient overlap? if ( (float) overlapBases / (float) aLength >= overlapFraction) { - // skip the hit if not on the same strand (and we care) - if (forceStrand == false) return true; - else if ( (forceStrand == true) && (strand == bedItr->strand)) { + + bool strands_are_same = (strand == bedItr->strand); + + // test for necessary strandedness + if ( (sameStrand == false && diffStrand == false) + || + (sameStrand == true && strands_are_same == true) + || + (diffStrand == true && strands_are_same == false) + ) + { return true; } } @@ -280,7 +296,7 @@ bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, - bool forceStrand, float overlapFraction) { + bool sameStrand, bool diffStrand, float overlapFraction) { BIN startBin, endBin; startBin = (start >> _binFirstShift); @@ -311,10 +327,17 @@ bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, if ( (float) overlapBases / (float) aLength >= overlapFraction) { CHRPOS bLength = (bedItr->end - bedItr->start); float bOverlap = ( (float) overlapBases / (float) bLength ); - if ((forceStrand == false) && (bOverlap >= overlapFraction)) { - return true; - } - else if ( (forceStrand == true) && (strand == bedItr->strand) && (bOverlap >= overlapFraction)) { + bool strands_are_same = (strand == bedItr->strand); + + // test for sufficient reciprocal overlap and strandedness + if ( (bOverlap >= overlapFraction) && + ((sameStrand == false && diffStrand == false) + || + (sameStrand == true && strands_are_same == true) + || + (diffStrand == true && strands_are_same == false)) + ) + { return true; } } @@ -327,7 +350,7 @@ bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, } -void BedFile::countHits(const BED &a, bool forceStrand, bool countsOnly) { +void BedFile::countHits(const BED &a, bool sameStrand, bool diffStrand, bool countsOnly) { BIN startBin, endBin; startBin = (a.start >> _binFirstShift); @@ -346,9 +369,13 @@ void BedFile::countHits(const BED &a, bool forceStrand, bool countsOnly) { vector<BEDCOV>::iterator bedItr = bedCovMap[a.chrom][j].begin(); vector<BEDCOV>::iterator bedEnd = bedCovMap[a.chrom][j].end(); for (; bedItr != bedEnd; ++bedItr) { - + + bool strands_are_same = (a.strand == bedItr->strand); // skip the hit if not on the same strand (and we care) - if (forceStrand && (a.strand != bedItr->strand)) { + if ((sameStrand == true && strands_are_same == false) || + (diffStrand == true && strands_are_same == true) + ) + { continue; } else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) { @@ -378,7 +405,7 @@ void BedFile::countHits(const BED &a, bool forceStrand, bool countsOnly) { } -void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand, bool countsOnly) { +void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool sameStrand, bool diffStrand, bool countsOnly) { // set to track the distinct B features that had coverage. // we'll update the counts of coverage for these features by one @@ -407,8 +434,12 @@ void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand, boo vector<BEDCOV>::iterator bedEnd = bedCovMap[blockItr->chrom][j].end(); for (; bedItr != bedEnd; ++bedItr) { + bool strands_are_same = (blockItr->strand == bedItr->strand); // skip the hit if not on the same strand (and we care) - if (forceStrand && (blockItr->strand != bedItr->strand)) { + if ((sameStrand == true && strands_are_same == false) || + (diffStrand == true && strands_are_same == true) + ) + { continue; } else if (overlaps(bedItr->start, bedItr->end, blockItr->start, blockItr->end) > 0) { @@ -446,7 +477,7 @@ void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool forceStrand, boo } -void BedFile::countListHits(const BED &a, int index, bool forceStrand) { +void BedFile::countListHits(const BED &a, int index, bool sameStrand, bool diffStrand) { BIN startBin, endBin; startBin = (a.start >> _binFirstShift); @@ -466,7 +497,12 @@ void BedFile::countListHits(const BED &a, int index, bool forceStrand) { vector<BEDCOVLIST>::iterator bedEnd = bedCovListMap[a.chrom][j].end(); for (; bedItr != bedEnd; ++bedItr) { - if (forceStrand && (a.strand != bedItr->strand)) { + bool strands_are_same = (a.strand == bedItr->strand); + // skip the hit if not on the same strand (and we care) + if ((sameStrand == true && strands_are_same == false) || + (diffStrand == true && strands_are_same == true) + ) + { continue; } else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) { diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 72ff0e1ea9a6b9dcf1d0ca34716c4ab8ba929527..7ff83245d361401e0f338a9aa5056bb4e6ea0392 100644 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -424,32 +424,32 @@ public: // search for all overlapping features in another BED file. // Searches through each relevant genome bin on the same chromosome // as the single feature. Note: Adapted from kent source "binKeeperFind" - void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool forceStrand); + void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool sameStrand, bool diffStrand); // return true if at least one overlap was found. otherwise, return false. bool FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, - bool forceStrand, float overlapFraction = 0.0); + bool sameStrand, bool diffStrand, float overlapFraction = 0.0); // return true if at least one __reciprocal__ overlap was found. otherwise, return false. bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, - bool forceStrand, float overlapFraction = 0.0); + bool sameStrand, bool diffStrand, float overlapFraction = 0.0); // Given a chrom, start, end and strand for a single feature, // increment a the number of hits for each feature in B file // that the feature overlaps - void countHits(const BED &a, bool forceStrand = false, bool countsOnly = false); + void countHits(const BED &a, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false); // same as above, but has special logic that processes a set of // BED "blocks" from a single entry so as to avoid over-counting // each "block" of a single BAM/BED12 as distinct coverage. That is, // if one read has four block, we only want to count the coverage as // coming from one read, not four. - void countSplitHits(const vector<BED> &bedBlock, bool forceStrand = false, bool countsOnly = false); + void countSplitHits(const vector<BED> &bedBlock, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false); // Given a chrom, start, end and strand for a single feature, // increment a the number of hits for each feature in B file // that the feature overlaps - void countListHits(const BED &a, int index, bool forceStrand); + void countListHits(const BED &a, int index, bool sameStrand, bool diffStrand); // the bedfile with which this instance is associated string bedFile;