From 2fe5a837157814b2964a4968d4c2ca2d70af2db3 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Fri, 12 Feb 2010 15:55:17 -0500 Subject: [PATCH] Fixed several bugs and improved BAM performance. 1. Fixed bug to "re-allow" track and "browser" lines. 2. Fixed bug in reporting BEDPE overlaps. 3. Fixed bug when using type "notboth" with BAM files in pairToBed. 4. When comparing BAM files to BED/GFF annotations with intersectBed or pairToBed, the __aligned__ sequence is used, rather than the __original__ sequence. --- src/intersectBed/intersectBed.cpp | 51 +++------ src/intersectBed/intersectBed.h | 2 +- src/pairToBed/pairToBed.cpp | 166 ++++++++---------------------- src/pairToBed/pairToBed.h | 15 +-- src/utils/bedFile/bedFile.cpp | 117 ++++++++++++++++++--- src/utils/bedFile/bedFile.h | 14 +++ src/utils/bedFilePE/bedFilePE.cpp | 35 +------ 7 files changed, 193 insertions(+), 207 deletions(-) diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index d3be55c1..d9df8815 100755 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -137,39 +137,17 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) { } -bool BedIntersect::FindOneOrMoreOverlap(const BED &a, vector<BED> &hits) { - - // grab _all_ of the features in B that overlap with a. - bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, this->forceStrand); - - // loop through the hits and report those that meet the user's criteria - vector<BED>::const_iterator h = hits.begin(); - vector<BED>::const_iterator hitsEnd = hits.end(); - for (; h != hitsEnd; ++h) { - - int s = max(a.start, h->start); - int e = min(a.end, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - int aLength = (a.end - a.start); // the length of a in b.p. - - // is there enough overlap relative to the user's request? (default ~ 1bp) - if ( ( (float) overlapBases / (float) aLength ) >= this->overlapFraction ) { - - // Report the hit if the user doesn't care about reciprocal overlap between A and B. - if (!reciprocal) { - return true; - } - else { // the user wants there to be sufficient reciprocal overlap - int bLength = (h->end - h->start); - float bOverlap = ( (float) overlapBases / (float) bLength ); - - if (bOverlap >= this->overlapFraction) { - return true; - } - } - } +bool BedIntersect::FindOneOrMoreOverlap(const BED &a) { + bool overlapsFound; + if (this->reciprocal == false) { + overlapsFound = bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, + this->forceStrand, this->overlapFraction); + } + else { + overlapsFound = bedB->FindOneOrMoreReciprocalOverlapsPerBin(a.chrom, a.start, a.end, a.strand, + this->forceStrand, this->overlapFraction); } - return false; + return overlapsFound; } @@ -239,8 +217,9 @@ void BedIntersect::IntersectBam(string bamFile) { BED a; a.chrom = refs.at(bam.RefID).RefName; a.start = bam.Position; - a.end = bam.Position + bam.Length; + a.end = bam.Position + bam.AlignedBases.size(); + // build the name field from the BAM alignment. a.name = bam.Name; if (bam.IsFirstMate()) a.name += "/1"; if (bam.IsSecondMate()) a.name += "/2"; @@ -249,7 +228,7 @@ void BedIntersect::IntersectBam(string bamFile) { a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; if (this->bamOutput == true) { - overlapsFound = FindOneOrMoreOverlap(a, hits); + overlapsFound = FindOneOrMoreOverlap(a); if (overlapsFound == true) { if (!this->noHit) writer.SaveAlignment(bam); } @@ -258,9 +237,9 @@ void BedIntersect::IntersectBam(string bamFile) { } } else { - overlapsFound = FindOverlaps(a, hits); + overlapsFound = FindOverlaps(a, hits); + hits.clear(); } - hits.clear(); } } diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h index 2c453bb7..656e45f9 100755 --- a/src/intersectBed/intersectBed.h +++ b/src/intersectBed/intersectBed.h @@ -46,7 +46,7 @@ public: void reportB(const BED &); bool FindOverlaps(const BED &a, vector<BED> &hits); - bool FindOneOrMoreOverlap(const BED &a, vector<BED> &hits); + bool FindOneOrMoreOverlap(const BED &a); void IntersectBed(istream &bedInput); void IntersectBam(string bamFile); diff --git a/src/pairToBed/pairToBed.cpp b/src/pairToBed/pairToBed.cpp index 260ea33f..f9fc7fde 100755 --- a/src/pairToBed/pairToBed.cpp +++ b/src/pairToBed/pairToBed.cpp @@ -57,7 +57,7 @@ BedIntersectPE::~BedIntersectPE(void) { -void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, string &type) { +void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, const string &type) { // list of hits on each end of BEDPE // that exceed the requested overlap fraction @@ -163,107 +163,62 @@ void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED } -bool BedIntersectPE::FindOneOrMoreOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, string &type) { +bool BedIntersectPE::FindOneOrMoreOverlaps(const BEDPE &a, const string &type) { - // list of hits on each end of BEDPE + // flags for the existence of hits on each end of BEDPE // that exceed the requested overlap fraction - vector<BED> qualityHits1; - vector<BED> qualityHits2; + bool end1Found = false; + bool end2Found = false; - // count of hits on each end of BEDPE - // that exceed the requested overlap fraction - int numOverlapsEnd1 = 0; - int numOverlapsEnd2 = 0; - - /* - Look for overlaps in end 1 - */ - - // make sure we have a valid chromosome before we search + // Look for overlaps in end 1 assuming we have an aligned chromosome. if (a.chrom1 != ".") { - // Find the quality hits between ***end1*** of the BEDPE and the B BED file - bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, this->forceStrand); - - vector<BED>::const_iterator h = hits1.begin(); - vector<BED>::const_iterator hitsEnd = hits1.end(); - for (; h != hitsEnd; ++h) { - - int s = max(a.start1, h->start); - int e = min(a.end1, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - int aLength = (a.end1 - a.start1); // the length of a in b.p. - - // is there enough overlap relative to the user's request? (default ~ 1bp) - if ( ( (float) overlapBases / (float) aLength ) >= this->overlapFraction ) { - numOverlapsEnd1++; - qualityHits1.push_back(*h); - - if (type == "either") return true; - else if (type == "neither") return false; - } - } + end1Found = bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, + this->forceStrand, this->overlapFraction); + + // can we bail out without checking end2? + if ((type == "either") && (end1Found == true)) return true; + else if ((type == "neither") && (end1Found == true)) return false; + else if ((type == "notboth") && (end1Found == false)) return true; + else if ((type == "both") && (end1Found == false)) return false; } - - // if testing for "notboth" or "both", we can bail early depending on end one - if ((type == "notboth") && (numOverlapsEnd1 == 0)) return true; - else if ((type == "both") && (numOverlapsEnd1 == 0)) return false; - - /* - Now look for overlaps in end 2 - */ - - // make sure we have a valid chromosome before we search + + // Now look for overlaps in end 2 assuming we have an aligned chromosome. if (a.chrom2 != ".") { - // Now find the quality hits between ***end2*** of the BEDPE and the B BED file - bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, this->forceStrand); - - vector<BED>::const_iterator h = hits2.begin(); - vector<BED>::const_iterator hitsEnd = hits2.end(); - for (; h != hitsEnd; ++h) { - - int s = max(a.start2, h->start); - int e = min(a.end2, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - int aLength = (a.end2 - a.start2); // the length of a in b.p. - - // is there enough overlap relative to the user's request? (default ~ 1bp) - if ( ( (float) overlapBases / (float) aLength ) >= this->overlapFraction ) { - numOverlapsEnd2++; - qualityHits2.push_back(*h); - - if (type == "either") return true; - else if (type == "neither") return false; - else if ((type == "notboth") && (numOverlapsEnd1 > 0)) return false; - } - } + end2Found = bedB->FindOneOrMoreOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, + this->forceStrand, this->overlapFraction); + + if ((type == "either") && (end2Found == true)) return true; + else if ((type == "neither") && (end2Found == true)) return false; + else if ((type == "notboth") && (end2Found == false)) return true; + else if ((type == "both") && (end2Found == false)) return false; } // Now report the hits depending on what the user has requested. if (type == "notboth") { - if ( (numOverlapsEnd1 == 0) || (numOverlapsEnd2 == 0) ) return true; + if ( (end1Found == false) || (end2Found == false) ) return true; else return false; } + else if (type == "either") { + if ( (end1Found == false) && (end2Found == false) ) return false; + } else if (type == "neither") { - if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 == 0) ) return true; + if ( (end1Found == false) && (end2Found == false) ) return true; else return false; } else if (type == "xor") { - if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 == 0) ) return true; - else if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 > 0) ) return true; + if ( (end1Found == true) && (end2Found == false) ) return true; + else if ( (end1Found == false) && (end2Found == true) ) return true; else return false; } else if (type == "both") { - if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 > 0) ) return true; + if ( (end1Found == true) && (end2Found == true) ) return true; return false; } return false; } - - - -void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type) { +void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type) { // count of hits on _between_ end of BEDPE // that exceed the requested overlap fraction @@ -318,12 +273,13 @@ void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, str } -bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type) { +bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type) { int spanStart = 0; int spanEnd = 0; int spanLength = 0; - + bool overlapFound; + if ((type == "ispan") || (type == "notispan")) { spanStart = a.end1; spanEnd = a.start2; @@ -342,24 +298,10 @@ bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, vector<BED> & } spanLength = spanEnd - spanStart; - // get the hits for the span - bedB->FindOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1, hits, this->forceStrand); - - vector<BED>::const_iterator h = hits.begin(); - vector<BED>::const_iterator hitsEnd = hits.end(); - for (; h != hitsEnd; ++h) { - - int s = max(spanStart, h->start); - int e = min(spanEnd, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - int spanLength = (spanEnd - spanStart); // the length of a in b.p. - - // is there enough overlap relative to the user's request? (default ~ 1bp) - if ( ( (float) overlapBases / (float) spanLength ) >= this->overlapFraction ) { - return true; - } - } - return false; + overlapFound = bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1, + this->forceStrand, this->overlapFraction); + + return overlapFound; } @@ -459,9 +401,8 @@ void BedIntersectPE::IntersectBamPE(string bamFile) { BEDPE a; ConvertBamToBedPE(bam, refs, a); // look for overlaps, and write to BAM if >=1 were found - overlapsFound = FindOneOrMoreSpanningOverlaps(a, hits, this->searchType); - if (overlapsFound == false) writer.SaveAlignment(bam); - hits.clear(); + overlapsFound = FindOneOrMoreSpanningOverlaps(a, this->searchType); + if (overlapsFound == true) writer.SaveAlignment(bam); } else if ( IsCorrectMappingForBEDPE(bam) ) { // BEDPE output BEDPE a; @@ -480,9 +421,8 @@ void BedIntersectPE::IntersectBamPE(string bamFile) { BEDPE a; ConvertBamToBedPE(bam, refs, a); // write to BAM if there were no overlaps - overlapsFound = FindOneOrMoreSpanningOverlaps(a, hits, this->searchType); + overlapsFound = FindOneOrMoreSpanningOverlaps(a, this->searchType); if (overlapsFound == false) writer.SaveAlignment(bam); - hits.clear(); } else if ( IsCorrectMappingForBEDPE(bam) ) { // BEDPE output BEDPE a; @@ -497,13 +437,14 @@ void BedIntersectPE::IntersectBamPE(string bamFile) { else if (this->bamOutput == true) writer.SaveAlignment(bam); } else if ( (this->searchType == "either") || (this->searchType == "xor") || - (this->searchType == "both") || (this->searchType == "notboth") ) { + (this->searchType == "both") || (this->searchType == "notboth") || + (this->searchType == "neither") ) { if (this->bamOutput == true) { // BAM output BEDPE a; ConvertBamToBedPE(bam, refs, a); // write to BAM if correct hits found - overlapsFound = FindOneOrMoreOverlaps(a, hits1, hits2, this->searchType); + overlapsFound = FindOneOrMoreOverlaps(a, this->searchType); if (overlapsFound == true) writer.SaveAlignment(bam); } else if ( IsCorrectMappingForBEDPE(bam) ) { // BEDPE output @@ -514,23 +455,6 @@ void BedIntersectPE::IntersectBamPE(string bamFile) { hits2.clear(); } } - else if (this->searchType == "neither") { // BAM output - if (this->bamOutput == true) { - BEDPE a; ConvertBamToBedPE(bam, refs, a); - // write to BAM if not hits found - overlapsFound = FindOneOrMoreOverlaps(a, hits1, hits2, this->searchType); - if (overlapsFound == true) { - writer.SaveAlignment(bam); - } - } - else if ( IsCorrectMappingForBEDPE(bam) ) { // BEDPE output - BEDPE a; - ConvertBamToBedPE(bam, refs, a); - FindOverlaps(a, hits1, hits2, this->searchType); - hits1.clear(); - hits2.clear(); - } - } } reader.Close(); if (this->bamOutput == true) { diff --git a/src/pairToBed/pairToBed.h b/src/pairToBed/pairToBed.h index 665dd13f..bc938f9d 100755 --- a/src/pairToBed/pairToBed.h +++ b/src/pairToBed/pairToBed.h @@ -48,11 +48,12 @@ public: // destructor ~BedIntersectPE(void); - void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, string &type); - bool FindOneOrMoreOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, string &type); + void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type); + + bool FindOneOrMoreOverlaps(const BEDPE &, const string &type); - void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type); - bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type); + void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type); + bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type); void IntersectBedPE(istream &bedInput); void IntersectBamPE(string bamFile); @@ -84,7 +85,7 @@ private: if (bam.IsMapped()) { a.chrom1 = refs.at(bam.RefID).RefName; a.start1 = bam.Position; - a.end1 = bam.Position + bam.Length; + a.end1 = bam.Position + bam.AlignedBases.size(); a.strand1 = "+"; if (bam.IsReverseStrand()) a.strand1 = "-"; } @@ -93,11 +94,11 @@ private: if (bam.IsMateMapped()) { a.chrom2 = refs.at(bam.MateRefID).RefName; a.start2 = bam.MatePosition; - a.end2 = bam.MatePosition + bam.Length; + a.end2 = bam.MatePosition + bam.AlignedBases.size(); a.strand2 = "+"; if (bam.IsMateReverseStrand()) a.strand2 = "-"; } - + a.name = bam.Name; a.score = ToString(bam.MapQuality); }; diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index 6de44618..be87fcb9 100755 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -101,6 +101,45 @@ void BedFile::FindOverlapsPerBin(string chrom, int start, int end, string strand startBin = (start >> _binFirstShift); endBin = ((end-1) >> _binFirstShift); + // loop through each bin "level" in the binning hierarchy + for (int i = 0; i < 6; ++i) { + + // loop through each bin at this level of the hierarchy + int offset = binOffsetsExtended[i]; + for (int j = (startBin+offset); j <= (endBin+offset); ++j) { + + // loop through each feature in this chrom/bin and see if it overlaps + // with the feature that was passed in. if so, add the feature to + // the list of hits. + vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin(); + vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end(); + + for (; bedItr != bedEnd; ++bedItr) { + // do we have sufficient overlap? + if (overlaps(bedItr->start, bedItr->end, start, end) > 0) { + // skip the hit if not on the same strand (and we care) + if (forceStrand == false) hits.push_back(*bedItr); + else if ( (forceStrand == true) && (strand == bedItr->strand)) { + hits.push_back(*bedItr); + } + } + } + } + startBin >>= _binNextShift; + endBin >>= _binNextShift; + } +} + + +bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand, + bool forceStrand, float overlapFraction) { + + int startBin, endBin; + startBin = (start >> _binFirstShift); + endBin = ((end-1) >> _binFirstShift); + + int aLength = (end - start); + // loop through each bin "level" in the binning hierarchy for (int i = 0; i < 6; ++i) { @@ -114,23 +153,74 @@ void BedFile::FindOverlapsPerBin(string chrom, int start, int end, string strand vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin(); vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end(); for (; bedItr != bedEnd; ++bedItr) { - - // skip the hit if not on the same strand (and we care) - if (forceStrand && (strand != bedItr->strand)) { - continue; - } - else if (overlaps(bedItr->start, bedItr->end, start, end) > 0) { - hits.push_back(*bedItr); // it's a hit, add it. - } - + int s = max(start, bedItr->start); + int e = min(end, bedItr->end); + // the number of overlapping bases b/w a and b + int overlapBases = (e - s); + + // do we have sufficient overlap? + if ( (float) overlapBases / (float) aLength >= overlapFraction) { + // skip the hit if not on the same strand (and we care) + if (forceStrand == false) return true; + else if ( (forceStrand == true) && (strand == bedItr->strand)) { + return true; + } + } } } startBin >>= _binNextShift; endBin >>= _binNextShift; } + return false; } +bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand, + bool forceStrand, float overlapFraction) { + + int startBin, endBin; + startBin = (start >> _binFirstShift); + endBin = ((end-1) >> _binFirstShift); + + int aLength = (end - start); + + // loop through each bin "level" in the binning hierarchy + for (int i = 0; i < 6; ++i) { + + // loop through each bin at this level of the hierarchy + int offset = binOffsetsExtended[i]; + for (int j = (startBin+offset); j <= (endBin+offset); ++j) { + + // loop through each feature in this chrom/bin and see if it overlaps + // with the feature that was passed in. if so, add the feature to + // the list of hits. + vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin(); + vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end(); + for (; bedItr != bedEnd; ++bedItr) { + int s = max(start, bedItr->start); + int e = min(end, bedItr->end); + // the number of overlapping bases b/w a and b + int overlapBases = (e - s); + + // do we have sufficient overlap? + if ( (float) overlapBases / (float) aLength >= overlapFraction) { + int bLength = (bedItr->end - bedItr->start); + float bOverlap = ( (float) overlapBases / (float) bLength ); + if ((forceStrand == false) && (bOverlap >= overlapFraction)) { + return true; + } + else if ( (forceStrand == true) && (strand == bedItr->strand) && (bOverlap >= overlapFraction)) { + return true; + } + } + } + } + startBin >>= _binNextShift; + endBin >>= _binNextShift; + } + return false; +} + void BedFile::countHits(const BED &a, bool forceStrand) { @@ -202,7 +292,7 @@ bool BedFile::parseLine (BED &bed, const vector<string> &lineVector, int &lineNu char *p2End, *p3End, *p4End, *p5End; long l2, l3, l4, l5; - if ((lineVector[0] != "track") && (lineVector[0] != "browser") && (lineVector[0].find("#") == string::npos) ) { + if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") && string::npos) && (lineVector[0].find("#") == string::npos) ) { // we need at least 3 columns if (lineVector.size() >= 3) { @@ -515,7 +605,10 @@ void BedFile::loadBedFileIntoMap() { int bin = getBin(bedEntry.start, bedEntry.end); bedEntry.count = 0; bedEntry.minOverlapStart = INT_MAX; - this->bedMap[bedEntry.chrom][bin].push_back(bedEntry); + + //string key = getChromBinKey(bedEntry.chrom, bin); + //this->bedMap[key].push_back(bedEntry); + this->bedMap[bedEntry.chrom][bin].push_back(bedEntry); } bedFields.clear(); } @@ -531,7 +624,7 @@ void BedFile::loadBedFileIntoMap() { int bin = getBin(bedEntry.start, bedEntry.end); bedEntry.count = 0; bedEntry.minOverlapStart = INT_MAX; - this->bedMap[bedEntry.chrom][bin].push_back(bedEntry); + this->bedMap[bedEntry.chrom][bin].push_back(bedEntry); } bedFields.clear(); } diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h index 06838f44..08be97e1 100755 --- a/src/utils/bedFile/bedFile.h +++ b/src/utils/bedFile/bedFile.h @@ -71,6 +71,7 @@ struct BED { // return the genome "bin" for a feature with this start and end int getBin(int start, int end); + // return the amount of overlap between two features. Negative if none and the the // number of negative bases is the distance between the two. @@ -118,12 +119,16 @@ bool byChromThenStart(BED const &a, BED const &b); //************************************************* // Common typedefs //************************************************* +typedef vector<BED> bedVector; + typedef map<int, vector<BED>, std::less<int> > binsToBeds; //typedef tr1::unordered_map<int, vector<BED> > binsToBeds; typedef map<string, binsToBeds, std::less<string> > masterBedMap; //typedef tr1::unordered_map<string, binsToBeds> masterBedMap; +//typedef map<string, bedVector, std::less<string> > masterBedMap; + typedef map<string, vector<BED>, std::less<string> > masterBedMapNoBin; @@ -152,6 +157,15 @@ public: // as the single feature. Note: Adapted from kent source "binKeeperFind" void FindOverlapsPerBin(string chrom, int start, int end, string strand, vector<BED> &hits, bool forceStrand); + // return true if at least one overlap was found. otherwise, return false. + bool FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand, + bool forceStrand, float overlapFraction); + + // return true if at least one __reciprocal__ overlap was found. otherwise, return false. + bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand, + bool forceStrand, float overlapFraction); + + // Given a chrom, start, end and strand for a single feature, // increment a the number of hits for each feature in B file // that the feature overlaps diff --git a/src/utils/bedFilePE/bedFilePE.cpp b/src/utils/bedFilePE/bedFilePE.cpp index ca0a8d6f..2a675f77 100755 --- a/src/utils/bedFilePE/bedFilePE.cpp +++ b/src/utils/bedFilePE/bedFilePE.cpp @@ -15,31 +15,6 @@ #include "bedFilePE.h" -/*void BedFilePE::binKeeperFind(map<int, vector<BED>, std::less<int> > &bk, const int start, const int end, vector<BED> &hits) { - - int startBin, endBin; - startBin = (start >>_binFirstShift); - endBin = ((end-1) >>_binFirstShift); - - for (int i = 0; i < 6; ++i) { - int offset = binOffsetsExtended[i]; - - for (int j = (startBin+offset); j <= (endBin+offset); ++j) { - for (vector<BED>::const_iterator el = bk[j].begin(); el != bk[j].end(); ++el) { - - if (overlaps(el->start, el->end, start, end) > 0) { - hits.push_back(*el); - } - - } - } - startBin >>= _binNextShift; - endBin >>= _binNextShift; - } -} -*/ - - /* Adapted from kent source "binKeeperFind" */ @@ -267,7 +242,7 @@ bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, co bed.strand1 = lineVector[8]; bed.strand2 = lineVector[9]; - for (unsigned int i = 6; i < lineVector.size(); ++i) { + for (unsigned int i = 10; i < lineVector.size(); ++i) { bed.otherFields.push_back(lineVector[i]); } return true; @@ -360,7 +335,7 @@ bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, co bed.strand1 = lineVector[8]; bed.strand2 = lineVector[9]; - for (unsigned int i = 6; i < lineVector.size(); ++i) { + for (unsigned int i = 10; i < lineVector.size(); ++i) { bed.otherFields.push_back(lineVector[i]); } return true; @@ -433,9 +408,9 @@ void BedFilePE::loadBedPEFileIntoMap() { // load end1 into a UCSC bin map bin1 = getBin(bedEntry1.start, bedEntry1.end); this->bedMapEnd1[bedEntry1.chrom][bin1].push_back(bedEntry1); - + // load end2 into a UCSC bin map - bin2 = getBin(bedEntry2.start, bedEntry2.end); + bin2 = getBin(bedEntry2.start, bedEntry2.end); this->bedMapEnd2[bedEntry2.chrom][bin2].push_back(bedEntry2); } bedFields.clear(); @@ -459,7 +434,7 @@ void BedFilePE::loadBedPEFileIntoMap() { // load end2 into a UCSC bin map bin2 = getBin(bedEntry2.start, bedEntry2.end); - this->bedMapEnd2[bedEntry2.chrom][bin2].push_back(bedEntry2); + this->bedMapEnd2[bedEntry2.chrom][bin2].push_back(bedEntry2); } bedFields.clear(); } -- GitLab