From 2fe5a837157814b2964a4968d4c2ca2d70af2db3 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Fri, 12 Feb 2010 15:55:17 -0500
Subject: [PATCH] Fixed several bugs and improved BAM performance.

	1. Fixed bug to "re-allow" track and "browser" lines.
	2. Fixed bug in reporting BEDPE overlaps.
	3. Fixed bug when using type "notboth" with BAM files in pairToBed.
	4. When comparing BAM files to BED/GFF annotations with intersectBed or pairToBed, the __aligned__ sequence is used, rather than the __original__ sequence.
---
 src/intersectBed/intersectBed.cpp |  51 +++------
 src/intersectBed/intersectBed.h   |   2 +-
 src/pairToBed/pairToBed.cpp       | 166 ++++++++----------------------
 src/pairToBed/pairToBed.h         |  15 +--
 src/utils/bedFile/bedFile.cpp     | 117 ++++++++++++++++++---
 src/utils/bedFile/bedFile.h       |  14 +++
 src/utils/bedFilePE/bedFilePE.cpp |  35 +------
 7 files changed, 193 insertions(+), 207 deletions(-)

diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp
index d3be55c1..d9df8815 100755
--- a/src/intersectBed/intersectBed.cpp
+++ b/src/intersectBed/intersectBed.cpp
@@ -137,39 +137,17 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
 }
 
 
-bool BedIntersect::FindOneOrMoreOverlap(const BED &a, vector<BED> &hits) {
-	
-	// grab _all_ of the features in B that overlap with a.
-	bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, this->forceStrand); 
-	
-	// loop through the hits and report those that meet the user's criteria
-	vector<BED>::const_iterator h = hits.begin();
-	vector<BED>::const_iterator hitsEnd = hits.end();
-	for (; h != hitsEnd; ++h) {
-		
-		int s = max(a.start, h->start);
-		int e = min(a.end, h->end);
-		int overlapBases = (e - s);				// the number of overlapping bases b/w a and b
-		int aLength = (a.end - a.start);		// the length of a in b.p.
-		
-		// is there enough overlap relative to the user's request? (default ~ 1bp)
-		if ( ( (float) overlapBases / (float) aLength ) >= this->overlapFraction ) { 
-		
-			// Report the hit if the user doesn't care about reciprocal overlap between A and B.
-			if (!reciprocal) {
-				return true;
-			}
-			else {			// the user wants there to be sufficient reciprocal overlap
-				int bLength = (h->end - h->start);
-				float bOverlap = ( (float) overlapBases / (float) bLength );
-			
-				if (bOverlap >= this->overlapFraction) {
-					return true;
-				}
-			}
-		}
+bool BedIntersect::FindOneOrMoreOverlap(const BED &a) {
+	bool overlapsFound;
+	if (this->reciprocal == false) {
+		overlapsFound = bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, 
+			this->forceStrand, this->overlapFraction); 
+	}
+	else {
+		overlapsFound = bedB->FindOneOrMoreReciprocalOverlapsPerBin(a.chrom, a.start, a.end, a.strand, 
+			this->forceStrand, this->overlapFraction);
 	}
-	return false;
+	return overlapsFound;
 }
  
 
@@ -239,8 +217,9 @@ void BedIntersect::IntersectBam(string bamFile) {
 			BED a;
 			a.chrom = refs.at(bam.RefID).RefName;
 			a.start = bam.Position;
-			a.end = bam.Position + bam.Length;
+			a.end = bam.Position + bam.AlignedBases.size();
 
+			// build the name field from the BAM alignment.
 			a.name = bam.Name;
 			if (bam.IsFirstMate()) a.name += "/1";
 			if (bam.IsSecondMate()) a.name += "/2";
@@ -249,7 +228,7 @@ void BedIntersect::IntersectBam(string bamFile) {
 			a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; 
 	
 			if (this->bamOutput == true) {
-				overlapsFound = FindOneOrMoreOverlap(a, hits);
+				overlapsFound = FindOneOrMoreOverlap(a);
 				if (overlapsFound == true) {
 					if (!this->noHit) writer.SaveAlignment(bam);
 				}
@@ -258,9 +237,9 @@ void BedIntersect::IntersectBam(string bamFile) {
 				}
 			}
 			else {
-				overlapsFound = FindOverlaps(a, hits);				
+				overlapsFound = FindOverlaps(a, hits);
+				hits.clear();
 			}
-			hits.clear();
 		}
 	}
 	
diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h
index 2c453bb7..656e45f9 100755
--- a/src/intersectBed/intersectBed.h
+++ b/src/intersectBed/intersectBed.h
@@ -46,7 +46,7 @@ public:
 	void reportB(const BED &);
 
 	bool FindOverlaps(const BED &a, vector<BED> &hits);
-	bool FindOneOrMoreOverlap(const BED &a, vector<BED> &hits);
+	bool FindOneOrMoreOverlap(const BED &a);
 	
 	void IntersectBed(istream &bedInput);
 	void IntersectBam(string bamFile);
diff --git a/src/pairToBed/pairToBed.cpp b/src/pairToBed/pairToBed.cpp
index 260ea33f..f9fc7fde 100755
--- a/src/pairToBed/pairToBed.cpp
+++ b/src/pairToBed/pairToBed.cpp
@@ -57,7 +57,7 @@ BedIntersectPE::~BedIntersectPE(void) {
 
 
 
-void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, string &type) {
+void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, const string &type) {
 
 	// list of hits on each end of BEDPE
 	// that exceed the requested overlap fraction
@@ -163,107 +163,62 @@ void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED
 }
 
 
-bool BedIntersectPE::FindOneOrMoreOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, string &type) {
+bool BedIntersectPE::FindOneOrMoreOverlaps(const BEDPE &a, const string &type) {
 
-	// list of hits on each end of BEDPE
+	// flags for the existence of hits on each end of BEDPE
 	// that exceed the requested overlap fraction
-	vector<BED> qualityHits1;
-	vector<BED> qualityHits2;
+	bool end1Found = false;
+	bool end2Found = false;
 
-	// count of hits on each end of BEDPE
-	// that exceed the requested overlap fraction
-	int numOverlapsEnd1 = 0;
-	int numOverlapsEnd2 = 0;
-
-	/* 
-	   Look for overlaps in end 1
-	*/
-	
-	// make sure we have a valid chromosome before we search
+	// Look for overlaps in end 1 assuming we have an aligned chromosome.
 	if (a.chrom1 != ".") {
-		// Find the quality hits between ***end1*** of the BEDPE and the B BED file
-		bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, this->forceStrand);
-	
-		vector<BED>::const_iterator h = hits1.begin();
-		vector<BED>::const_iterator hitsEnd = hits1.end();
-		for (; h != hitsEnd; ++h) {
-	
-			int s = max(a.start1, h->start);
-			int e = min(a.end1, h->end);
-			int overlapBases = (e - s);				// the number of overlapping bases b/w a and b
-			int aLength = (a.end1 - a.start1);		// the length of a in b.p.
-		
-			// is there enough overlap relative to the user's request? (default ~ 1bp)
-			if ( ( (float) overlapBases / (float) aLength ) >= this->overlapFraction ) {
-				numOverlapsEnd1++;
-				qualityHits1.push_back(*h);
-				
-				if (type == "either") return true;
-				else if (type == "neither") return false;
-			}
-		}
+		end1Found = bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, 
+			this->forceStrand, this->overlapFraction);
+
+		// can we bail out without checking end2?
+		if ((type == "either") && (end1Found == true)) return true;
+		else if ((type == "neither") && (end1Found == true)) return false;
+		else if ((type == "notboth") && (end1Found == false)) return true;
+		else if ((type == "both") && (end1Found == false)) return false;
 	}
-	
-	// if testing for "notboth" or "both", we can bail early depending on end one
-	if ((type == "notboth") && (numOverlapsEnd1 == 0)) return true;
-	else if ((type == "both") && (numOverlapsEnd1 == 0)) return false;
-	
-	/* 
-	   Now look for overlaps in end 2
-	*/
-	
-	// make sure we have a valid chromosome before we search
+		
+	// Now look for overlaps in end 2 assuming we have an aligned chromosome.
 	if (a.chrom2 != ".") {
-		// Now find the quality hits between ***end2*** of the BEDPE and the B BED file
-		bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, this->forceStrand);
-	
-		vector<BED>::const_iterator h = hits2.begin();
-		vector<BED>::const_iterator hitsEnd = hits2.end();
-		for (; h != hitsEnd; ++h) {
-	
-			int s = max(a.start2, h->start);
-			int e = min(a.end2, h->end);
-			int overlapBases = (e - s);				// the number of overlapping bases b/w a and b
-			int aLength = (a.end2 - a.start2);		// the length of a in b.p.
-
-			// is there enough overlap relative to the user's request? (default ~ 1bp)
-			if ( ( (float) overlapBases / (float) aLength ) >= this->overlapFraction ) {
-				numOverlapsEnd2++;
-				qualityHits2.push_back(*h);
-				
-				if (type == "either") return true;
-				else if (type == "neither") return false;
-				else if ((type == "notboth") && (numOverlapsEnd1 > 0)) return false;	
-			}
-		}
+		end2Found = bedB->FindOneOrMoreOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, 
+			this->forceStrand, this->overlapFraction);
+			
+		if ((type == "either") && (end2Found == true)) return true;
+		else if ((type == "neither") && (end2Found == true)) return false;
+		else if ((type == "notboth") && (end2Found == false)) return true;
+		else if ((type == "both") && (end2Found == false)) return false;		
 	}
 	
 	// Now report the hits depending on what the user has requested.
 	if (type == "notboth") {
-		if ( (numOverlapsEnd1 == 0) || (numOverlapsEnd2 == 0) ) return true;
+		if ( (end1Found == false) || (end2Found == false) ) return true;
 		else return false;
 	}
+	else if (type == "either") {
+		if ( (end1Found == false) && (end2Found == false) ) return false;
+	}
 	else if (type == "neither") {
-		if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 == 0) ) return true;
+		if ( (end1Found == false) && (end2Found == false) ) return true;
 		else return false;		
 	}
 	else if (type == "xor") {
-		if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 == 0) ) return true;
-		else if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 > 0) ) return true;
+		if ( (end1Found == true) && (end2Found == false) ) return true;
+		else if ( (end1Found == false) && (end2Found == true) ) return true;
 		else return false;
 	}
 	else if (type == "both") {
-		if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 > 0) ) return true;
+		if ( (end1Found == true) && (end2Found == true) ) return true;
 		return false;
 	}
 	return false;
 }
 
 
-
-
-
-void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type) {
+void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type) {
 
 	// count of hits on _between_ end of BEDPE
 	// that exceed the requested overlap fraction
@@ -318,12 +273,13 @@ void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, str
 }
 
 
-bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type) {
+bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type) {
 
 	int spanStart = 0;
 	int spanEnd = 0;
 	int spanLength = 0;
-
+	bool overlapFound;
+	
 	if ((type == "ispan") || (type == "notispan")) {
 		spanStart = a.end1;
 		spanEnd = a.start2;
@@ -342,24 +298,10 @@ bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, vector<BED> &
 	}
 	spanLength = spanEnd - spanStart;
 
-	// get the hits for the span	
-	bedB->FindOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1, hits, this->forceStrand);
-	
-	vector<BED>::const_iterator h = hits.begin();
-	vector<BED>::const_iterator hitsEnd = hits.end();
-	for (; h != hitsEnd; ++h) {
-	
-		int s = max(spanStart, h->start);
-		int e = min(spanEnd, h->end);
-		int overlapBases = (e - s);					// the number of overlapping bases b/w a and b
-		int spanLength = (spanEnd - spanStart);		// the length of a in b.p.
-		
-		// is there enough overlap relative to the user's request? (default ~ 1bp)
-		if ( ( (float) overlapBases / (float) spanLength ) >= this->overlapFraction ) {
-			return true;
-		}
-	}
-	return false;
+	overlapFound = bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1, 
+		this->forceStrand, this->overlapFraction);
+
+	return overlapFound;
 }
 
  
@@ -459,9 +401,8 @@ void BedIntersectPE::IntersectBamPE(string bamFile) {
 						BEDPE a;  ConvertBamToBedPE(bam, refs, a);
 						
 						// look for overlaps, and write to BAM if >=1 were found	
-						overlapsFound = FindOneOrMoreSpanningOverlaps(a, hits, this->searchType);
-						if (overlapsFound == false) writer.SaveAlignment(bam);
-						hits.clear();
+						overlapsFound = FindOneOrMoreSpanningOverlaps(a, this->searchType);
+						if (overlapsFound == true) writer.SaveAlignment(bam);
 					}
 					else if ( IsCorrectMappingForBEDPE(bam) ) {	// BEDPE output
 						BEDPE a;
@@ -480,9 +421,8 @@ void BedIntersectPE::IntersectBamPE(string bamFile) {
 						BEDPE a;  ConvertBamToBedPE(bam, refs, a);
 						
 						// write to BAM if there were no overlaps
-						overlapsFound = FindOneOrMoreSpanningOverlaps(a, hits, this->searchType);
+						overlapsFound = FindOneOrMoreSpanningOverlaps(a, this->searchType);
 						if (overlapsFound == false) writer.SaveAlignment(bam);
-						hits.clear();	
 					}
 					else if ( IsCorrectMappingForBEDPE(bam) ) {	// BEDPE output
 						BEDPE a;
@@ -497,13 +437,14 @@ void BedIntersectPE::IntersectBamPE(string bamFile) {
 			else if (this->bamOutput == true) writer.SaveAlignment(bam);
 		}
 		else if ( (this->searchType == "either") || (this->searchType == "xor") || 
-				  (this->searchType == "both") || (this->searchType == "notboth") ) {
+				  (this->searchType == "both") || (this->searchType == "notboth") ||
+				  (this->searchType == "neither") ) {
 					
 			if (this->bamOutput == true) {	// BAM output
 				BEDPE a;  ConvertBamToBedPE(bam, refs, a);
 
 				// write to BAM if correct hits found
-				overlapsFound = FindOneOrMoreOverlaps(a, hits1, hits2, this->searchType);
+				overlapsFound = FindOneOrMoreOverlaps(a, this->searchType);
 				if (overlapsFound == true) writer.SaveAlignment(bam);
 			}
 			else if ( IsCorrectMappingForBEDPE(bam) ) {	// BEDPE output
@@ -514,23 +455,6 @@ void BedIntersectPE::IntersectBamPE(string bamFile) {
 				hits2.clear();
 			}
 		}
-		else if (this->searchType == "neither") {	// BAM output
-			if (this->bamOutput == true) {
-				BEDPE a;  ConvertBamToBedPE(bam, refs, a);
-				// write to BAM if not hits found
-				overlapsFound = FindOneOrMoreOverlaps(a, hits1, hits2, this->searchType);
-				if (overlapsFound == true) {		
-					writer.SaveAlignment(bam);
-				}
-			}
-			else if ( IsCorrectMappingForBEDPE(bam) ) {  // BEDPE output
-				BEDPE a;
-				ConvertBamToBedPE(bam, refs, a);
-				FindOverlaps(a, hits1, hits2, this->searchType);
-				hits1.clear();
-				hits2.clear();
-			}
-		}
 	}
 	reader.Close();
 	if (this->bamOutput == true) {
diff --git a/src/pairToBed/pairToBed.h b/src/pairToBed/pairToBed.h
index 665dd13f..bc938f9d 100755
--- a/src/pairToBed/pairToBed.h
+++ b/src/pairToBed/pairToBed.h
@@ -48,11 +48,12 @@ public:
 	// destructor
 	~BedIntersectPE(void);
 
-	void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, string &type); 
-	bool FindOneOrMoreOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, string &type); 
+	void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type); 
+	
+	bool FindOneOrMoreOverlaps(const BEDPE &, const string &type); 
 
-	void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type); 
-	bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, vector<BED> &hits, string &type);
+	void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type); 
+	bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type);
 
 	void IntersectBedPE(istream &bedInput);
 	void IntersectBamPE(string bamFile);
@@ -84,7 +85,7 @@ private:
 		if (bam.IsMapped()) {
 			a.chrom1 = refs.at(bam.RefID).RefName;
 			a.start1 = bam.Position;
-			a.end1 = bam.Position + bam.Length;
+			a.end1 = bam.Position + bam.AlignedBases.size();
 			a.strand1 = "+";
 			if (bam.IsReverseStrand()) a.strand1 = "-";	
 		}
@@ -93,11 +94,11 @@ private:
 		if (bam.IsMateMapped()) {
 			a.chrom2 = refs.at(bam.MateRefID).RefName;
 			a.start2 = bam.MatePosition;
-			a.end2 = bam.MatePosition + bam.Length;
+			a.end2 = bam.MatePosition + bam.AlignedBases.size();
 			a.strand2 = "+";
 			if (bam.IsMateReverseStrand()) a.strand2 = "-";	
 		}
-
+		
 		a.name = bam.Name;
 		a.score = ToString(bam.MapQuality);
 	};
diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp
index 6de44618..be87fcb9 100755
--- a/src/utils/bedFile/bedFile.cpp
+++ b/src/utils/bedFile/bedFile.cpp
@@ -101,6 +101,45 @@ void BedFile::FindOverlapsPerBin(string chrom, int start, int end, string strand
 	startBin = (start >> _binFirstShift);
 	endBin = ((end-1) >> _binFirstShift);
 
+	// loop through each bin "level" in the binning hierarchy
+	for (int i = 0; i < 6; ++i) {
+		
+		// loop through each bin at this level of the hierarchy
+		int offset = binOffsetsExtended[i];
+		for (int j = (startBin+offset); j <= (endBin+offset); ++j)  {
+			
+			// loop through each feature in this chrom/bin and see if it overlaps
+			// with the feature that was passed in.  if so, add the feature to 
+			// the list of hits.
+		 	vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin();
+			vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end();
+
+			for (; bedItr != bedEnd; ++bedItr) {
+				// do we have sufficient overlap?
+				if (overlaps(bedItr->start, bedItr->end, start, end) > 0) {
+					// skip the hit if not on the same strand (and we care)
+					if (forceStrand == false) hits.push_back(*bedItr);
+					else if ( (forceStrand == true) && (strand == bedItr->strand)) {
+						hits.push_back(*bedItr);
+					}
+				}			
+			}
+		}
+		startBin >>= _binNextShift;
+		endBin >>= _binNextShift;
+	}
+}
+
+
+bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand, 
+	bool forceStrand, float overlapFraction) {
+
+	int startBin, endBin;
+	startBin = (start >> _binFirstShift);
+	endBin = ((end-1) >> _binFirstShift);
+
+	int aLength = (end - start);
+	
 	// loop through each bin "level" in the binning hierarchy
 	for (int i = 0; i < 6; ++i) {
 		
@@ -114,23 +153,74 @@ void BedFile::FindOverlapsPerBin(string chrom, int start, int end, string strand
 			vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin();
 			vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end();
 			for (; bedItr != bedEnd; ++bedItr) {
-				
-				// skip the hit if not on the same strand (and we care)
-				if (forceStrand && (strand != bedItr->strand)) {
-					continue;
-				}
-				else if (overlaps(bedItr->start, bedItr->end, start, end) > 0) {
-					hits.push_back(*bedItr);	// it's a hit, add it.
-				}
-							
+				int s = max(start, bedItr->start);
+				int e = min(end, bedItr->end);
+				// the number of overlapping bases b/w a and b
+				int overlapBases = (e - s);
+
+				// do we have sufficient overlap?
+				if ( (float) overlapBases / (float) aLength  >= overlapFraction) {					
+					// skip the hit if not on the same strand (and we care)
+					if (forceStrand == false) return true;
+					else if ( (forceStrand == true) && (strand == bedItr->strand)) {
+						return true;
+					}
+				}			
 			}
 		}
 		startBin >>= _binNextShift;
 		endBin >>= _binNextShift;
 	}
+	return false;
 }
 
 
+bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand, 
+	bool forceStrand, float overlapFraction) {
+
+	int startBin, endBin;
+	startBin = (start >> _binFirstShift);
+	endBin = ((end-1) >> _binFirstShift);
+
+	int aLength = (end - start);
+	
+	// loop through each bin "level" in the binning hierarchy
+	for (int i = 0; i < 6; ++i) {
+		
+		// loop through each bin at this level of the hierarchy
+		int offset = binOffsetsExtended[i];
+		for (int j = (startBin+offset); j <= (endBin+offset); ++j)  {
+			
+			// loop through each feature in this chrom/bin and see if it overlaps
+			// with the feature that was passed in.  if so, add the feature to 
+			// the list of hits.
+			vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin();
+			vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end();
+			for (; bedItr != bedEnd; ++bedItr) {
+				int s = max(start, bedItr->start);
+				int e = min(end, bedItr->end);
+				// the number of overlapping bases b/w a and b
+				int overlapBases = (e - s);
+				
+				// do we have sufficient overlap?
+				if ( (float) overlapBases / (float) aLength  >= overlapFraction) {					
+					int bLength = (bedItr->end - bedItr->start);
+					float bOverlap = ( (float) overlapBases / (float) bLength );
+					if ((forceStrand == false) && (bOverlap >= overlapFraction)) {
+						return true;
+					}
+					else if ( (forceStrand == true) && (strand == bedItr->strand) && (bOverlap >= overlapFraction)) {
+						return true;
+					}
+				}			
+			}
+		}
+		startBin >>= _binNextShift;
+		endBin >>= _binNextShift;
+	}
+	return false;
+}
+
 
 void BedFile::countHits(const BED &a, bool forceStrand) {
 
@@ -202,7 +292,7 @@ bool BedFile::parseLine (BED &bed, const vector<string> &lineVector, int &lineNu
 	char *p2End, *p3End, *p4End, *p5End;
 	long l2, l3, l4, l5;
 	
-	if ((lineVector[0] != "track") && (lineVector[0] != "browser") && (lineVector[0].find("#") == string::npos) ) {
+	if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") && string::npos) && (lineVector[0].find("#") == string::npos) ) {
 
 		// we need at least 3 columns
 		if (lineVector.size() >= 3) {
@@ -515,7 +605,10 @@ void BedFile::loadBedFileIntoMap() {
 				int bin = getBin(bedEntry.start, bedEntry.end);
 				bedEntry.count = 0;
 				bedEntry.minOverlapStart = INT_MAX;
-				this->bedMap[bedEntry.chrom][bin].push_back(bedEntry);	
+
+				//string key = getChromBinKey(bedEntry.chrom, bin);
+				//this->bedMap[key].push_back(bedEntry);	
+				this->bedMap[bedEntry.chrom][bin].push_back(bedEntry);
 			}
 			bedFields.clear();
 		}
@@ -531,7 +624,7 @@ void BedFile::loadBedFileIntoMap() {
 				int bin = getBin(bedEntry.start, bedEntry.end);
 				bedEntry.count = 0;
 				bedEntry.minOverlapStart = INT_MAX;
-				this->bedMap[bedEntry.chrom][bin].push_back(bedEntry);	
+				this->bedMap[bedEntry.chrom][bin].push_back(bedEntry);
 			}
 			bedFields.clear();
 		}
diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h
index 06838f44..08be97e1 100755
--- a/src/utils/bedFile/bedFile.h
+++ b/src/utils/bedFile/bedFile.h
@@ -71,6 +71,7 @@ struct BED {
 
 // return the genome "bin" for a feature with this start and end
 int getBin(int start, int end);
+
 	
 // return the amount of overlap between two features.  Negative if none and the the 
 // number of negative bases is the distance between the two.
@@ -118,12 +119,16 @@ bool byChromThenStart(BED const &a, BED const &b);
 //*************************************************
 // Common typedefs
 //*************************************************
+typedef vector<BED> bedVector;
+
 typedef map<int, vector<BED>, std::less<int> > binsToBeds;
 //typedef tr1::unordered_map<int, vector<BED> > binsToBeds;
 
 typedef map<string, binsToBeds, std::less<string> > masterBedMap;
 //typedef tr1::unordered_map<string, binsToBeds> masterBedMap;
 
+//typedef map<string, bedVector, std::less<string> > masterBedMap;
+
 typedef map<string, vector<BED>, std::less<string> > masterBedMapNoBin;
 
 
@@ -152,6 +157,15 @@ public:
 	// as the single feature. Note: Adapted from kent source "binKeeperFind"
 	void FindOverlapsPerBin(string chrom, int start, int end, string strand, vector<BED> &hits, bool forceStrand);
 
+	// return true if at least one overlap was found.  otherwise, return false.
+	bool FindOneOrMoreOverlapsPerBin(string chrom, int start, int end, string strand, 
+										bool forceStrand, float overlapFraction);
+
+	// return true if at least one __reciprocal__ overlap was found.  otherwise, return false.
+	bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, int start, int end, string strand, 
+													bool forceStrand, float overlapFraction);
+	
+
 	// Given a chrom, start, end and strand for a single feature,
 	// increment a the number of hits for each feature in B file
 	// that the feature overlaps
diff --git a/src/utils/bedFilePE/bedFilePE.cpp b/src/utils/bedFilePE/bedFilePE.cpp
index ca0a8d6f..2a675f77 100755
--- a/src/utils/bedFilePE/bedFilePE.cpp
+++ b/src/utils/bedFilePE/bedFilePE.cpp
@@ -15,31 +15,6 @@
 #include "bedFilePE.h"
 
 
-/*void BedFilePE::binKeeperFind(map<int, vector<BED>, std::less<int> > &bk, const int start, const int end, vector<BED> &hits) {
-
-	int startBin, endBin;
-	startBin = (start >>_binFirstShift);
-	endBin = ((end-1) >>_binFirstShift);
-	
-	for (int i = 0; i < 6; ++i) {
-		int offset = binOffsetsExtended[i];
-
-		for (int j = (startBin+offset); j <= (endBin+offset); ++j)  {
-			for (vector<BED>::const_iterator el = bk[j].begin(); el != bk[j].end(); ++el) {
-				
-				if (overlaps(el->start, el->end, start, end) > 0) {
-					hits.push_back(*el);
-				}
-				
-			}
-		}
-		startBin >>= _binNextShift;
-		endBin >>= _binNextShift;
-	}
-}
-*/
-
-
 /*
 	Adapted from kent source "binKeeperFind"
 */
@@ -267,7 +242,7 @@ bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, co
 			bed.strand1 = lineVector[8];
 			bed.strand2 = lineVector[9];
 			
-			for (unsigned int i = 6; i < lineVector.size(); ++i) {
+			for (unsigned int i = 10; i < lineVector.size(); ++i) {
 				bed.otherFields.push_back(lineVector[i]); 
 			}
 			return true;
@@ -360,7 +335,7 @@ bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, co
 			bed.strand1 = lineVector[8];
 			bed.strand2 = lineVector[9];
 			
-			for (unsigned int i = 6; i < lineVector.size(); ++i) {
+			for (unsigned int i = 10; i < lineVector.size(); ++i) {
 				bed.otherFields.push_back(lineVector[i]); 
 			}
 			return true;
@@ -433,9 +408,9 @@ void BedFilePE::loadBedPEFileIntoMap() {
 				// load end1 into a UCSC bin map
 				bin1 = getBin(bedEntry1.start, bedEntry1.end);
 				this->bedMapEnd1[bedEntry1.chrom][bin1].push_back(bedEntry1);	
-
+				
 				// load end2 into a UCSC bin map
-				bin2 = getBin(bedEntry2.start, bedEntry2.end);				
+				bin2 = getBin(bedEntry2.start, bedEntry2.end);
 				this->bedMapEnd2[bedEntry2.chrom][bin2].push_back(bedEntry2);		
 			}
 			bedFields.clear();
@@ -459,7 +434,7 @@ void BedFilePE::loadBedPEFileIntoMap() {
 
 					// load end2 into a UCSC bin map
 					bin2 = getBin(bedEntry2.start, bedEntry2.end);				
-					this->bedMapEnd2[bedEntry2.chrom][bin2].push_back(bedEntry2);
+					this->bedMapEnd2[bedEntry2.chrom][bin2].push_back(bedEntry2);		
 				}
 				bedFields.clear();			
 			}
-- 
GitLab