From 7527f02d666d88701a7b6e0057a2a8ab4915a637 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Thu, 15 Apr 2010 09:45:16 -0400
Subject: [PATCH] Updated all of the tools to use the Open/Get/Close idiom.

	- also standardized the naming convention for private variables.
	- loadIntoMap methods use Open/Get/Close idiom.
---
 src/annotateBed/annotateBed.cpp  | 136 ++++++++++++++++++++++++++++
 src/annotateBed/annotateBed.h    |  53 +++++++++++
 src/annotateBed/annotateMain.cpp | 124 +++++++++++++++++++++++++
 src/closestBed/closestMain.cpp   |   1 +
 src/subtractBed/subtractBed.cpp  |  83 +++++++----------
 src/subtractBed/subtractBed.h    |  29 +++---
 src/subtractBed/subtractMain.cpp |   2 +-
 src/utils/bedFile/bedFile.cpp    | 150 ++++++++-----------------------
 src/utils/bedFile/bedFile.h      |  19 ++--
 src/utils/gzstream/gzstream.h    | 121 +++++++++++++++++++++++++
 src/windowBed/windowBed.cpp      | 103 ++++++++-------------
 src/windowBed/windowBed.h        |  34 ++++---
 src/windowBed/windowMain.cpp     |   2 +-
 13 files changed, 582 insertions(+), 275 deletions(-)
 create mode 100755 src/annotateBed/annotateBed.cpp
 create mode 100755 src/annotateBed/annotateBed.h
 create mode 100755 src/annotateBed/annotateMain.cpp
 create mode 100644 src/utils/gzstream/gzstream.h

diff --git a/src/annotateBed/annotateBed.cpp b/src/annotateBed/annotateBed.cpp
new file mode 100755
index 00000000..b270c8e3
--- /dev/null
+++ b/src/annotateBed/annotateBed.cpp
@@ -0,0 +1,136 @@
+/*****************************************************************************
+  annotateBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0+ license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "annotateBed.h"
+
+
+BedAnnotate::BedAnnotate (const string &bedFile, const vector<string> &annotationFiles, bool &forceStrand) {
+
+	this->bedFile = bedFile;
+	this->annotationFiles = annotationFiles;
+	
+	this->bed = new BedFile(bedFile);
+	this->forceStrand = forceStrand;
+}
+
+
+
+BedAnnotate::~BedAnnotate (void) {
+}
+
+
+void BedAnnotate::ProcessAnnotations (istream &bedInput) {
+	
+	// loop through each of the annotation files and compute the coverage
+	// of each with respect to the input BED file.
+	vector<string>::const_iterator annotItr = this->annotationFiles.begin();
+	vector<string>::const_iterator annotEnd = this->annotationFiles.end();
+	for (; annotItr != annotEnd; ++annotItr) {
+		// create a BED file of the current annotation file.
+		BedFile annotation = new BedFile(*annotItr);
+		// compute the coverage of the annotation file with respect to the input file.
+		GetCoverage(annotation, bedInput);
+	}	
+}
+
+ 
+void BedAnnotate::GetCoverage (const BedFile &annotation, istream &bedInput) {
+	
+	// load the annotation bed file into a map so
+	// that we can easily compare "A" to it for overlaps
+	annotation->loadBedFileIntoMap();
+
+	string bedLine;                                                                                                                    
+	int lineNum = 0;					// current input line number
+	vector<string> bedFields;			// vector for a BED entry
+	bedFields.reserve(12);	
+		
+	// process each entry in A
+	while (getline(bedInput, bedLine)) {
+
+		lineNum++;
+		Tokenize(bedLine,bedFields);
+		BED a;
+
+		if (bedA->parseLine(a, bedFields, lineNum)) {	
+			// count a as a hit with all the relevant features in B
+			bedB->countHits(a, this->forceStrand);
+		}	
+		// reset for the next input line
+		bedFields.clear();
+	}
+						
+	// now, report the count of hits for each feature in B.
+	masterBedMap::const_iterator chromItr = bedB->bedMap.begin();
+	masterBedMap::const_iterator chromEnd = bedB->bedMap.end();
+	for (; chromItr != chromEnd; ++chromItr) {
+	
+		binsToBeds::const_iterator binItr = chromItr->second.begin();
+		binsToBeds::const_iterator binEnd = chromItr->second.end();
+		for (; binItr != binEnd; ++binItr) {
+
+			vector<BED>::const_iterator bedItr = binItr->second.begin();
+			vector<BED>::const_iterator bedEnd = binItr->second.end();
+			for (; bedItr != bedEnd; ++bedItr) {
+									
+				int zeroDepthCount = 0;
+				int depth = 0;
+				int start = min(bedItr->minOverlapStart, bedItr->start);
+				
+				for (int pos = start+1; pos <= bedItr->end; pos++) {
+					
+					if (bedItr->depthMap.find(pos) != bedItr->depthMap.end()) {
+						
+						map<unsigned int, DEPTH> dMap = bedItr->depthMap;
+						depth += dMap[pos].starts;
+																				
+						if ((depth == 0) && (pos > bedItr->start) && (pos <= bedItr->end)) {
+							zeroDepthCount++;
+						}
+						
+						depth = depth - dMap[pos].ends;
+					}
+					else {
+						if ((depth == 0) && (pos > bedItr->start) && (pos <= bedItr->end)) {
+							zeroDepthCount++;
+						}
+					}
+				}
+
+				// Report the coverage for the current interval.
+				int length = bedItr->end - bedItr->start;
+				int nonZeroBases =  (length-zeroDepthCount);
+				float fractCovered = (float) nonZeroBases /length;
+				
+				bedB->reportBedTab(*bedItr);
+				printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered);			
+			}
+		}
+	}
+}
+
+
+void BedAnnotate::DetermineBedInput () {
+	if (bedA->bedFile != "stdin") {   // process a file
+		ifstream beds(bedA->bedFile.c_str(), ios::in);
+		if ( !beds ) {
+			cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
+			exit (1);
+		}
+		GetCoverage(beds);
+	}
+	else {   						// process stdin
+		GetCoverage(cin);		
+	}
+}
+
+
diff --git a/src/annotateBed/annotateBed.h b/src/annotateBed/annotateBed.h
new file mode 100755
index 00000000..25d340a0
--- /dev/null
+++ b/src/annotateBed/annotateBed.h
@@ -0,0 +1,53 @@
+/*****************************************************************************
+  coverageBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0+ license.
+******************************************************************************/
+#ifndef	COVERAGEBED_H
+#define COVERAGEBED_H
+
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedCoverage {
+
+public:
+
+	// constructor 
+	BedCoverage(string &, string &, bool &);
+
+	// destructor
+	~BedCoverage(void);
+	
+	void GetCoverage(istream &bedInput);
+	
+	void DetermineBedInput();
+	
+private:
+
+	string bedAFile;
+	string bedBFile;
+
+	// instance of a bed file class.
+	BedFile *bedA, *bedB;
+	
+	bool forceStrand;
+
+};
+#endif /* COVERAGEBED_H */
diff --git a/src/annotateBed/annotateMain.cpp b/src/annotateBed/annotateMain.cpp
new file mode 100755
index 00000000..db41fc28
--- /dev/null
+++ b/src/annotateBed/annotateMain.cpp
@@ -0,0 +1,124 @@
+/*****************************************************************************
+  coverageMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0+ license.
+******************************************************************************/
+#include "coverageBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define the version
+#define PROGRAM_NAME "coverageBed"
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+	// our configuration variables
+	bool showHelp = false;
+
+	// input files
+	string bedAFile;
+	string bedBFile;
+	bool forceStrand = false;
+
+	bool haveBedA = false;
+	bool haveBedB = false;
+
+
+	// check to see if we should print out some help
+	if(argc <= 1) showHelp = true;
+
+	for(int i = 1; i < argc; i++) {
+		int parameterLength = (int)strlen(argv[i]);
+
+		if((PARAMETER_CHECK("-h", 2, parameterLength)) || 
+		(PARAMETER_CHECK("--help", 5, parameterLength))) {
+			showHelp = true;
+		}
+	}
+
+	if(showHelp) ShowHelp();
+
+	// do some parsing (all of these parameters require 2 strings)
+	for(int i = 1; i < argc; i++) {
+
+		int parameterLength = (int)strlen(argv[i]);
+
+		if(PARAMETER_CHECK("-a", 2, parameterLength)) {
+			if ((i+1) < argc) {
+				haveBedA = true;
+				bedAFile = argv[i + 1];
+				i++;
+			}
+		}
+		else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+			if ((i+1) < argc) {
+				haveBedB = true;
+				bedBFile = argv[i + 1];
+				i++;
+			}
+		}
+		else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+			forceStrand = true;
+		}	
+		else {
+			cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+			showHelp = true;
+		}		
+	}
+
+	// make sure we have both input files
+	if (!haveBedA || !haveBedB) {
+		cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
+		showHelp = true;
+	}
+
+	if (!showHelp) {
+
+		BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand);
+		bg->DetermineBedInput();
+		return 0;
+	}
+	else {
+		ShowHelp();
+	}
+}
+
+void ShowHelp(void) {
+	
+	cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+	
+	cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+	
+	cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl;
+	cerr << "\t on the intervals in B." << endl << endl;
+	
+	cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl;
+
+	cerr << "Options: " << endl;
+	cerr << "\t-s\t"	 	    << "Force strandedness.  That is, only include hits in A that" << endl;
+	cerr						<< "\t\toverlap B on the same strand." << endl;
+	cerr						<< "\t\t- By default, hits are included without respect to strand." << endl << endl;
+
+	cerr << "Output:  " << endl;
+	cerr << "\t" << " After each entry in B, reports: " << endl; 
+	cerr << "\t  1) The number of features in A that overlapped the B interval." << endl;
+	cerr << "\t  2) The number of bases in B that had non-zero coverage." << endl;
+	cerr << "\t  3) The length of the entry in B." << endl;
+	cerr << "\t  4) The fraction of bases in B that had non-zero coverage." << endl;
+	
+	exit(1);
+
+}
diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp
index 6cafec77..c076bd99 100755
--- a/src/closestBed/closestMain.cpp
+++ b/src/closestBed/closestMain.cpp
@@ -98,6 +98,7 @@ int main(int argc, char* argv[]) {
 	
 	if (!showHelp) {
 		BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode);
+		delete bc;
 		return 0;
 	}
 	else {
diff --git a/src/subtractBed/subtractBed.cpp b/src/subtractBed/subtractBed.cpp
index c3c82de7..0bc3f7f3 100755
--- a/src/subtractBed/subtractBed.cpp
+++ b/src/subtractBed/subtractBed.cpp
@@ -18,13 +18,15 @@
 */
 BedSubtract::BedSubtract(string &bedAFile, string &bedBFile, float &overlapFraction, bool &forceStrand) {
 
-	this->bedAFile = bedAFile;
-	this->bedBFile = bedBFile;
-	this->overlapFraction = overlapFraction;
-	this->forceStrand = forceStrand;
+	_bedAFile = bedAFile;
+	_bedBFile = bedBFile;
+	_overlapFraction = overlapFraction;
+	_forceStrand = forceStrand;
 
-	this->bedA = new BedFile(bedAFile);
-	this->bedB = new BedFile(bedBFile);
+	_bedA = new BedFile(bedAFile);
+	_bedB = new BedFile(bedBFile);
+	
+	SubtractBed();
 }
 
 
@@ -35,10 +37,10 @@ BedSubtract::~BedSubtract(void) {
 }
 
 
-void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) {
+void BedSubtract::FindAndSubtractOverlaps(BED &a, vector<BED> &hits) {
 	
 	// find all of the overlaps between a and B.
-	bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, this->forceStrand);
+	_bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _forceStrand);
 	
 	//  is A completely spanned by an entry in B?
 	//  if so, A should not be reported.
@@ -64,7 +66,7 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) {
 				numOverlaps++;
 				numConsumedByB++;
 			}
-			else if ( overlap >= this->overlapFraction ) {
+			else if ( overlap >= _overlapFraction ) {
 				numOverlaps++;
 				bOverlaps.push_back(*h);
 			}
@@ -73,7 +75,7 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) {
 	
 	if (numOverlaps == 0) {
 		// no overlap found, so just report A as-is.
-		bedA->reportBedNewLine(a);
+		_bedA->reportBedNewLine(a);
 	}
 	else if (numOverlaps == 1) {
 		// one overlap found.  only need to look at the single
@@ -88,26 +90,26 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) {
 			// B        ----
 			// Res. ====    ====					
 			if ( (theHit.start > a.start) && (theHit.end < a.end) ) {
-				bedA->reportBedRangeNewLine(a,a.start,theHit.start);
-				bedA->reportBedRangeNewLine(a,theHit.end,a.end);
+				_bedA->reportBedRangeNewLine(a,a.start,theHit.start);
+				_bedA->reportBedRangeNewLine(a,theHit.end,a.end);
 			}
 			// A	++++++++++++
 			// B    ----------
 			// Res.           ==        			
 			else if (theHit.start == a.start) {
-				bedA->reportBedRangeNewLine(a,theHit.end,a.end);
+				_bedA->reportBedRangeNewLine(a,theHit.end,a.end);
 			}	
 			// A	      ++++++++++++
 			// B    ----------
 			// Res.       ====
 			else if (theHit.start < a.start) {
-				bedA->reportBedRangeNewLine(a,theHit.end,a.end);
+				_bedA->reportBedRangeNewLine(a,theHit.end,a.end);
 			}
 			// A	++++++++++++
 			// B           ----------
 			// Res. =======
 			else if (theHit.start > a.start) {
-				bedA->reportBedRangeNewLine(a,a.start,theHit.start);	
+				_bedA->reportBedRangeNewLine(a,a.start,theHit.start);	
 			}
 		}
 	}
@@ -137,7 +139,7 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) {
 					}
 					int blockEnd = i + a.start;
 					blockEnd = min(a.end, blockEnd);
-					bedA->reportBedRangeNewLine(a,blockStart,blockEnd);
+					_bedA->reportBedRangeNewLine(a,blockStart,blockEnd);
 				}
 			}
 		}
@@ -146,52 +148,27 @@ void BedSubtract::FindOverlaps(BED &a, vector<BED> &hits) {
 
  
 
-void BedSubtract::SubtractBed(istream &bedInput) {
+void BedSubtract::SubtractBed() {
 
 	// load the "B" bed file into a map so
 	// that we can easily compare "A" to it for overlaps
-	bedB->loadBedFileIntoMap();
+	_bedB->loadBedFileIntoMap();
 
-	string bedLine;                                                                                                                    
+	BED a;                                                                                                                    
 	int lineNum = 0;					// current input line number
-	vector<BED> hits;					// vector of potential hits
-	vector<string> bedFields;			// vector for a BED entry
-	
+	vector<BED> hits;					// vector of potential hits	
 	// reserve some space
 	hits.reserve(100);
-	bedFields.reserve(12);	
-		
-	// process each entry in A
-	while (getline(bedInput, bedLine)) {
-
-		lineNum++;
-		Tokenize(bedLine,bedFields);
-		BED a;
-			
-		// find the overlaps with B if it's a valid BED entry. 
-		if (bedA->parseLine(a, bedFields, lineNum)) {
-			FindOverlaps(a, hits);
-			hits.clear();
-		}
-		
-		// reset for the next input line
-		bedFields.clear();
+	
+	// 	
+	_bedA->Open();
+	while (_bedA->GetNextBed(a, lineNum)) {
+		FindAndSubtractOverlaps(a, hits);
+		hits.clear();
 	}
+	_bedA->Close();
+	
 }
 // END Intersect
 
 
-void BedSubtract::DetermineBedInput() {
-	if (bedA->bedFile != "stdin") {   // process a file
-		ifstream beds(bedA->bedFile.c_str(), ios::in);
-		if ( !beds ) {
-			cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
-			exit (1);
-		}
-		SubtractBed(beds);
-	}
-	else {   						// process stdin
-		SubtractBed(cin);		
-	}
-}
-
diff --git a/src/subtractBed/subtractBed.h b/src/subtractBed/subtractBed.h
index b11d8619..d60374a7 100755
--- a/src/subtractBed/subtractBed.h
+++ b/src/subtractBed/subtractBed.h
@@ -27,29 +27,26 @@ class BedSubtract {
 public:
 
 	// constructor 
-	BedSubtract(string &, string &, float &, bool &);
+	BedSubtract(string &bedAFile, string &bedBFile, float &overlapFraction, bool &forceStrand);
 
 	// destructor
 	~BedSubtract(void);
-	
-	void reportARemainder(BED &, int &, int &);	
-	void reportA(BED &);
 
-	void FindOverlaps(BED &, vector<BED> &); 
-	void SubtractBed(istream &bedInput);
-	void DetermineBedInput();
-	
 private:
 
-	string bedAFile;
-	string bedBFile;
-	float overlapFraction;
-	bool noHit;
-	bool forceStrand;
+	// processing variables
+	string _bedAFile;
+	string _bedBFile;
+	float _overlapFraction;
+	bool _noHit;
+	bool _forceStrand;
 	
-	// instance of a bed file class.
-	BedFile *bedA, *bedB;
-
+	// instances of bed file class.
+	BedFile *_bedA, *_bedB;
+	
+	// methods
+	void FindAndSubtractOverlaps(BED &a, vector<BED> &hits); 
+	void SubtractBed();
 };
 
 #endif /* SUBTRACTBED_H */
diff --git a/src/subtractBed/subtractMain.cpp b/src/subtractBed/subtractMain.cpp
index 3efccc19..8f635115 100755
--- a/src/subtractBed/subtractMain.cpp
+++ b/src/subtractBed/subtractMain.cpp
@@ -99,7 +99,7 @@ int main(int argc, char* argv[]) {
 	if (!showHelp) {
 
 		BedSubtract *bs = new BedSubtract(bedAFile, bedBFile, overlapFraction, forceStrand);
-		bs->DetermineBedInput();
+		delete bs;
 		return 0;
 	}
 	else {
diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp
index fc5ec7d7..d0457bf3 100755
--- a/src/utils/bedFile/bedFile.cpp
+++ b/src/utils/bedFile/bedFile.cpp
@@ -112,7 +112,7 @@ BedFile::~BedFile(void) {
 void BedFile::Open(void) {
 	
 	if (bedFile == "stdin") {
-		bedStream = &cin;
+		_bedStream = &cin;
 	}
 	else {
 		size_t foundPos;
@@ -130,7 +130,7 @@ void BedFile::Open(void) {
 				// now set a pointer to the stream so that we
 				// can read the file later on.
 				// Thank God for Josuttis, p. 631!
-				bedStream = new igzstream(bedFile.c_str(), ios::in);
+				_bedStream = new igzstream(bedFile.c_str(), ios::in);
 			}
 		}  
 		// not GZIPPED.
@@ -148,7 +148,7 @@ void BedFile::Open(void) {
 				// now set a pointer to the stream so that we
 				// can read the file later on.
 				// Thank God for Josuttis, p. 631!
-				bedStream = new ifstream(bedFile.c_str(), ios::in);
+				_bedStream = new ifstream(bedFile.c_str(), ios::in);
 			}
 		}
 	}
@@ -157,7 +157,7 @@ void BedFile::Open(void) {
 
 // Close the BED file
 void BedFile::Close(void) {
-	delete bedStream;
+	delete _bedStream;
 }
 
 
@@ -165,13 +165,13 @@ bool BedFile::GetNextBed (BED &bed, int &lineNum) {
 
 	// make sure there are still lines to process.
 	// if so, tokenize, validate and return the BED entry.
-	if (bedStream->good()) {
+	if (_bedStream->good()) {
 		string bedLine;
 		vector<string> bedFields;
 		bedFields.reserve(12);
 		
 		// parse the bedStream pointer
-		getline(*bedStream, bedLine);
+		getline(*_bedStream, bedLine);
 		lineNum++;
 
 		// split into a string vector.
@@ -356,8 +356,8 @@ void BedFile::countHits(const BED &a, bool forceStrand) {
 
 
 void BedFile::setGff (bool gff) {
-	if (gff == true) this->isGff = true;
-	else this->isGff = false;
+	if (gff == true) this->_isGff = true;
+	else this->_isGff = false;
 }
 
 
@@ -577,7 +577,7 @@ bool BedFile::parseGffLine (BED &bed, const vector<string> &lineVector, int line
 */
 	if ( (lineNum > 1) && (lineVector.size() == this->bedType)) {
 		
-		if (this->bedType == 9 && isGff) {
+		if (this->bedType == 9 && _isGff) {
 			bed.chrom = lineVector[0];
 			// substract 1 to force the start to be BED-style
 			bed.start = atoi(lineVector[3].c_str()) - 1;
@@ -608,7 +608,7 @@ bool BedFile::parseGffLine (BED &bed, const vector<string> &lineVector, int line
 	else if ((lineNum == 1) && (lineVector.size() == 9)) {
 		this->bedType = lineVector.size();
 		
-		if (this->bedType == 9 && isGff) {
+		if (this->bedType == 9 && _isGff) {
 			bed.chrom = lineVector[0];
 			// substract 1 to force the start to be BED-style
 			bed.start = atoi(lineVector[3].c_str()) - 1;
@@ -624,7 +624,7 @@ bool BedFile::parseGffLine (BED &bed, const vector<string> &lineVector, int line
 			return true;
 		}
 		else {
-			cout << this->bedType << " " << isGff << endl;
+			cout << this->bedType << " " << _isGff << endl;
 			cerr << "Error: unexpected number of fields at line: " << lineNum << 
 					".  Verify that your files are TAB-delimited and that your GFF file has 9 fields.  Exiting..." << endl;
 			exit(1);
@@ -658,112 +658,32 @@ bool BedFile::parseGffLine (BED &bed, const vector<string> &lineVector, int line
 
 void BedFile::loadBedFileIntoMap() {
 
-	string bedLine;                                                                                                                       
+	BED bedEntry;
 	int lineNum = 0;
-	vector<string> bedFields;			// vector for a BED entry
-	bedFields.reserve(12);				// reserve the max BED size
 	
-	// Case 1: Proper BED File.
-	if ( (this->bedFile != "") && (this->bedFile != "stdin") ) {
-		
-		// open the BED file for reading                                                                                                                                      
-		ifstream bed(bedFile.c_str(), ios::in);
-		if ( !bed ) {
-			cerr << "Error: The requested bed file (" <<bedFile << ") could not be opened. Exiting!" << endl;
-			exit (1);
-		}
-
-		while (getline(bed, bedLine)) {
-					
-			Tokenize(bedLine,bedFields);	// load the fields into the vector
-			lineNum++;
-			
-			BED bedEntry;					// new BED struct for the current line
-			if (parseLine(bedEntry, bedFields, lineNum)) {
-				int bin = getBin(bedEntry.start, bedEntry.end);
-				bedEntry.count = 0;
-				bedEntry.minOverlapStart = INT_MAX;
-
-				//string key = getChromBinKey(bedEntry.chrom, bin);
-				//this->bedMap[key].push_back(bedEntry);	
-				this->bedMap[bedEntry.chrom][bin].push_back(bedEntry);
-			}
-			bedFields.clear();
-		}
-	}
-	else {
-		while (getline(cin, bedLine)) {
-		
-			Tokenize(bedLine,bedFields);	// load the fields into the vector
-			lineNum++;
-			
-			BED bedEntry;					// new BED struct for the current line
-			if (parseLine(bedEntry, bedFields, lineNum)) {
-				int bin = getBin(bedEntry.start, bedEntry.end);
-				bedEntry.count = 0;
-				bedEntry.minOverlapStart = INT_MAX;
-				this->bedMap[bedEntry.chrom][bin].push_back(bedEntry);
-			}
-			bedFields.clear();
-		}
-	}
+	Open();
+	while (this->GetNextBed(bedEntry, lineNum)) {
+		int bin = getBin(bedEntry.start, bedEntry.end);
+		bedEntry.count = 0;
+		bedEntry.minOverlapStart = INT_MAX;
+		bedMap[bedEntry.chrom][bin].push_back(bedEntry);
+	}
+	Close();
 }
 
 
 void BedFile::loadBedFileIntoMapNoBin() {
-
-	string bedLine;                                                                                                                       
+	
+	BED bedEntry;
 	int lineNum = 0;
-	vector<string> bedFields;			// vector for a BED entry
-	bedFields.reserve(12);				// reserve the max BED size
-
-	// Case 1: Proper BED File.
-	if ( (this->bedFile != "") && (this->bedFile != "stdin") ) {
-
-		// open the BED file for reading                                                                                                                                      
-		ifstream bed(bedFile.c_str(), ios::in);
-		if ( !bed ) {
-			cerr << "Error: The requested bed file (" <<bedFile << ") could not be opened. Exiting!" << endl;
-			exit (1);
-		}
-
-		while (getline(bed, bedLine)) {
-		
-			Tokenize(bedLine,bedFields);	// load the fields into the vector
-			lineNum++;
-			
-			BED bedEntry;					// new BED struct for the current line
-			if (parseLine(bedEntry, bedFields, lineNum)) {
-				bedEntry.count = 0;
-				bedEntry.minOverlapStart = INT_MAX;
-				this->bedMapNoBin[bedEntry.chrom].push_back(bedEntry);	
-			}
-			bedFields.clear();
-		}
-	}
-	// Case 2: STDIN.
-	else {
-
-		while (getline(cin, bedLine)) {
-		
-			Tokenize(bedLine,bedFields);	// load the fields into the vector
-			lineNum++;
-
-			BED bedEntry;					// new BED struct for the current line
-			if (parseLine(bedEntry, bedFields, lineNum)) {
-				bedEntry.count = 0;
-				bedEntry.minOverlapStart = INT_MAX;
-				this->bedMapNoBin[bedEntry.chrom].push_back(bedEntry);	
-			}
-			bedFields.clear();
-		}
-	}
-
-	// sort the BED entries for each chromosome
-	// in ascending order of start position
-	for (masterBedMapNoBin::iterator m = this->bedMapNoBin.begin(); m != this->bedMapNoBin.end(); ++m) {
-		sort(m->second.begin(), m->second.end(), sortByStart);		
+	
+	Open();
+	while (this->GetNextBed(bedEntry, lineNum)) {
+		bedEntry.count = 0;
+		bedEntry.minOverlapStart = INT_MAX;
+		bedMapNoBin[bedEntry.chrom].push_back(bedEntry);
 	}
+	Close();
 }
 
 
@@ -776,7 +696,7 @@ void BedFile::loadBedFileIntoMapNoBin() {
 */
 void BedFile::reportBedTab(const BED &bed) {
 	
-	if (isGff == false) {
+	if (_isGff == false) {
 		if (this->bedType == 3) {
 			printf ("%s\t%d\t%d\t", bed.chrom.c_str(), bed.start, bed.end);
 		}
@@ -822,7 +742,7 @@ void BedFile::reportBedTab(const BED &bed) {
 */
 void BedFile::reportBedNewLine(const BED &bed) {
 	
-	if (isGff == false) {
+	if (_isGff == false) {
 		if (this->bedType == 3) {
 			printf ("%s\t%d\t%d\n", bed.chrom.c_str(), bed.start, bed.end);
 		}
@@ -870,7 +790,7 @@ void BedFile::reportBedNewLine(const BED &bed) {
 */
 void BedFile::reportBedRangeTab(const BED &bed, int start, int end) {
 
-	if (isGff == false) {
+	if (_isGff == false) {
 		if (this->bedType == 3) {
 			printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
 		}
@@ -917,7 +837,7 @@ void BedFile::reportBedRangeTab(const BED &bed, int start, int end) {
 */
 void BedFile::reportBedRangeNewLine(const BED &bed, int start, int end) {
 
-	if (isGff == false) {
+	if (_isGff == false) {
 		if (this->bedType == 3) {
 			printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
 		}
@@ -959,7 +879,7 @@ void BedFile::reportBedRangeNewLine(const BED &bed, int start, int end) {
 */
 void BedFile::reportNullBedTab() {
 	
-	if (isGff == false) {
+	if (_isGff == false) {
 		if (this->bedType == 3) {
 			printf (".\t-1\t-1\t");
 		}
@@ -990,7 +910,7 @@ void BedFile::reportNullBedTab() {
 */
 void BedFile::reportNullBedNewLine() {
 	
-	if (isGff == false) {
+	if (_isGff == false) {
 		if (this->bedType == 3) {
 			printf (".\t-1\t-1\n");
 		}
diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h
index e0cc18d8..4873fb69 100755
--- a/src/utils/bedFile/bedFile.h
+++ b/src/utils/bedFile/bedFile.h
@@ -146,10 +146,13 @@ public:
 	// Destructor
 	~BedFile(void);
 	
+	// Open a BED file for reading (creates an istream pointer)
 	void Open(void);
 	
+	// Close an opened BED file.
 	void Close(void);
 	
+	// Get the next BED entry in an opened BED file.
 	bool GetNextBed (BED &bed, int &lineNum);
 
 	// load a BED file into a map keyed by chrom, then bin. value is vector of BEDs
@@ -194,22 +197,24 @@ public:
 	string bedFile;
 	unsigned int bedType;  // 3-6, 12 for BED
 						   // 9 for GFF
-						
-	bool isGff;
 	masterBedMap bedMap;
 	masterBedMapNoBin bedMapNoBin;
-	
+						
 private:
 	
-	// process as line from a BED file
-	bool parseBedLine (BED &bed, const vector<string> &lineVector, int lineNum);
+	// data
+	bool _isGff;
+	istream *_bedStream;
+
+	// methods
 	
+	// process as line from a BED file
+	bool parseBedLine (BED &bed, const vector<string> &lineVector, int lineNum);	
 	// process as line from a GFF file. convert to a BED feature.
 	bool parseGffLine (BED &bed, const vector<string> &lineVector, int lineNum);
-	
 	void setGff (bool isGff);
 	
-	istream *bedStream;
+
 };
 
 #endif /* BEDFILE_H */
diff --git a/src/utils/gzstream/gzstream.h b/src/utils/gzstream/gzstream.h
new file mode 100644
index 00000000..861653f4
--- /dev/null
+++ b/src/utils/gzstream/gzstream.h
@@ -0,0 +1,121 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.h
+// Revision      : $Revision: 1.5 $
+// Revision_date : $Date: 2002/04/26 23:30:15 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+// 
+// Standard streambuf implementation following Nicolai Josuttis, "The 
+// Standard C++ Library".
+// ============================================================================
+
+#ifndef GZSTREAM_H
+#define GZSTREAM_H 1
+
+// standard C++ with new header file names and std:: namespace
+#include <iostream>
+#include <fstream>
+#include <zlib.h>
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See below for user classes.
+// ----------------------------------------------------------------------------
+
+class gzstreambuf : public std::streambuf {
+private:
+    static const int bufferSize = 47+256;    // size of data buff
+    // totals 512 bytes under g++ for igzstream at the end.
+
+    gzFile           file;               // file handle for compressed file
+    char             buffer[bufferSize]; // data buffer
+    char             opened;             // open/close state of stream
+    int              mode;               // I/O mode
+
+    int flush_buffer();
+public:
+    gzstreambuf() : opened(0) {
+        setp( buffer, buffer + (bufferSize-1));
+        setg( buffer + 4,     // beginning of putback area
+              buffer + 4,     // read position
+              buffer + 4);    // end position      
+        // ASSERT: both input & output capabilities will not be used together
+    }
+    int is_open() { return opened; }
+    gzstreambuf* open( const char* name, int open_mode);
+    gzstreambuf* close();
+    ~gzstreambuf() { close(); }
+    
+    virtual int     overflow( int c = EOF);
+    virtual int     underflow();
+    virtual int     sync();
+};
+
+class gzstreambase : virtual public std::ios {
+protected:
+    gzstreambuf buf;
+public:
+    gzstreambase() { init(&buf); }
+    gzstreambase( const char* name, int open_mode);
+    ~gzstreambase();
+    void open( const char* name, int open_mode);
+    void close();
+    gzstreambuf* rdbuf() { return &buf; }
+};
+
+// ----------------------------------------------------------------------------
+// User classes. Use igzstream and ogzstream analogously to ifstream and
+// ofstream respectively. They read and write files based on the gz* 
+// function interface of the zlib. Files are compatible with gzip compression.
+// ----------------------------------------------------------------------------
+
+class igzstream : public gzstreambase, public std::istream {
+public:
+    igzstream() : std::istream( &buf) {} 
+    igzstream( const char* name, int open_mode = std::ios::in)
+        : gzstreambase( name, open_mode), std::istream( &buf) {}  
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::in) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+class ogzstream : public gzstreambase, public std::ostream {
+public:
+    ogzstream() : std::ostream( &buf) {}
+    ogzstream( const char* name, int mode = std::ios::out)
+        : gzstreambase( name, mode), std::ostream( &buf) {}  
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::out) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+#endif // GZSTREAM_H
+// ============================================================================
+// EOF //
+
diff --git a/src/windowBed/windowBed.cpp b/src/windowBed/windowBed.cpp
index d6d48b55..99328554 100755
--- a/src/windowBed/windowBed.cpp
+++ b/src/windowBed/windowBed.cpp
@@ -19,20 +19,23 @@
 BedWindow::BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, 
 					bool &writeCount, bool &strandWindows, bool &matchOnStrand) {
 
-	this->bedAFile = bedAFile;
-	this->bedBFile = bedBFile;
+	_bedAFile      = bedAFile;
+	_bedBFile      = bedBFile;
 
-	this->leftSlop = leftSlop;
-	this->rightSlop = rightSlop;
+	_leftSlop      = leftSlop;
+	_rightSlop     = rightSlop;
 
-	this->anyHit = anyHit;
-	this->noHit = noHit;
-	this->writeCount = writeCount;
-	this->strandWindows = strandWindows;	
-	this->matchOnStrand = matchOnStrand;
+	_anyHit        = anyHit;
+	_noHit         = noHit;
+	_writeCount    = writeCount;
+	_strandWindows = strandWindows;	
+	_matchOnStrand = matchOnStrand;
 		
-	this->bedA = new BedFile(bedAFile);
-	this->bedB = new BedFile(bedBFile);
+	_bedA          = new BedFile(bedAFile);
+	_bedB          = new BedFile(bedBFile);
+	
+	// do the work
+	WindowIntersectBed();
 }
 
 
@@ -60,23 +63,23 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
 	// If so, 
 	// if "+", then left is left and right is right
 	// if "-", the left is right and right is left.
-	if (this->strandWindows) {
+	if (_strandWindows) {
 		if (a.strand == "+") {
-			if ((a.start - this->leftSlop) > 0) aFudgeStart = a.start - this->leftSlop;
+			if ((a.start - _leftSlop) > 0) aFudgeStart = a.start - _leftSlop;
 			else aFudgeStart = 0;
-			aFudgeEnd = a.end + this->rightSlop;
+			aFudgeEnd = a.end + _rightSlop;
 		}
 		else {
-			if ((a.start - this->rightSlop) > 0) aFudgeStart = a.start - this->rightSlop;
+			if ((a.start - _rightSlop) > 0) aFudgeStart = a.start - _rightSlop;
 			else aFudgeStart = 0;
-			aFudgeEnd = a.end + this->leftSlop;
+			aFudgeEnd = a.end + _leftSlop;
 		}
 	}
 	// If not, add the windows irrespective of strand
 	else {
-		if ((a.start - this->leftSlop) > 0) aFudgeStart = a.start - this->leftSlop;
+		if ((a.start - _leftSlop) > 0) aFudgeStart = a.start - _leftSlop;
 		else aFudgeStart = 0;
-		aFudgeEnd = a.end + this->rightSlop;
+		aFudgeEnd = a.end + _rightSlop;
 	}
 	
 	
@@ -84,7 +87,7 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
 		Now report the hits (if any) based on the window around a.
 	*/
 	// get the hits in B for the A feature
-	bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, this->matchOnStrand);
+	_bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnStrand);
 
 	int numOverlaps = 0;
 	
@@ -102,70 +105,42 @@ void BedWindow::FindWindowOverlaps(BED &a, vector<BED> &hits) {
 			// is there enough overlap (default ~ 1bp)
 			if ( ((float) overlapBases / (float) aLength) > 0 ) { 
 				numOverlaps++;	
-				if (!anyHit && !noHit && !writeCount) {			
-					bedA->reportBedTab(a);
-					bedB->reportBedNewLine(*h);
+				if (_anyHit == false && _noHit == false && _writeCount == false) {			
+					_bedA->reportBedTab(a);
+					_bedB->reportBedNewLine(*h);
 				}
 			}
 		}
 	}
-	if (anyHit && (numOverlaps >= 1)) {
-		bedA->reportBedNewLine(a);	}
-	else if (writeCount) {
-		bedA->reportBedTab(a); printf("\t%d\n", numOverlaps);
+	if (_anyHit == true && (numOverlaps >= 1)) {
+		_bedA->reportBedNewLine(a);	}
+	else if (_writeCount == true) {
+		_bedA->reportBedTab(a); printf("\t%d\n", numOverlaps);
 	}
-	else if (noHit && (numOverlaps == 0)) {
-		bedA->reportBedNewLine(a);
+	else if (_noHit == true && (numOverlaps == 0)) {
+		_bedA->reportBedNewLine(a);
 	}
 }
 
  
-void BedWindow::WindowIntersectBed(istream &bedInput) {
+void BedWindow::WindowIntersectBed() {
 
 	// load the "B" bed file into a map so
 	// that we can easily compare "A" to it for overlaps
-	bedB->loadBedFileIntoMap();
+	_bedB->loadBedFileIntoMap();
 
-	string bedLine;                                                                                                                    
+	BED a;                                                                                                                    
 	int lineNum = 0;					// current input line number
 	vector<BED> hits;					// vector of potential hits
-	vector<string> bedFields;			// vector for a BED entry
-	
-	// reserve some space
 	hits.reserve(100);
-	bedFields.reserve(12);	
 
 	// process each entry in A
-	while (getline(bedInput, bedLine)) {
-
-		lineNum++;
-		Tokenize(bedLine,bedFields);
-		BED a;
-		
-		// find the overlaps between "a" and "B" if "a" is a valid BED entry. 
-		if (bedA->parseLine(a, bedFields, lineNum)) {
-			FindWindowOverlaps(a, hits);
-			hits.clear();
-		}
-		
-		// reset for the next input line
-		bedFields.clear();
+	_bedA->Open();
+	while (_bedA->GetNextBed(a, lineNum)) {
+		FindWindowOverlaps(a, hits);
+		hits.clear();
 	}
+	_bedA->Close();
 }
-// END WindowIntersectBed
 
 
-void BedWindow::DetermineBedInput() {
-	if (bedA->bedFile != "stdin") {   // process a file
-		ifstream beds(bedA->bedFile.c_str(), ios::in);
-		if ( !beds ) {
-			cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
-			exit (1);
-		}
-		WindowIntersectBed(beds);
-	}
-	else {   						// process stdin
-		WindowIntersectBed(cin);		
-	}
-}
-
diff --git a/src/windowBed/windowBed.h b/src/windowBed/windowBed.h
index ec3700dc..9aa28a84 100755
--- a/src/windowBed/windowBed.h
+++ b/src/windowBed/windowBed.h
@@ -27,32 +27,30 @@ class BedWindow {
 public:
 
 	// constructor 
-	BedWindow(string &, string &, int &, int &, bool &, bool &, bool &, bool &, bool &);
+	BedWindow(string &bedAFile, string &bedBFile, int &leftSlop, int &rightSlop, bool &anyHit, bool &noHit, 
+			  bool &writeCount, bool &strandWindows, bool &matchOnStrand);
 
 	// destructor
 	~BedWindow(void);
 		
-	void reportA(const BED &);
-	void reportB(const BED &);
-
-	void WindowIntersectBed(istream &bedInput);
-	void FindWindowOverlaps(BED &, vector<BED> &);
-	void DetermineBedInput();
-		
 private:
 
-	string bedAFile;
-	string bedBFile;
-	bool anyHit;
-	bool writeCount;
-	int leftSlop;
-	int rightSlop;
-	bool noHit;
-	bool strandWindows;
-	bool matchOnStrand;
+	string _bedAFile;
+	string _bedBFile;
+	bool _anyHit;
+	bool _writeCount;
+	int _leftSlop;
+	int _rightSlop;
+	bool _noHit;
+	bool _strandWindows;
+	bool _matchOnStrand;
 
 	// instance of a bed file class.
-	BedFile *bedA, *bedB;
+	BedFile *_bedA, *_bedB;
+	
+	// methods
+	void WindowIntersectBed();
+	void FindWindowOverlaps(BED &a, vector<BED> &hits);
 
 };
 #endif /* WINDOWBED_H */
diff --git a/src/windowBed/windowMain.cpp b/src/windowBed/windowMain.cpp
index bc8f1e12..2fdc2f70 100755
--- a/src/windowBed/windowMain.cpp
+++ b/src/windowBed/windowMain.cpp
@@ -162,7 +162,7 @@ int main(int argc, char* argv[]) {
 	
 	if (!showHelp) {
 		BedWindow *bi = new BedWindow(bedAFile, bedBFile, leftSlop, rightSlop, anyHit, noHit, writeCount, strandWindows, matchOnStrand);
-		bi->DetermineBedInput();
+		delete bi;
 		return 0;
 	}
 	else {
-- 
GitLab