From 4f95177029075e3422970506f75d7bda8e2cd726 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Fri, 16 Sep 2011 13:06:20 -0400
Subject: [PATCH] Added new -sorted option to intersectBed.  No memory and
 filthy fast.

---
 Makefile                                  |   2 +-
 src/chromsweep/Makefile                   |  46 ----
 src/chromsweep/chromsweepMain.cpp         | 277 ----------------------
 src/intersectBed/Makefile                 |   7 +-
 src/intersectBed/intersectBed.cpp         | 107 +++++----
 src/intersectBed/intersectBed.h           |  11 +-
 src/intersectBed/intersectMain.cpp        |   8 +-
 src/utils/chromsweep/Makefile             |  32 +++
 src/{ => utils}/chromsweep/chromsweep.cpp |  21 +-
 src/{ => utils}/chromsweep/chromsweep.h   |  14 +-
 10 files changed, 124 insertions(+), 401 deletions(-)
 delete mode 100644 src/chromsweep/Makefile
 delete mode 100644 src/chromsweep/chromsweepMain.cpp
 create mode 100644 src/utils/chromsweep/Makefile
 rename src/{ => utils}/chromsweep/chromsweep.cpp (91%)
 rename src/{ => utils}/chromsweep/chromsweep.h (89%)

diff --git a/Makefile b/Makefile
index d1d4924f..2209af58 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,6 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
 		  $(SRC_DIR)/bedToBam \
 		  $(SRC_DIR)/bedToIgv \
 		  $(SRC_DIR)/bed12ToBed6 \
-		  $(SRC_DIR)/chromsweep \
 		  $(SRC_DIR)/closestBed \
 		  $(SRC_DIR)/complementBed \
 		  $(SRC_DIR)/coverageBed \
@@ -45,6 +44,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
 UTIL_SUBDIRS =	$(SRC_DIR)/utils/lineFileUtilities \
 				$(SRC_DIR)/utils/bedFile \
 				$(SRC_DIR)/utils/bedGraphFile \
+				$(SRC_DIR)/utils/chromsweep \
 				$(SRC_DIR)/utils/gzstream \
 				$(SRC_DIR)/utils/fileType \
 				$(SRC_DIR)/utils/bedFilePE \
diff --git a/src/chromsweep/Makefile b/src/chromsweep/Makefile
deleted file mode 100644
index 4cd8f20f..00000000
--- a/src/chromsweep/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-UTILITIES_DIR = ../utils/
-OBJ_DIR = ../../obj/
-BIN_DIR = ../../bin/
-
-# -------------------
-# define our includes
-# -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-           -I$(UTILITIES_DIR)/lineFileUtilities/ \
-           -I$(UTILITIES_DIR)/version/ \
-           -I$(UTILITIES_DIR)/gzstream/ \
-           -I$(UTILITIES_DIR)/fileType/
-
-# ----------------------------------
-# define our source and object files
-# ----------------------------------
-SOURCES= chromsweepMain.cpp chromsweep.cpp
-OBJECTS= $(SOURCES:.cpp=.o)
-_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
-EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
-BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
-PROGRAM= chromsweep
-
-all: $(PROGRAM)
-
-.PHONY: all
-
-$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
-	@echo "  * linking $(PROGRAM)"
-	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
-
-$(BUILT_OBJECTS): $(SOURCES)
-	@echo "  * compiling" $(*F).cpp
-	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
-
-$(EXT_OBJECTS):
-	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
-	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
-	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
-	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
-		
-clean:
-	@echo "Cleaning up."
-	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
-
-.PHONY: clean
diff --git a/src/chromsweep/chromsweepMain.cpp b/src/chromsweep/chromsweepMain.cpp
deleted file mode 100644
index 90ed322c..00000000
--- a/src/chromsweep/chromsweepMain.cpp
+++ /dev/null
@@ -1,277 +0,0 @@
-/*****************************************************************************
-  chromsweepMain.cpp
-
-  (c) 2009 - Aaron Quinlan
-  Hall Laboratory
-  Department of Biochemistry and Molecular Genetics
-  University of Virginia
-  aaronquinlan@gmail.com
-
-  Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "chromsweep.h"
-#include "version.h"
-
-using namespace std;
-
-// define our program name
-#define PROGRAM_NAME "chromsweep"
-
-
-// define our parameter checking macro
-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
-
-// function declarations
-void ShowHelp(void);
-
-int main(int argc, char* argv[]) {
-
-    // our configuration variables
-    bool showHelp = false;
-
-    // input files
-    string bedAFile;
-    string bedBFile;
-
-    // input arguments
-    float overlapFraction = 1E-9;
-
-    bool haveBedA           = false;
-    bool haveBedB           = false;
-    bool noHit              = false;
-    bool anyHit             = false;
-    bool writeA             = false;
-    bool writeB             = false;
-    bool writeCount         = false;
-    bool writeOverlap       = false;
-    bool writeAllOverlap    = false;
-    bool haveFraction       = false;
-    bool reciprocalFraction = false;
-    bool forceStrand        = false;
-    bool obeySplits         = false;
-    bool inputIsBam         = false;
-    bool outputIsBam        = true;
-
-    // check to see if we should print out some help
-    if(argc <= 1) showHelp = true;
-
-    for(int i = 1; i < argc; i++) {
-        int parameterLength = (int)strlen(argv[i]);
-
-        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
-        (PARAMETER_CHECK("--help", 5, parameterLength))) {
-            showHelp = true;
-        }
-    }
-
-    if(showHelp) ShowHelp();
-
-    // do some parsing (all of these parameters require 2 strings)
-    for(int i = 1; i < argc; i++) {
-
-        int parameterLength = (int)strlen(argv[i]);
-
-        if(PARAMETER_CHECK("-a", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveBedA = true;
-                outputIsBam = false;
-                bedAFile = argv[i + 1];
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
-            if ((i+1) < argc) {
-                haveBedA = true;
-                inputIsBam = true;
-                bedAFile = argv[i + 1];
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveBedB = true;
-                bedBFile = argv[i + 1];
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
-            outputIsBam = false;
-        }
-        else if(PARAMETER_CHECK("-u", 2, parameterLength)) {
-            anyHit = true;
-        }
-        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveFraction = true;
-                overlapFraction = atof(argv[i + 1]);
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {
-            writeA = true;
-        }
-        else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {
-            writeB = true;
-        }
-        else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {
-            writeOverlap = true;
-        }
-        else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {
-            writeAllOverlap = true;
-            writeOverlap = true;
-        }
-        else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
-            writeCount = true;
-        }
-        else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
-            reciprocalFraction = true;
-        }
-        else if (PARAMETER_CHECK("-v", 2, parameterLength)) {
-            noHit = true;
-        }
-        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
-            forceStrand = true;
-        }
-        else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
-            obeySplits = true;
-        }
-        else {
-            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
-            showHelp = true;
-        }
-    }
-
-    // make sure we have both input files
-    if (!haveBedA || !haveBedB) {
-        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (anyHit && noHit) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -v, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (writeB && writeCount) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (writeCount && writeOverlap) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (writeA && writeOverlap) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -wa OR -wo, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (writeB && writeOverlap) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (reciprocalFraction && !haveFraction) {
-        cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (anyHit && writeCount) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (anyHit && writeB) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wb, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (anyHit && writeOverlap) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wo, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-
-    if (!showHelp) {
-
-        ChromSweep *sweep = new ChromSweep(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
-                                            writeAllOverlap, overlapFraction, noHit, writeCount, forceStrand,
-                                            reciprocalFraction, obeySplits, inputIsBam, outputIsBam);
-        
-        pair<BED, vector<BED> > hit_set;
-        while (sweep->Next(hit_set)) {
-            sweep->ReportQuery(hit_set.first);
-            cout << hit_set.second.size() << "\n";
-        }
-        delete sweep;
-        return 0;
-    }
-    else {
-        ShowHelp();
-    }
-}
-
-void ShowHelp(void) {
-
-    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
-
-    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
-
-    cerr << "Summary: Report overlaps between two feature files." << endl << endl;
-
-    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
-
-    cerr << "Options: " << endl;
-
-    cerr << "\t-abam\t"         << "The A input file is in BAM format.  Output will be BAM as well." << endl << endl;
-
-    cerr << "\t-bed\t"          << "When using BAM input (-abam), write output as BED. The default" << endl;
-    cerr                        << "\t\tis to write output in BAM when using -abam." << endl << endl;
-
-    cerr << "\t-wa\t"           << "Write the original entry in A for each overlap." << endl << endl;
-
-    cerr << "\t-wb\t"           << "Write the original entry in B for each overlap." << endl;
-    cerr                        << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;
-
-    cerr << "\t-wo\t"           << "Write the original A and B entries plus the number of base" << endl;
-    cerr                        << "\t\tpairs of overlap between the two features." << endl;
-    cerr                        << "\t\t- Overlaps restricted by -f and -r." << endl;
-    cerr                        << "\t\t  Only A features with overlap are reported." << endl << endl;
-
-    cerr << "\t-wao\t"          << "Write the original A and B entries plus the number of base" << endl;
-    cerr                        << "\t\tpairs of overlap between the two features." << endl;
-    cerr                        << "\t\t- Overlapping features restricted by -f and -r." << endl;
-    cerr                        << "\t\t  However, A features w/o overlap are also reported" << endl;
-    cerr                        << "\t\t  with a NULL B feature and overlap = 0." << endl << endl;
-
-    cerr << "\t-u\t"            << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
-    cerr                        << "\t\t- In other words, just report the fact >=1 hit was found." << endl;
-    cerr                        << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
-
-    cerr << "\t-c\t"            << "For each entry in A, report the number of overlaps with B." << endl;
-    cerr                        << "\t\t- Reports 0 for A entries that have no overlap with B." << endl;
-    cerr                        << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
-
-    cerr << "\t-v\t"            << "Only report those entries in A that have _no overlaps_ with B." << endl;
-    cerr                        << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl;
-
-    cerr << "\t-f\t"            << "Minimum overlap required as a fraction of A." << endl;
-    cerr                        << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
-    cerr                        << "\t\t- FLOAT (e.g. 0.50)" << endl << endl;
-
-    cerr << "\t-r\t"            << "Require that the fraction overlap be reciprocal for A and B." << endl;
-    cerr                        << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;
-    cerr                        << "\t\t  that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;
-
-    cerr << "\t-s\t"            << "Force strandedness.  That is, only report hits in B that" << endl;
-    cerr                        << "\t\toverlap A on the same strand." << endl;
-    cerr                        << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
-
-    cerr << "\t-split\t"        << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
-
-
-    // end the program here
-    exit(1);
-
-}
diff --git a/src/intersectBed/Makefile b/src/intersectBed/Makefile
index 41b4e25e..50ca671a 100644
--- a/src/intersectBed/Makefile
+++ b/src/intersectBed/Makefile
@@ -12,13 +12,15 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
            -I$(UTILITIES_DIR)/lineFileUtilities/ \
            -I$(UTILITIES_DIR)/fileType/ \
            -I$(UTILITIES_DIR)/BamTools/include \
-           -I$(UTILITIES_DIR)/BamTools-Ancillary
+           -I$(UTILITIES_DIR)/BamTools-Ancillary \
+           -I$(UTILITIES_DIR)/chromsweep \
+
 # ----------------------------------
 # define our source and object files
 # ----------------------------------
 SOURCES= intersectMain.cpp intersectBed.cpp
 OBJECTS= $(SOURCES:.cpp=.o)
-_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o chromsweep.o
 EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
 BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
 PROGRAM= intersectBed
@@ -42,6 +44,7 @@ $(EXT_OBJECTS):
 	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
 	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
 	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/chromsweep/
 		
 clean:
 	@echo "Cleaning up."
diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp
index 072c8c27..8f2dd4f3 100644
--- a/src/intersectBed/intersectBed.cpp
+++ b/src/intersectBed/intersectBed.cpp
@@ -15,7 +15,7 @@
 /************************************
 Helper functions
 ************************************/
-bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool printable) {
+bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {
 
     // how many overlaps are there b/w the bed and the set of hits?
     int s, e, overlapBases;
@@ -37,7 +37,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print
             if (_reciprocal == false) {
                 hitsFound = true;
                 numOverlaps++;
-                if (printable == true)
+                if (_printable == true)
                     ReportOverlapDetail(overlapBases, a, *h, s, e);
             }
             // we require there to be sufficient __reciprocal__ overlap
@@ -47,7 +47,7 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print
                 if (bOverlap >= _overlapFraction) {
                     hitsFound = true;
                     numOverlaps++;
-                    if (printable == true)
+                    if (_printable == true)
                         ReportOverlapDetail(overlapBases, a, *h, s, e);
                 }
             }
@@ -66,7 +66,8 @@ bool BedIntersect::processHits(const BED &a, const vector<BED> &hits, bool print
 BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
                            bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
                            float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
-                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam) {
+                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
+                           bool sortedInput) {
 
     _bedAFile            = bedAFile;
     _bedBFile            = bedBFile;
@@ -85,11 +86,13 @@ BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
     _bamInput            = bamInput;
     _bamOutput           = bamOutput;
     _isUncompressedBam   = isUncompressedBam;
+    _sortedInput         = sortedInput;
 
-    // create new BED file objects for A and B
-    _bedA = new BedFile(bedAFile);
-    _bedB = new BedFile(bedBFile);
-
+    // should we print each overlap, or does the user want summary information?
+    _printable = true;
+    if (_anyHit || _noHit || _writeCount)
+        _printable = false;
+        
     if (_bamInput == false)
         IntersectBed();
     else
@@ -108,14 +111,9 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
 
     bool hitsFound = false;
 
-    // should we print each overlap, or does the user want summary information?
-    bool printable = true;
-    if (_anyHit || _noHit || _writeCount)
-        printable = false;
-
     // collect and report the sufficient hits
     _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
-    hitsFound = processHits(a, hits, printable);
+    hitsFound = processHits(a, hits);
 
     return hitsFound;
 }
@@ -190,42 +188,61 @@ bool BedIntersect::FindOneOrMoreOverlap(const BED &a) {
 
 void BedIntersect::IntersectBed() {
 
-    // load the "B" file into a map in order to
-    // compare each entry in A to it in search of overlaps.
-    _bedB->loadBedFileIntoMap();
-
-    int lineNum = 0;
-    vector<BED> hits;
-    hits.reserve(100);
-    BED a, nullBed;
-    BedLineStatus bedStatus;
-
-    // open the "A" file, process each BED entry and searh for overlaps.
-    _bedA->Open();
-    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
-        if (bedStatus == BED_VALID) {
-            // treat the BED as a single "block"
-            if (_obeySplits == false) {
-                FindOverlaps(a, hits);
-                hits.clear();
-                a = nullBed;
-            }
-            // split the BED12 into blocks and look for overlaps in each discrete block
-            else {
-                bedVector bedBlocks;  // vec to store the discrete BED "blocks"
-                splitBedIntoBlocks(a, lineNum, bedBlocks);
-
-                vector<BED>::const_iterator bedItr  = bedBlocks.begin();
-                vector<BED>::const_iterator bedEnd  = bedBlocks.end();
-                for (; bedItr != bedEnd; ++bedItr) {
-                    FindOverlaps(*bedItr, hits);
+    // create new BED file objects for A and B
+    _bedA = new BedFile(_bedAFile);
+    _bedB = new BedFile(_bedBFile);
+
+    if (_sortedInput == false) {
+        // load the "B" file into a map in order to
+        // compare each entry in A to it in search of overlaps.
+        _bedB->loadBedFileIntoMap();
+
+        int lineNum = 0;
+        vector<BED> hits;
+        hits.reserve(100);
+        BED a, nullBed;
+        BedLineStatus bedStatus;
+
+        // open the "A" file, process each BED entry and searh for overlaps.
+        _bedA->Open();
+        while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
+            if (bedStatus == BED_VALID) {
+                // treat the BED as a single "block"
+                if (_obeySplits == false) {
+                    FindOverlaps(a, hits);
                     hits.clear();
+                    a = nullBed;
+                }
+                // split the BED12 into blocks and look for overlaps in each discrete block
+                else {
+                    bedVector bedBlocks;  // vec to store the discrete BED "blocks"
+                    splitBedIntoBlocks(a, lineNum, bedBlocks);
+
+                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
+                    vector<BED>::const_iterator bedEnd  = bedBlocks.end();
+                    for (; bedItr != bedEnd; ++bedItr) {
+                        FindOverlaps(*bedItr, hits);
+                        hits.clear();
+                    }
+                    a = nullBed;
                 }
-                a = nullBed;
             }
         }
+        _bedA->Close();
+    }
+    else {
+        // use the chromsweep algorithm to detect overlaps on the fly.
+        ChromSweep sweep = ChromSweep(_bedA, _bedB, _anyHit, _writeA, _writeB, _writeOverlap,
+                                      _writeAllOverlap, _overlapFraction, _noHit, _writeCount, _sameStrand, _diffStrand,
+                                      _reciprocal, _obeySplits, _bamInput, _bamOutput);
+        
+        // hit_set.first  = current entry (a) from A
+        // hit_set.second = list of hits in B for this a
+        pair<BED, vector<BED> > hit_set;
+        while (sweep.Next(hit_set)) {
+            processHits(hit_set.first, hit_set.second);
+        }
     }
-    _bedA->Close();
 }
 
 
diff --git a/src/intersectBed/intersectBed.h b/src/intersectBed/intersectBed.h
index e4106934..1a8d74a3 100644
--- a/src/intersectBed/intersectBed.h
+++ b/src/intersectBed/intersectBed.h
@@ -13,7 +13,7 @@
 #define INTERSECTBED_H
 
 #include "bedFile.h"
-
+#include "chromsweep.h"
 #include "api/BamReader.h"
 #include "api/BamWriter.h"
 #include "api/BamAux.h"
@@ -37,7 +37,8 @@ public:
     BedIntersect(string bedAFile, string bedBFile, bool anyHit,
                                bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
                                float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
-                               bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam);
+                               bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
+                               bool sortedInput);
 
     // destructor
     ~BedIntersect(void);
@@ -67,7 +68,9 @@ private:
     bool  _bamInput;
     bool  _bamOutput;
     bool  _isUncompressedBam;
-
+    bool  _sortedInput;
+    bool  _printable;
+    
     // instance of a bed file class.
     BedFile *_bedA, *_bedB;
 
@@ -80,7 +83,7 @@ private:
 
     void IntersectBam(string bamFile);
 
-    bool processHits(const BED &a, const vector<BED> &hits, bool printable);
+    bool processHits(const BED &a, const vector<BED> &hits);
 
     bool FindOverlaps(const BED &a, vector<BED> &hits);
 
diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp
index 91d0dd44..a8d393f7 100644
--- a/src/intersectBed/intersectMain.cpp
+++ b/src/intersectBed/intersectMain.cpp
@@ -53,6 +53,7 @@ int main(int argc, char* argv[]) {
     bool inputIsBam         = false;
     bool outputIsBam        = true;
     bool uncompressedBam    = false;
+    bool sortedInput        = false;
     // check to see if we should print out some help
     if(argc <= 1) showHelp = true;
 
@@ -142,6 +143,9 @@ int main(int argc, char* argv[]) {
         else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {
             uncompressedBam = true;
         }
+        else if(PARAMETER_CHECK("-sorted", 7, parameterLength)) {
+            sortedInput = true;
+        }        
         else {
             cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
             showHelp = true;
@@ -208,7 +212,7 @@ int main(int argc, char* argv[]) {
 
         BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
                                             writeAllOverlap, overlapFraction, noHit, writeCount, sameStrand, diffStrand,
-                                            reciprocalFraction, obeySplits, inputIsBam, outputIsBam, uncompressedBam);
+                                            reciprocalFraction, obeySplits, inputIsBam, outputIsBam, uncompressedBam, sortedInput);
         delete bi;
         return 0;
     }
@@ -281,6 +285,8 @@ void ShowHelp(void) {
 
     cerr << "\t-split\t"        << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl << endl;
 
+    cerr << "\t-sorted\t"        << "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input" << endl << endl;
+
 
     // end the program here
     exit(1);
diff --git a/src/utils/chromsweep/Makefile b/src/utils/chromsweep/Makefile
new file mode 100644
index 00000000..005e928d
--- /dev/null
+++ b/src/utils/chromsweep/Makefile
@@ -0,0 +1,32 @@
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= chromsweep.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=lineFileUtilities.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+$(BUILT_OBJECTS): $(SOURCES)
+	@echo "  * compiling" $(*F).cpp
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+	@$(MAKE) --no-print-directory -C $(INCLUDES)
+
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
diff --git a/src/chromsweep/chromsweep.cpp b/src/utils/chromsweep/chromsweep.cpp
similarity index 91%
rename from src/chromsweep/chromsweep.cpp
rename to src/utils/chromsweep/chromsweep.cpp
index 70c7cdc8..e4788e31 100644
--- a/src/chromsweep/chromsweep.cpp
+++ b/src/utils/chromsweep/chromsweep.cpp
@@ -12,7 +12,6 @@
 #include "lineFileUtilities.h"
 #include "chromsweep.h"
 #include <queue>
-#include <set>
 
 bool after(const BED &a, const BED &b);
 void report_hits(const BED &curr_qy, const vector<BED> &hits);
@@ -22,13 +21,13 @@ vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector
 /*
     Constructor
 */
-ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit,
+ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool anyHit,
                            bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
-                           float overlapFraction, bool noHit, bool writeCount, bool forceStrand,
+                           float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
                            bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {
 
-    _bedAFile            = bedAFile;
-    _bedBFile            = bedBFile;
+    _bedA                = bedA;
+    _bedB                = bedB;
     _anyHit              = anyHit;
     _noHit               = noHit;
     _writeA              = writeA;
@@ -37,20 +36,12 @@ ChromSweep::ChromSweep(string bedAFile, string bedBFile, bool anyHit,
     _writeAllOverlap     = writeAllOverlap;
     _writeCount          = writeCount;
     _overlapFraction     = overlapFraction;
-    _forceStrand         = forceStrand;
+    _sameStrand          = sameStrand;
+    _diffStrand          = diffStrand;
     _reciprocal          = reciprocal;
     _obeySplits          = obeySplits;
     _bamInput            = bamInput;
     _bamOutput           = bamOutput;
-
-    if (_anyHit || _noHit || _writeCount)
-        _printable = false;
-    else
-        _printable = true;
-
-    // create new BED file objects for A and B
-    _bedA = new BedFile(bedAFile);
-    _bedB = new BedFile(bedBFile);
     
     // prime the results pump.
     _qy_lineNum = 0;
diff --git a/src/chromsweep/chromsweep.h b/src/utils/chromsweep/chromsweep.h
similarity index 89%
rename from src/chromsweep/chromsweep.h
rename to src/utils/chromsweep/chromsweep.h
index 1fb58405..5a18ab54 100644
--- a/src/chromsweep/chromsweep.h
+++ b/src/utils/chromsweep/chromsweep.h
@@ -13,13 +13,6 @@
 #define CHROMSWEEP_H
 
 #include "bedFile.h"
-// #include "BamReader.h"
-// #include "BamWriter.h"
-// #include "BamAncillary.h"
-// #include "BamAux.h"
-// using namespace BamTools;
-
-
 #include <vector>
 #include <queue>
 #include <iostream>
@@ -34,9 +27,9 @@ class ChromSweep {
 public:
 
     // constructor
-    ChromSweep(string bedAFile, string bedBFile, bool anyHit,
+    ChromSweep(BedFile *bedA, BedFile *bedB, bool anyHit,
                                bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
-                               float overlapFraction, bool noHit, bool writeCount, bool forceStrand,
+                               float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
                                bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput);
 
     // destructor
@@ -59,7 +52,8 @@ private:
     bool  _writeOverlap;
     bool  _writeAllOverlap;
 
-    bool  _forceStrand;
+    bool  _sameStrand;
+    bool  _diffStrand;
     bool  _reciprocal;
     float _overlapFraction;
 
-- 
GitLab