diff --git a/src/closestBed/closestBed.cpp b/src/closestBed/closestBed.cpp index dbf03e3c2d39885295b45058640ed91724b4cd70..fae465e0298c23d083c685110f0fec816b88ec3e 100644 --- a/src/closestBed/closestBed.cpp +++ b/src/closestBed/closestBed.cpp @@ -21,15 +21,16 @@ const int SLOPGROWTH = 2048000; /* Constructor */ -BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool &forceStrand, string &tieMode) { +BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance) { - _bedAFile = bedAFile; - _bedBFile = bedBFile; - _forceStrand = forceStrand; - _tieMode = tieMode; + _bedAFile = bedAFile; + _bedBFile = bedBFile; + _forceStrand = forceStrand; + _tieMode = tieMode; + _reportDistance = reportDistance; - _bedA = new BedFile(bedAFile); - _bedB = new BedFile(bedBFile); + _bedA = new BedFile(bedAFile); + _bedB = new BedFile(bedBFile); FindClosestBed(); } @@ -54,6 +55,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { vector<BED> closestB; float maxOverlap = 0; CHRPOS minDistance = INT_MAX; + vector<CHRPOS> distances; if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) { @@ -67,7 +69,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { else aFudgeStart = 0; - if ((a.start + slop) < (2 * MAXSLOP)) + if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP)) aFudgeEnd = a.end + slop; else aFudgeEnd = 2 * MAXSLOP; @@ -87,34 +89,55 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { int overlapBases = (e - s); // the number of overlapping bases b/w a and b int aLength = (a.end - a.start); // the length of a in b.p. + // there is overlap if (s < e) { // is there enough overlap (default ~ 1bp) float overlap = (float) overlapBases / (float) aLength; if ( overlap > 0 ) { // is this hit the closest? if (overlap > maxOverlap) { + maxOverlap = overlap; + closestB.clear(); closestB.push_back(*h); - maxOverlap = overlap; + distances.clear(); + distances.push_back(0); } - else if (overlap == maxOverlap) closestB.push_back(*h); + else if (overlap == maxOverlap) { + closestB.push_back(*h); + distances.push_back(0); + } } } + // the hit is to the "left" of A else if (h->end < a.start){ if ((a.start - h->end) < minDistance) { + minDistance = a.start - h->end; + closestB.clear(); closestB.push_back(*h); - minDistance = a.start - h->end; + distances.clear(); + distances.push_back(minDistance); } - else if ((a.start - h->end) == minDistance) closestB.push_back(*h); + else if ((a.start - h->end) == minDistance) { + closestB.push_back(*h); + distances.push_back(minDistance); + } } + // the hit is to the "right" of A else { if ((h->start - a.end) < minDistance) { + minDistance = h->start - a.end; + closestB.clear(); closestB.push_back(*h); - minDistance = h->start - a.end; + distances.clear(); + distances.push_back(minDistance); } - else if ((h->start - a.end) == minDistance) closestB.push_back(*h); + else if ((h->start - a.end) == minDistance) { + closestB.push_back(*h); + distances.push_back(minDistance); + } } } // if no overlaps were found, we'll widen the range @@ -122,31 +145,51 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { slop += SLOPGROWTH; } } + // there is no feature in B on the same chromosome as A else { _bedA->reportBedTab(a); - _bedB->reportNullBedNewLine(); + if (_reportDistance == true) { + _bedB->reportNullBedTab(); + cout << -1 << endl; + } + else + _bedB->reportNullBedNewLine(); } + // report the closest feature(s) in B to the current A feature. + // obey the user's reporting request (_tieMode) if (numOverlaps > 0) { - if (closestB.size() == 1) { - _bedA->reportBedTab(a); - _bedB->reportBedNewLine(closestB[0]); + if (closestB.size() == 1 || _tieMode == "first") { + _bedA->reportBedTab(a); + if (_reportDistance == true) { + _bedB->reportBedTab(closestB[0]); + cout << distances[0] << endl; + } + else + _bedB->reportBedNewLine(closestB[0]); } else { if (_tieMode == "all") { + size_t i = 0; for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) { - _bedA->reportBedTab(a); - _bedB->reportBedNewLine(*b); + _bedA->reportBedTab(a); + if (_reportDistance == true) { + _bedB->reportBedTab(*b); + cout << distances[i++] <<endl; + } + else + _bedB->reportBedNewLine(*b); } } - else if (_tieMode == "first") { - _bedA->reportBedTab(a); - _bedB->reportBedNewLine(closestB[0]); - } else if (_tieMode == "last") { - _bedA->reportBedTab(a); - _bedB->reportBedNewLine(closestB[closestB.size()-1]); + _bedA->reportBedTab(a); + if (_reportDistance == true) { + _bedB->reportBedTab(closestB[closestB.size()-1]); + cout << distances[distances.size() - 1]<<endl; + } + else + _bedB->reportBedNewLine(closestB[closestB.size()-1]); } } } diff --git a/src/closestBed/closestBed.h b/src/closestBed/closestBed.h index 01acdc9aaeba2d76012eeca98693d72ea66eb26f..07de1b41a5fefc322720af9b0d940e2cd2dc6893 100644 --- a/src/closestBed/closestBed.h +++ b/src/closestBed/closestBed.h @@ -27,7 +27,7 @@ class BedClosest { public: // constructor - BedClosest(string &bedAFile, string &bedBFile, bool &forceStrand, string &tieMode); + BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance); // destructor ~BedClosest(void); @@ -41,7 +41,8 @@ private: string _bedAFile; string _bedBFile; string _tieMode; - bool _forceStrand; + bool _forceStrand; + bool _reportDistance; BedFile *_bedA, *_bedB; diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp index 210c3a4e85dfa5e30f418e1b80bcb3595ecf242b..23b432466312e7971a5b069c49b77f2e5c0e7ce2 100644 --- a/src/closestBed/closestMain.cpp +++ b/src/closestBed/closestMain.cpp @@ -33,10 +33,11 @@ int main(int argc, char* argv[]) { string bedBFile; string tieMode = "all"; - bool haveBedA = false; - bool haveBedB = false; - bool haveTieMode = false; - bool forceStrand = false; + bool haveBedA = false; + bool haveBedB = false; + bool haveTieMode = false; + bool forceStrand = false; + bool reportDistance = false; // check to see if we should print out some help @@ -75,6 +76,9 @@ int main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-s", 2, parameterLength)) { forceStrand = true; } + else if (PARAMETER_CHECK("-d", 2, parameterLength)) { + reportDistance = true; + } else if (PARAMETER_CHECK("-t", 2, parameterLength)) { if ((i+1) < argc) { haveTieMode = true; @@ -97,7 +101,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode); + BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode, reportDistance); delete bc; return 0; } @@ -110,7 +114,8 @@ void ShowHelp(void) { cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + cerr << "Authors: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + cerr << "\t Erik Arner, Riken" << endl << endl; cerr << "Summary: For each feature in A, finds the closest " << endl; cerr << "\t feature (upstream or downstream) in B." << endl << endl; @@ -121,6 +126,11 @@ void ShowHelp(void) { cerr << "\t-s\t" << "Force strandedness. That is, find the closest feature in B" << endl; cerr << "\t\tthat overlaps A on the same strand." << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; + + cerr << "\t-d\t" << "In addition to the closest feature in B, " << endl; + cerr << "\t\treport its distance to A as an extra column." << endl; + cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl; + cerr << "\t-t\t" << "How ties for closest feature are handled. This occurs when two" << endl; cerr << "\t\tfeatures in B have exactly the same overlap with A." << endl; @@ -137,5 +147,4 @@ void ShowHelp(void) { // end the program here exit(1); - }