From e59f85d44a6735566f5c3598c24733e44a106a6d Mon Sep 17 00:00:00 2001 From: Jake Biesinger <jake.biesinger@gmail.com> Date: Tue, 25 Oct 2011 00:59:50 -0700 Subject: [PATCH] Added a signed distance option (-D) to closestBed --- src/closestBed/closestBed.cpp | 62 ++++++++++++++++++++++++++++------ src/closestBed/closestBed.h | 3 +- src/closestBed/closestMain.cpp | 12 ++++++- 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/src/closestBed/closestBed.cpp b/src/closestBed/closestBed.cpp index dcd44a1d..02f4786c 100644 --- a/src/closestBed/closestBed.cpp +++ b/src/closestBed/closestBed.cpp @@ -22,13 +22,14 @@ const int SLOPGROWTH = 2048000; Constructor */ BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, - string &tieMode, bool reportDistance, bool ignoreOverlaps) + string &tieMode, bool reportDistance, bool signDistance, bool ignoreOverlaps) : _bedAFile(bedAFile) , _bedBFile(bedBFile) , _tieMode(tieMode) , _sameStrand(sameStrand) , _diffStrand(diffStrand) , _reportDistance(reportDistance) + , _signDistance(signDistance) , _ignoreOverlaps(ignoreOverlaps) { _bedA = new BedFile(_bedAFile); @@ -55,8 +56,8 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { CHRPOS aFudgeEnd; int numOverlaps = 0; vector<BED> closestB; - CHRPOS minDistance = INT_MAX; - vector<CHRPOS> distances; + int32_t minDistance = INT_MAX; + vector<int32_t> distances; // is there at least one feature in B on the same chrom // as the current A feature? @@ -102,30 +103,71 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { } // the hit is to the "left" of A else if (h->end <= a.start) { - if ((a.start - h->end) < minDistance) { - minDistance = a.start - h->end; + if ((a.start - h->end) < abs(minDistance)) { + if (_signDistance) { + if (a.strand == "+") { + minDistance = h->end - a.start; + } + else { + minDistance = a.start - h->end; + } + } + else { + minDistance = a.start - h->end; + } closestB.clear(); closestB.push_back(*h); distances.clear(); distances.push_back(minDistance); } - else if ((a.start - h->end) == minDistance) { + else if ((a.start - h->end) == abs(minDistance)) { + if (_signDistance) { + if (a.strand == "+") { + minDistance = h->end - a.start; + } + else { + minDistance = a.start - h->end; + } + } + else { + minDistance = h->end - a.start; + } closestB.push_back(*h); distances.push_back(minDistance); } } // the hit is to the "right" of A else if (h->start >= a.end) { - if ((h->start - a.end) < minDistance) { - minDistance = h->start - a.end; - + if ((h->start - a.end) < abs(minDistance)) { + if (_signDistance) { + if (a.strand == "+") { + minDistance = h->start - a.end; + } + else { + minDistance = a.end - h->start; + } + } + else { + minDistance = h->start - a.end; + } closestB.clear(); closestB.push_back(*h); distances.clear(); distances.push_back(minDistance); } - else if ((h->start - a.end) == minDistance) { + else if ((h->start - a.end) == abs(minDistance)) { + if (_signDistance) { + if (a.strand == "+") { + minDistance = h->start - a.end; + } + else { + minDistance = a.end - h->start; + } + } + else { + minDistance = h->start - a.end; + } closestB.push_back(*h); distances.push_back(minDistance); } diff --git a/src/closestBed/closestBed.h b/src/closestBed/closestBed.h index 0ee74d25..d8dae3b8 100644 --- a/src/closestBed/closestBed.h +++ b/src/closestBed/closestBed.h @@ -29,7 +29,7 @@ public: // constructor BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, string &tieMode, - bool reportDistance, bool ignoreOverlaps); + bool reportDistance, bool signDistance, bool ignoreOverlaps); // destructor ~BedClosest(void); @@ -46,6 +46,7 @@ private: bool _sameStrand; bool _diffStrand; bool _reportDistance; + bool _signDistance; bool _ignoreOverlaps; BedFile *_bedA, *_bedB; diff --git a/src/closestBed/closestMain.cpp b/src/closestBed/closestMain.cpp index d2f4fce7..a0da8880 100644 --- a/src/closestBed/closestMain.cpp +++ b/src/closestBed/closestMain.cpp @@ -40,6 +40,7 @@ int main(int argc, char* argv[]) { bool diffStrand = false; bool ignoreOverlaps = false; bool reportDistance = false; + bool signDistance = false; // check to see if we should print out some help @@ -84,6 +85,10 @@ int main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-d", 2, parameterLength)) { reportDistance = true; } + else if (PARAMETER_CHECK("-D", 2, parameterLength)) { + reportDistance = true; + signDistance = true; + } else if (PARAMETER_CHECK("-io", 3, parameterLength)) { ignoreOverlaps = true; } @@ -118,7 +123,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - BedClosest *bc = new BedClosest(bedAFile, bedBFile, sameStrand, diffStrand, tieMode, reportDistance, ignoreOverlaps); + BedClosest *bc = new BedClosest(bedAFile, bedBFile, sameStrand, diffStrand, tieMode, reportDistance, signDistance, ignoreOverlaps); delete bc; return 0; } @@ -151,6 +156,11 @@ void ShowHelp(void) { cerr << "\t-d\t" << "In addition to the closest feature in B, " << endl; cerr << "\t\treport its distance to A as an extra column." << endl; cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl; + + cerr << "\t-D\t" << "Like -d, report the closest feature in B, and its distance to A" << endl; + cerr << "\t\tas an extra column. Unlike -d, use negative distances to report" << endl; + cerr << "\t\tif feature B is upstream of A. Note: Upstream B features are" << endl; + cerr << "\"to the right\" of features on the - strand" << endl; << endl; cerr << "\t-no\t" << "Ignore features in B that overlap A. That is, we want close, but " << endl; cerr << "\t\tnot touching features only." << endl << endl; -- GitLab