Skip to content
Snippets Groups Projects
Commit 90464038 authored by Aaron's avatar Aaron
Browse files

Added feature to get distance to closest feature (-d). Thanks to Erik Arner.

parent 7b2ca585
No related branches found
No related tags found
No related merge requests found
...@@ -21,15 +21,16 @@ const int SLOPGROWTH = 2048000; ...@@ -21,15 +21,16 @@ const int SLOPGROWTH = 2048000;
/* /*
Constructor Constructor
*/ */
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool &forceStrand, string &tieMode) { BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance) {
_bedAFile = bedAFile; _bedAFile = bedAFile;
_bedBFile = bedBFile; _bedBFile = bedBFile;
_forceStrand = forceStrand; _forceStrand = forceStrand;
_tieMode = tieMode; _tieMode = tieMode;
_reportDistance = reportDistance;
_bedA = new BedFile(bedAFile); _bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile); _bedB = new BedFile(bedBFile);
FindClosestBed(); FindClosestBed();
} }
...@@ -54,6 +55,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { ...@@ -54,6 +55,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
vector<BED> closestB; vector<BED> closestB;
float maxOverlap = 0; float maxOverlap = 0;
CHRPOS minDistance = INT_MAX; CHRPOS minDistance = INT_MAX;
vector<CHRPOS> distances;
if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) { if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {
...@@ -67,7 +69,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { ...@@ -67,7 +69,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
else else
aFudgeStart = 0; aFudgeStart = 0;
if ((a.start + slop) < (2 * MAXSLOP)) if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))
aFudgeEnd = a.end + slop; aFudgeEnd = a.end + slop;
else else
aFudgeEnd = 2 * MAXSLOP; aFudgeEnd = 2 * MAXSLOP;
...@@ -87,34 +89,55 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { ...@@ -87,34 +89,55 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
int overlapBases = (e - s); // the number of overlapping bases b/w a and b int overlapBases = (e - s); // the number of overlapping bases b/w a and b
int aLength = (a.end - a.start); // the length of a in b.p. int aLength = (a.end - a.start); // the length of a in b.p.
// there is overlap
if (s < e) { if (s < e) {
// is there enough overlap (default ~ 1bp) // is there enough overlap (default ~ 1bp)
float overlap = (float) overlapBases / (float) aLength; float overlap = (float) overlapBases / (float) aLength;
if ( overlap > 0 ) { if ( overlap > 0 ) {
// is this hit the closest? // is this hit the closest?
if (overlap > maxOverlap) { if (overlap > maxOverlap) {
maxOverlap = overlap;
closestB.clear(); closestB.clear();
closestB.push_back(*h); closestB.push_back(*h);
maxOverlap = overlap; distances.clear();
distances.push_back(0);
} }
else if (overlap == maxOverlap) closestB.push_back(*h); else if (overlap == maxOverlap) {
closestB.push_back(*h);
distances.push_back(0);
}
} }
} }
// the hit is to the "left" of A
else if (h->end < a.start){ else if (h->end < a.start){
if ((a.start - h->end) < minDistance) { if ((a.start - h->end) < minDistance) {
minDistance = a.start - h->end;
closestB.clear(); closestB.clear();
closestB.push_back(*h); closestB.push_back(*h);
minDistance = a.start - h->end; distances.clear();
distances.push_back(minDistance);
} }
else if ((a.start - h->end) == minDistance) closestB.push_back(*h); else if ((a.start - h->end) == minDistance) {
closestB.push_back(*h);
distances.push_back(minDistance);
}
} }
// the hit is to the "right" of A
else { else {
if ((h->start - a.end) < minDistance) { if ((h->start - a.end) < minDistance) {
minDistance = h->start - a.end;
closestB.clear(); closestB.clear();
closestB.push_back(*h); closestB.push_back(*h);
minDistance = h->start - a.end; distances.clear();
distances.push_back(minDistance);
} }
else if ((h->start - a.end) == minDistance) closestB.push_back(*h); else if ((h->start - a.end) == minDistance) {
closestB.push_back(*h);
distances.push_back(minDistance);
}
} }
} }
// if no overlaps were found, we'll widen the range // if no overlaps were found, we'll widen the range
...@@ -122,31 +145,51 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { ...@@ -122,31 +145,51 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
slop += SLOPGROWTH; slop += SLOPGROWTH;
} }
} }
// there is no feature in B on the same chromosome as A
else { else {
_bedA->reportBedTab(a); _bedA->reportBedTab(a);
_bedB->reportNullBedNewLine(); if (_reportDistance == true) {
_bedB->reportNullBedTab();
cout << -1 << endl;
}
else
_bedB->reportNullBedNewLine();
} }
// report the closest feature(s) in B to the current A feature.
// obey the user's reporting request (_tieMode)
if (numOverlaps > 0) { if (numOverlaps > 0) {
if (closestB.size() == 1) { if (closestB.size() == 1 || _tieMode == "first") {
_bedA->reportBedTab(a); _bedA->reportBedTab(a);
_bedB->reportBedNewLine(closestB[0]); if (_reportDistance == true) {
_bedB->reportBedTab(closestB[0]);
cout << distances[0] << endl;
}
else
_bedB->reportBedNewLine(closestB[0]);
} }
else { else {
if (_tieMode == "all") { if (_tieMode == "all") {
size_t i = 0;
for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) { for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) {
_bedA->reportBedTab(a); _bedA->reportBedTab(a);
_bedB->reportBedNewLine(*b); if (_reportDistance == true) {
_bedB->reportBedTab(*b);
cout << distances[i++] <<endl;
}
else
_bedB->reportBedNewLine(*b);
} }
} }
else if (_tieMode == "first") {
_bedA->reportBedTab(a);
_bedB->reportBedNewLine(closestB[0]);
}
else if (_tieMode == "last") { else if (_tieMode == "last") {
_bedA->reportBedTab(a); _bedA->reportBedTab(a);
_bedB->reportBedNewLine(closestB[closestB.size()-1]); if (_reportDistance == true) {
_bedB->reportBedTab(closestB[closestB.size()-1]);
cout << distances[distances.size() - 1]<<endl;
}
else
_bedB->reportBedNewLine(closestB[closestB.size()-1]);
} }
} }
} }
......
...@@ -27,7 +27,7 @@ class BedClosest { ...@@ -27,7 +27,7 @@ class BedClosest {
public: public:
// constructor // constructor
BedClosest(string &bedAFile, string &bedBFile, bool &forceStrand, string &tieMode); BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance);
// destructor // destructor
~BedClosest(void); ~BedClosest(void);
...@@ -41,7 +41,8 @@ private: ...@@ -41,7 +41,8 @@ private:
string _bedAFile; string _bedAFile;
string _bedBFile; string _bedBFile;
string _tieMode; string _tieMode;
bool _forceStrand; bool _forceStrand;
bool _reportDistance;
BedFile *_bedA, *_bedB; BedFile *_bedA, *_bedB;
......
...@@ -33,10 +33,11 @@ int main(int argc, char* argv[]) { ...@@ -33,10 +33,11 @@ int main(int argc, char* argv[]) {
string bedBFile; string bedBFile;
string tieMode = "all"; string tieMode = "all";
bool haveBedA = false; bool haveBedA = false;
bool haveBedB = false; bool haveBedB = false;
bool haveTieMode = false; bool haveTieMode = false;
bool forceStrand = false; bool forceStrand = false;
bool reportDistance = false;
// check to see if we should print out some help // check to see if we should print out some help
...@@ -75,6 +76,9 @@ int main(int argc, char* argv[]) { ...@@ -75,6 +76,9 @@ int main(int argc, char* argv[]) {
else if (PARAMETER_CHECK("-s", 2, parameterLength)) { else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true; forceStrand = true;
} }
else if (PARAMETER_CHECK("-d", 2, parameterLength)) {
reportDistance = true;
}
else if (PARAMETER_CHECK("-t", 2, parameterLength)) { else if (PARAMETER_CHECK("-t", 2, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
haveTieMode = true; haveTieMode = true;
...@@ -97,7 +101,7 @@ int main(int argc, char* argv[]) { ...@@ -97,7 +101,7 @@ int main(int argc, char* argv[]) {
} }
if (!showHelp) { if (!showHelp) {
BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode); BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode, reportDistance);
delete bc; delete bc;
return 0; return 0;
} }
...@@ -110,7 +114,8 @@ void ShowHelp(void) { ...@@ -110,7 +114,8 @@ void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; cerr << "Authors: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "\t Erik Arner, Riken" << endl << endl;
cerr << "Summary: For each feature in A, finds the closest " << endl; cerr << "Summary: For each feature in A, finds the closest " << endl;
cerr << "\t feature (upstream or downstream) in B." << endl << endl; cerr << "\t feature (upstream or downstream) in B." << endl << endl;
...@@ -121,6 +126,11 @@ void ShowHelp(void) { ...@@ -121,6 +126,11 @@ void ShowHelp(void) {
cerr << "\t-s\t" << "Force strandedness. That is, find the closest feature in B" << endl; cerr << "\t-s\t" << "Force strandedness. That is, find the closest feature in B" << endl;
cerr << "\t\tthat overlaps A on the same strand." << endl; cerr << "\t\tthat overlaps A on the same strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl; cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-d\t" << "In addition to the closest feature in B, " << endl;
cerr << "\t\treport its distance to A as an extra column." << endl;
cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl;
cerr << "\t-t\t" << "How ties for closest feature are handled. This occurs when two" << endl; cerr << "\t-t\t" << "How ties for closest feature are handled. This occurs when two" << endl;
cerr << "\t\tfeatures in B have exactly the same overlap with A." << endl; cerr << "\t\tfeatures in B have exactly the same overlap with A." << endl;
...@@ -137,5 +147,4 @@ void ShowHelp(void) { ...@@ -137,5 +147,4 @@ void ShowHelp(void) {
// end the program here // end the program here
exit(1); exit(1);
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment