Skip to content
Snippets Groups Projects
Commit de52bca7 authored by Aaron's avatar Aaron
Browse files

Improvements to closestBed. Thanks to suggestions by Brent Pedersen, Ryan...

Improvements to closestBed.  Thanks to suggestions by Brent Pedersen, Ryan Layer, Assaf Gordon, Dan Webster.

1. All overlapping features are reported by closestBed -t all.
2. The "-io" option will prevent overlapping features from being reported.
parent da54ba34
No related branches found
No related tags found
No related merge requests found
......@@ -21,20 +21,20 @@ const int SLOPGROWTH = 2048000;
/*
Constructor
*/
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance) {
_bedAFile = bedAFile;
_bedBFile = bedBFile;
_forceStrand = forceStrand;
_tieMode = tieMode;
_reportDistance = reportDistance;
_bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile);
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance, bool ignoreOverlaps)
: _bedAFile(bedAFile)
, _bedBFile(bedBFile)
, _tieMode(tieMode)
, _forceStrand(forceStrand)
, _reportDistance(reportDistance)
, _ignoreOverlaps(ignoreOverlaps)
{
_bedA = new BedFile(_bedAFile);
_bedB = new BedFile(_bedBFile);
FindClosestBed();
}
/*
Destructor
*/
......@@ -53,7 +53,6 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
CHRPOS aFudgeEnd;
int numOverlaps = 0;
vector<BED> closestB;
float maxOverlap = 0;
CHRPOS minDistance = INT_MAX;
vector<CHRPOS> distances;
......@@ -83,36 +82,24 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
vector<BED>::const_iterator hitsEnd = hits.end();
for (; h != hitsEnd; ++h) {
numOverlaps++;
// do the actual features overlap?
int s = max(a.start, h->start);
int e = min(a.end, h->end);
int overlapBases = (e - s); // the number of overlapping bases b/w a and b
int aLength = (a.end - a.start); // the length of a in b.p.
// there is overlap
if (s < e) {
// is there enough overlap (default ~ 1bp)
float overlap = (float) overlapBases / (float) aLength;
if ( overlap > 0 ) {
// is this hit the closest?
if (overlap > maxOverlap) {
maxOverlap = overlap;
closestB.clear();
closestB.push_back(*h);
distances.clear();
distances.push_back(0);
}
else if (overlap == maxOverlap) {
closestB.push_back(*h);
distances.push_back(0);
}
}
// make sure we allow overlapping features.
if ((overlapBases > 0) && (_ignoreOverlaps == true))
continue;
else
numOverlaps++;
// there is overlap. make sure we allow overlapping features ()
if (overlapBases > 0) {
closestB.push_back(*h);
distances.push_back(0);
}
// the hit is to the "left" of A
else if (h->end <= a.start){
else if (h->end <= a.start) {
if ((a.start - h->end) < minDistance) {
minDistance = a.start - h->end;
......@@ -127,7 +114,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
}
}
// the hit is to the "right" of A
else {
else if (h->start >= a.end) {
if ((h->start - a.end) < minDistance) {
minDistance = h->start - a.end;
......@@ -161,7 +148,6 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
// report the closest feature(s) in B to the current A feature.
// obey the user's reporting request (_tieMode)
if (numOverlaps > 0) {
if (closestB.size() == 1 || _tieMode == "first") {
_bedA->reportBedTab(a);
if (_reportDistance == true) {
......
......@@ -27,7 +27,9 @@ class BedClosest {
public:
// constructor
BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance);
BedClosest(string &bedAFile, string &bedBFile,
bool forceStrand, string &tieMode,
bool reportDistance, bool ignoreOverlaps);
// destructor
~BedClosest(void);
......@@ -43,6 +45,7 @@ private:
string _tieMode;
bool _forceStrand;
bool _reportDistance;
bool _ignoreOverlaps;
BedFile *_bedA, *_bedB;
......
......@@ -37,6 +37,7 @@ int main(int argc, char* argv[]) {
bool haveBedB = false;
bool haveTieMode = false;
bool forceStrand = false;
bool ignoreOverlaps = false;
bool reportDistance = false;
......@@ -79,6 +80,9 @@ int main(int argc, char* argv[]) {
else if (PARAMETER_CHECK("-d", 2, parameterLength)) {
reportDistance = true;
}
else if (PARAMETER_CHECK("-io", 3, parameterLength)) {
ignoreOverlaps = true;
}
else if (PARAMETER_CHECK("-t", 2, parameterLength)) {
if ((i+1) < argc) {
haveTieMode = true;
......@@ -86,6 +90,10 @@ int main(int argc, char* argv[]) {
i++;
}
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
// make sure we have both input files
......@@ -101,7 +109,7 @@ int main(int argc, char* argv[]) {
}
if (!showHelp) {
BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode, reportDistance);
BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode, reportDistance, ignoreOverlaps);
delete bc;
return 0;
}
......@@ -131,14 +139,16 @@ void ShowHelp(void) {
cerr << "\t\treport its distance to A as an extra column." << endl;
cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl;
cerr << "\t-no\t" << "Ignore features in B that overlap A. That is, we want close, but " << endl;
cerr << "\t\tnot touching features only." << endl << endl;
cerr << "\t-t\t" << "How ties for closest feature are handled. This occurs when two" << endl;
cerr << "\t\tfeatures in B have exactly the same overlap with A." << endl;
cerr << "\t\tfeatures in B have exactly the same \"closeness\" with A." << endl;
cerr << "\t\tBy default, all such features in B are reported." << endl;
cerr << "\t\tHere are all the options:" << endl;
cerr << "\t\t- \"all\" Report all ties (default)." << endl;
cerr << "\t\t- \"all\" Report all ties (default)." << endl;
cerr << "\t\t- \"first\" Report the first tie that occurred in the B file." << endl;
cerr << "\t\t- \"last\" Report the last tie that occurred in the B file." << endl << endl;
cerr << "\t\t- \"last\" Report the last tie that occurred in the B file." << endl << endl;
cerr << "Notes: " << endl;
cerr << "\tReports \"none\" for chrom and \"-1\" for all other fields when a feature" << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment