Skip to content
Snippets Groups Projects
Commit 45306c09 authored by Aaron's avatar Aaron
Browse files

Created an "either" mode for pairToPair.

parent 74b0da44
No related branches found
No related tags found
No related merge requests found
......@@ -17,13 +17,15 @@
Constructor
*/
PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,
string searchType, bool ignoreStrand) {
string searchType, bool ignoreStrand, int slop, bool strandedSlop) {
_bedAFilePE = bedAFilePE;
_bedBFilePE = bedBFilePE;
_overlapFraction = overlapFraction;
_searchType = searchType;
_ignoreStrand = ignoreStrand;
_slop = slop;
_strandedSlop = strandedSlop;
_bedA = new BedFilePE(bedAFilePE);
_bedB = new BedFilePE(bedBFilePE);
......@@ -58,7 +60,7 @@ void PairToPair::IntersectPairs() {
while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
if (bedStatus == BED_VALID) {
// identify overlaps b/w the pairs
FindOverlaps(a, hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2, _searchType);
FindOverlaps(a, hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2);
// reset space for next BEDPE
hitsA1B1.clear(); hitsA1B2.clear(); hitsA2B1.clear(); hitsA2B2.clear();
......@@ -72,7 +74,7 @@ void PairToPair::IntersectPairs() {
void PairToPair::FindOverlaps(const BEDPE &a, vector<BEDCOV> &hitsA1B1, vector<BEDCOV> &hitsA1B2,
vector<BEDCOV> &hitsA2B1, vector<BEDCOV> &hitsA2B2, string type) {
vector<BEDCOV> &hitsA2B1, vector<BEDCOV> &hitsA2B2) {
// list of hits on each end of BEDPE
// that exceed the requested overlap fraction
......@@ -87,70 +89,92 @@ void PairToPair::FindOverlaps(const BEDPE &a, vector<BEDCOV> &hitsA1B1, vector<B
int numOverlapsA1B2 = 0;
int numOverlapsA2B1 = 0;
int numOverlapsA2B2 = 0;
// add the appropriate slop to the starts and ends
CHRPOS start1 = a.start1;
CHRPOS end1 = a.end1;
CHRPOS start2 = a.start2;
CHRPOS end2 = a.end2;
if (_strandedSlop == true) {
if (a.strand1 == "+")
end1 += _slop;
else
start1 -= _slop;
if (a.strand2 == "+")
end2 += _slop;
else
start2 -= _slop;
}
else {
start1 -= _slop;
start2 -= _slop;
end1 += _slop;
end2 += _slop;
}
// Find the _potential_ hits between each end of A and B
_bedB->FindOverlapsPerBin(1, a.chrom1, a.start1, a.end1, a.strand1, hitsA1B1, !(_ignoreStrand)); // hits between A1 to B1
_bedB->FindOverlapsPerBin(1, a.chrom2, a.start2, a.end2, a.strand2, hitsA2B1, !(_ignoreStrand)); // hits between A2 to B1
_bedB->FindOverlapsPerBin(2, a.chrom1, a.start1, a.end1, a.strand1, hitsA1B2, !(_ignoreStrand)); // hits between A1 to B2
_bedB->FindOverlapsPerBin(2, a.chrom2, a.start2, a.end2, a.strand2, hitsA2B2, !(_ignoreStrand)); // hits between A2 to B2
_bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.strand1, hitsA1B1, !(_ignoreStrand)); // hits between A1 to B1
_bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.strand2, hitsA2B1, !(_ignoreStrand)); // hits between A2 to B1
_bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.strand1, hitsA1B2, !(_ignoreStrand)); // hits between A1 to B2
_bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.strand2, hitsA2B2, !(_ignoreStrand)); // hits between A2 to B2
// Now, reduce to the set of hits on each end of A and B that meet the required overlap fraction and orientation.
FindQualityHitsBetweenEnds(a, 1, hitsA1B1, qualityHitsA1B1, numOverlapsA1B1); // quality hits between A1 to B1
FindQualityHitsBetweenEnds(a, 1, hitsA1B2, qualityHitsA1B2, numOverlapsA1B2); // quality hits between A2 to B1
FindQualityHitsBetweenEnds(a, 2, hitsA2B1, qualityHitsA2B1, numOverlapsA2B1); // quality hits between A1 to B2
FindQualityHitsBetweenEnds(a, 2, hitsA2B2, qualityHitsA2B2, numOverlapsA2B2); // quality hits between A2 to B2
// Now, reduce to the set of hits on each end of A and B
// that meet the required overlap fraction and orientation.
FindQualityHitsBetweenEnds(start1, end1, start2, end2, hitsA1B1, qualityHitsA1B1, numOverlapsA1B1); // quality hits between A1 to B1
FindQualityHitsBetweenEnds(start1, end1, start2, end2, hitsA1B2, qualityHitsA1B2, numOverlapsA1B2); // quality hits between A2 to B1
FindQualityHitsBetweenEnds(start1, end1, start2, end2, hitsA2B1, qualityHitsA2B1, numOverlapsA2B1); // quality hits between A1 to B2
FindQualityHitsBetweenEnds(start1, end1, start2, end2, hitsA2B2, qualityHitsA2B2, numOverlapsA2B2); // quality hits between A2 to B2
int matchCount1 = 0;
int matchCount2 = 0;
if ((numOverlapsA1B1 > 0) || (numOverlapsA2B2 > 0)) {
FindHitsOnBothEnds(a, qualityHitsA1B1, qualityHitsA2B2, matchCount1);
if (_searchType == "neither" || _searchType == "both") {
if ((numOverlapsA1B1 > 0) || (numOverlapsA2B2 > 0))
FindHitsOnBothEnds(a, qualityHitsA1B1, qualityHitsA2B2, matchCount1);
if ((numOverlapsA1B2 > 0) || (numOverlapsA2B1 > 0))
FindHitsOnBothEnds(a, qualityHitsA2B1, qualityHitsA1B2, matchCount2);
// report the fact that no hits were found iff _searchType is neither.
if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) {
_bedA->reportBedPENewLine(a);
}
}
if ((numOverlapsA1B2 > 0) || (numOverlapsA2B1 > 0)) {
FindHitsOnBothEnds(a, qualityHitsA2B1, qualityHitsA1B2, matchCount2);
}
if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) {
_bedA->reportBedPENewLine(a);
else if (_searchType == "either") {
FindHitsOnEitherEnd(a, qualityHitsA1B1, qualityHitsA2B2, matchCount1);
FindHitsOnEitherEnd(a, qualityHitsA2B1, qualityHitsA1B2, matchCount2);
}
}
void PairToPair::FindQualityHitsBetweenEnds(const BEDPE &a, int end, const vector<BEDCOV> &hits,
vector<BEDCOV> &qualityHits, int &numOverlaps) {
void PairToPair::FindQualityHitsBetweenEnds(CHRPOS start1, CHRPOS end1, CHRPOS start2, CHRPOS end2,
const vector<BEDCOV> &hits, vector<BEDCOV> &qualityHits, int &numOverlaps) {
if (end == 1) {
vector<BEDCOV>::const_iterator h = hits.begin();
vector<BEDCOV>::const_iterator hitsEnd = hits.end();
for (; h != hitsEnd; ++h) {
int s = max(a.start1, h->start);
int e = min(a.end1, h->end);
// is there enough overlap (default ~ 1bp)
if ( ((float)(e-s) / (float)(a.end1 - a.start1)) >= _overlapFraction ) {
numOverlaps++;
qualityHits.push_back(*h);
}
// end 1
vector<BEDCOV>::const_iterator h = hits.begin();
vector<BEDCOV>::const_iterator hitsEnd = hits.end();
for (; h != hitsEnd; ++h) {
int s = max(start1, h->start);
int e = min(end1, h->end);
// is there enough overlap (default ~ 1bp)
if ( ((float)(e-s) / (float)(end1 - start1)) >= _overlapFraction ) {
numOverlaps++;
qualityHits.push_back(*h);
}
}
else if (end == 2) {
vector<BEDCOV>::const_iterator h = hits.begin();
vector<BEDCOV>::const_iterator hitsEnd = hits.end();
for (; h != hitsEnd; ++h) {
int s = max(a.start2, h->start);
int e = min(a.end2, h->end);
// is there enough overlap (default ~ 1bp)
if ( ((float)(e-s) / (float)(a.end2 - a.start2)) >= _overlapFraction ) {
numOverlaps++;
qualityHits.push_back(*h);
}
// end 2
h = hits.begin();
hitsEnd = hits.end();
for (; h != hitsEnd; ++h) {
int s = max(start2, h->start);
int e = min(end2, h->end);
// is there enough overlap (default ~ 1bp)
if ( ((float)(e-s) / (float)(end2 - start2)) >= _overlapFraction ) {
numOverlaps++;
qualityHits.push_back(*h);
}
}
}
......@@ -187,3 +211,41 @@ void PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<BEDCOV> &qualit
}
}
void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<BEDCOV> &qualityHitsEnd1,
const vector<BEDCOV> &qualityHitsEnd2, int &matchCount) {
map<unsigned int, vector<BEDCOV>, less<int> > hitsMap;
for (vector<BEDCOV>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {
hitsMap[h->count].push_back(*h);
matchCount++;
}
for (vector<BEDCOV>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {
hitsMap[h->count].push_back(*h);
matchCount++;
}
for (map<unsigned int, vector<BEDCOV>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {
if (m->second.size() >= 1) {
if ((m->second.size()) == 2) {
BEDCOV b1 = m->second[0];
BEDCOV b2 = m->second[1];
_bedA->reportBedPETab(a);
printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", b1.chrom.c_str(), b1.start, b1.end,
b2.chrom.c_str(), b2.start, b2.end,
b1.name.c_str(), b1.score.c_str(),
b1.strand.c_str(), b2.strand.c_str());
}
else {
BEDCOV b1 = m->second[0];
_bedA->reportBedPETab(a);
printf("%s\t%d\t%d\t.\t-1\t-1\t%s\t%s\t%s\t.\n", b1.chrom.c_str(), b1.start, b1.end,
b1.name.c_str(), b1.score.c_str(), b1.strand.c_str());
}
}
}
}
......@@ -28,22 +28,14 @@ class PairToPair {
public:
// constructor
PairToPair(string &, string &, float &, string, bool);
PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,
string searchType, bool ignoreStrand, int slop, bool strandedSlop);
// destructor
~PairToPair(void);
void IntersectPairs();
void FindOverlaps(const BEDPE &a, vector<BEDCOV> &hitsA1B1, vector<BEDCOV> &hitsA1B2,
vector<BEDCOV> &hitsA2B1, vector<BEDCOV> &hitsA2B2, string type);
void FindQualityHitsBetweenEnds(const BEDPE &a, int end, const vector<BEDCOV> &hits,
vector<BEDCOV> &qualityHits, int &numOverlaps);
void FindHitsOnBothEnds(const BEDPE &a, const vector<BEDCOV> &qualityHitsEnd1,
const vector<BEDCOV> &qualityHitsEnd2, int &matchCount);
private:
......@@ -53,12 +45,28 @@ private:
float _overlapFraction;
string _searchType;
bool _ignoreStrand;
int _slop;
bool _strandedSlop;
// instance of a paired-end bed file class.
BedFilePE *_bedA;
// instance of a bed file class.
BedFilePE *_bedB;
// methods
void FindOverlaps(const BEDPE &a, vector<BEDCOV> &hitsA1B1, vector<BEDCOV> &hitsA1B2,
vector<BEDCOV> &hitsA2B1, vector<BEDCOV> &hitsA2B2);
void FindQualityHitsBetweenEnds(CHRPOS start1, CHRPOS end1, CHRPOS start2, CHRPOS end2,
const vector<BEDCOV> &hits, vector<BEDCOV> &qualityHits, int &numOverlaps);
void FindHitsOnBothEnds(const BEDPE &a, const vector<BEDCOV> &qualityHitsEnd1,
const vector<BEDCOV> &qualityHitsEnd2, int &matchCount);
void FindHitsOnEitherEnd(const BEDPE &a, const vector<BEDCOV> &qualityHitsEnd1,
const vector<BEDCOV> &qualityHitsEnd2, int &matchCount);
};
#endif /* PAIRTOPAIR_H */
......@@ -34,6 +34,7 @@ int main(int argc, char* argv[]) {
// input arguments
float overlapFraction = 1E-9;
int slop = 0;
string searchType = "both";
// flags to track parameters
......@@ -41,8 +42,9 @@ int main(int argc, char* argv[]) {
bool haveBedB = false;
bool haveSearchType = false;
bool haveFraction = false;
bool ignoreStrand = false;
bool ignoreStrand = false;
bool haveSlop = false;
bool strandedSlop = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -90,6 +92,16 @@ int main(int argc, char* argv[]) {
i++;
}
}
else if(PARAMETER_CHECK("-slop", 5, parameterLength)) {
if ((i+1) < argc) {
haveSlop = true;
slop = atoi(argv[i + 1]);
i++;
}
}
else if(PARAMETER_CHECK("-ss", 3, parameterLength)) {
strandedSlop = true;
}
else if(PARAMETER_CHECK("-is", 3, parameterLength)) {
ignoreStrand = true;
}
......@@ -106,14 +118,19 @@ int main(int argc, char* argv[]) {
showHelp = true;
}
if (haveSearchType && (searchType != "neither") && (searchType != "both")) {
if (haveSearchType && (searchType != "neither") && (searchType != "both") && (searchType != "either")) {
cerr << endl << "*****" << endl << "*****ERROR: Request \"both\" or \"neither\"" << endl << "*****" << endl;
showHelp = true;
}
if (strandedSlop == true && haveSlop == false) {
cerr << endl << "*****" << endl << "*****ERROR: Need a -slop value if requesting -ss." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
PairToPair *bi = new PairToPair(bedAFile, bedBFile, overlapFraction, searchType, ignoreStrand);
PairToPair *bi = new PairToPair(bedAFile, bedBFile, overlapFraction, searchType, ignoreStrand, slop, strandedSlop);
delete bi;
return 0;
}
......@@ -139,9 +156,18 @@ void ShowHelp(void) {
cerr << "\t-type \t" << "Approach to reporting overlaps between A and B." << endl << endl;
cerr << "\t\tneither\tReport overlaps if neither end of A overlaps B." << endl;
cerr << "\t\teither\tReport overlaps if either ends of A overlap B." << endl;
cerr << "\t\tboth\tReport overlaps if both ends of A overlap B." << endl;
cerr << "\t\t- Default." << endl << endl;
cerr << "\t\t\t- Default = both." << endl << endl;
cerr << "\t-slop \t" << "The amount of slop (in b.p.). to be added to each footprint." << endl << endl;
cerr << "\t-ss\t" << "Add slop based to each BEDPE footprint based on strand." << endl;
cerr << "\t\t- If strand is \"+\", slop is only added to the end coordinates." << endl << endl;
cerr << "\t\t- If strand is \"-\", slop is only added to the start coordinates." << endl << endl;
cerr << "\t\t- By default, slop is added in both directions." << endl << endl;
cerr << "\t-is\t" << "Ignore strands when searching for overlaps." << endl;
cerr << "\t\t- By default, strands are enforced." << endl << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment