Skip to content
Snippets Groups Projects
Commit aa3d3959 authored by Aaron's avatar Aaron
Browse files

Update to fastaFromBed and intersectBed.

	1. Added the -wo option to intersectBed.
	2. Added the -s option to fastaFromBed.
parent e88f6c38
No related branches found
No related tags found
No related merge requests found
......@@ -12,7 +12,8 @@
#include "lineFileUtilities.h"
#include "fastaFromBed.h"
Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile, bool &useFasta) {
Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile,
bool &useFasta, bool &useStrand) {
if (useName) {
this->useName = true;
......@@ -22,7 +23,8 @@ Bed2Fa::Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutF
this->bedFile = bedFile;
this->fastaOutFile = fastaOutFile;
this->useFasta = useFasta;
this->useStrand = useStrand;
this->bed = new BedFile(this->bedFile);
}
......@@ -78,17 +80,35 @@ void Bed2Fa::ExtractDNA() {
unsigned int start = bedList[i].start;
unsigned int end = bedList[i].end;
if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) {
if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) {
string dna = currDNA.substr(bedList[i].start, ((bedList[i].end - bedList[i].start)));
// revcomp if necessary. Thanks to Thomas Doktor.
if ((this->useStrand == true) && (bedList[i].strand == "-")) {
reverseComplement(dna);
}
if (!(this->useName)) {
if (this->useFasta == true) {
faOut << ">" << currChrom << ":"
<< bedList[i].start << "-" << bedList[i].end << endl << dna << endl;
if (this->useStrand == true) {
faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")" << endl << dna << endl;
}
else {
faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << endl << dna << endl;
}
}
else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end
<< "\t" << dna << endl;
if (this->useStrand == true) {
faOut << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")"
<< "\t" << dna << endl;
}
else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end
<< "\t" << dna << endl;
}
}
}
else {
......@@ -120,18 +140,36 @@ void Bed2Fa::ExtractDNA() {
unsigned int start = bedList[i].start;
unsigned int end = bedList[i].end;
if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) {
if ( (start <= currDNA.size()) && (end <= currDNA.size()) ) {
string dna = currDNA.substr(bedList[i].start, ((bedList[i].end - bedList[i].start)));
// revcomp if necessary. Thanks to Thomas Doktor.
if ((this->useStrand == true) && (bedList[i].strand == "-")) {
reverseComplement(dna);
}
if (!(this->useName)) {
if (this->useFasta == true) {
faOut << ">" << currChrom << ":"
<< bedList[i].start << "-" << bedList[i].end << endl << dna << endl;
if (this->useStrand == true) {
faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")" << endl << dna << endl;
}
else {
faOut << ">" << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << endl << dna << endl;
}
}
else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end
<< "\t" << dna << endl;
if (this->useStrand == true) {
faOut << currChrom << ":" << bedList[i].start << "-"
<< bedList[i].end << "(" << bedList[i].strand << ")"
<< "\t" << dna << endl;
}
else {
faOut << currChrom << ":" << bedList[i].start << "-" << bedList[i].end
<< "\t" << dna << endl;
}
}
}
else {
......@@ -142,10 +180,10 @@ void Bed2Fa::ExtractDNA() {
faOut << bedList[i].name << "\t" << dna << endl;
}
}
}
else cerr << "Feature (" << bedList[i].chrom << ":" << start << "-" << end << ") beyond "
<< currChrom << " size (" << currDNA.size() << " bp). Skipping." << endl;
}
currDNA = "";
}
......
......@@ -28,7 +28,8 @@ class Bed2Fa {
public:
// constructor
Bed2Fa(bool &, string &, string &, string &, bool &);
Bed2Fa(bool &useName, string &dbFile, string &bedFile, string &fastaOutFile,
bool &useFasta, bool &useStrand);
// destructor
~Bed2Fa(void);
......@@ -42,6 +43,7 @@ private:
string bedFile;
string fastaOutFile;
bool useFasta;
bool useStrand;
// instance of a bed file class.
BedFile *bed;
......
......@@ -42,6 +42,7 @@ int main(int argc, char* argv[]) {
bool haveFastaOut = false;
bool useNameOnly = false;
bool useFasta = true;
bool useStrand = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -89,6 +90,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-tab", 4, parameterLength)) {
useFasta = false;
}
else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
useStrand = true;
}
else {
cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
......@@ -101,7 +105,7 @@ int main(int argc, char* argv[]) {
if (!showHelp) {
Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta);
Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand);
b2f->ExtractDNA();
return 0;
}
......@@ -125,8 +129,13 @@ void ShowHelp(void) {
cerr << "\t-bed\tBED file of ranges to extract from -fi" << endl;
cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl;
cerr << "\t-name\tUse the BED name field (#4) for the FASTA header" << endl;
cerr << "\t-tab\tWrite output in TAB delimited format." << endl;
cerr << "\t\tDefault is FASTA format." << endl;
cerr << "\t\t- Default is FASTA format." << endl << endl;
cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl;
cerr << "\t\tthe sequence will be reverse complemented." << endl;
cerr << "\t\t- By default, strand information is ignored." << endl << endl;
......
......@@ -17,23 +17,25 @@
Constructor
*/
BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
bool writeA, bool writeB, float overlapFraction,
bool writeA, bool writeB, bool writeOverlap, float overlapFraction,
bool noHit, bool writeCount, bool forceStrand, bool reciprocal,
bool bamInput, bool bamOutput) {
this->bedAFile = bedAFile;
this->bedBFile = bedBFile;
this->anyHit = anyHit;
this->noHit = noHit;
this->writeA = writeA;
this->writeB = writeB;
this->writeCount = writeCount;
this->bedAFile = bedAFile;
this->bedBFile = bedBFile;
this->anyHit = anyHit;
this->noHit = noHit;
this->writeA = writeA;
this->writeB = writeB;
this->writeOverlap = writeOverlap;
this->writeCount = writeCount;
this->overlapFraction = overlapFraction;
this->forceStrand = forceStrand;
this->reciprocal = reciprocal;
this->bamInput = bamInput;
this->bamOutput = bamOutput;
this->forceStrand = forceStrand;
this->reciprocal = reciprocal;
this->bamInput = bamInput;
this->bamOutput = bamOutput;
// create new BED file objects for A and B
this->bedA = new BedFile(bedAFile);
this->bedB = new BedFile(bedBFile);
}
......@@ -80,7 +82,7 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
hitsFound = true;
numOverlaps++; // we have another hit for A
/*
if (!writeB && printable) {
if (writeA) bedA->reportBedNewLine(a);
else bedA->reportBedRangeNewLine(a,s,e);
......@@ -95,6 +97,30 @@ bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
bedB->reportBedNewLine(*h);
}
}
*/
// new
if (printable == true) {
if (writeA == false && writeB == false && writeOverlap == false) {
bedA->reportBedRangeNewLine(a,s,e);
}
else if (writeA == true && writeB == true) {
bedA->reportBedTab(a);
bedB->reportBedNewLine(*h);
}
else if (writeA == true) {
bedA->reportBedNewLine(a);
}
else if (writeB == true) {
bedA->reportBedRangeTab(a,s,e);
bedB->reportBedNewLine(*h);
}
else if (writeOverlap == true) {
bedA->reportBedTab(a);
bedB->reportBedTab(*h);
printf("%d\n", overlapBases);
}
}
}
else { // the user wants there to be sufficient reciprocal overlap
int bLength = (h->end - h->start);
......
......@@ -34,7 +34,7 @@ public:
// constructor
BedIntersect(string bedAFile, string bedBFile, bool anyHit,
bool writeA, bool writeB, float overlapFraction,
bool writeA, bool writeB, bool writeOverlap, float overlapFraction,
bool noHit, bool writeCount, bool forceStrand, bool reciprocal,
bool bamInput, bool bamOutput);
......@@ -63,6 +63,7 @@ private:
bool writeA;
bool writeB;
bool writeCount;
bool writeOverlap;
bool forceStrand;
bool reciprocal;
float overlapFraction;
......
......@@ -36,18 +36,19 @@ int main(int argc, char* argv[]) {
// input arguments
float overlapFraction = 1E-9;
bool haveBedA = false;
bool haveBedB = false;
bool noHit = false;
bool anyHit = false;
bool writeA = false;
bool writeB = false;
bool writeCount = false;
bool haveFraction = false;
bool haveBedA = false;
bool haveBedB = false;
bool noHit = false;
bool anyHit = false;
bool writeA = false;
bool writeB = false;
bool writeCount = false;
bool writeOverlap = false;
bool haveFraction = false;
bool reciprocalFraction = false;
bool forceStrand = false;
bool inputIsBam = false;
bool outputIsBam = true;
bool forceStrand = false;
bool inputIsBam = false;
bool outputIsBam = true;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -110,6 +111,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {
writeB = true;
}
else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {
writeOverlap = true;
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
writeCount = true;
}
......@@ -143,6 +147,21 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeCount && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeA && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wa OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (writeB && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (reciprocalFraction && !haveFraction) {
cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
......@@ -153,11 +172,21 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeB) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wb, not both." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeOverlap) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -wo, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB,
BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, writeOverlap,
overlapFraction, noHit, writeCount, forceStrand,
reciprocalFraction, inputIsBam, outputIsBam);
bi->DetermineBedInput();
......@@ -186,18 +215,24 @@ void ShowHelp(void) {
cerr << "\t\tis to write output in BAM when using -abam." << endl << endl;
cerr << "\t-wa\t" << "Write the original entry in A for each overlap." << endl << endl;
cerr << "\t-wb\t" << "Write the original entry in B for each overlap." << endl;
cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f." << endl << endl;
cerr << "\t\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;
cerr << "\t-wo\t" << "Write the original A and B entries plus the number of base" << endl;
cerr << "\t\tpairs of overlap between the two features." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-u\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;
cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl << endl;
cerr << "\t\t- In other words, just report the fact >=1 hit was found." << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-c\t" << "For each entry in A, report the number of overlaps with B." << endl;
cerr << "\t\t- Reports 0 for A entries that have no overlap with B." << endl;
cerr << "\t\t- Overlaps restricted by -f." << endl << endl;
cerr << "\t\t- Overlaps restricted by -f and -r." << endl << endl;
cerr << "\t-v\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;
cerr << "\t\t- Similar to \"grep -v.\"" << endl << endl;
cerr << "\t\t- Similar to \"grep -v\" (an homage)." << endl << endl;
cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl;
cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
......@@ -206,7 +241,6 @@ void ShowHelp(void) {
cerr << "\t-r\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;
cerr << "\t\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;
cerr << "\t\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only report hits in B that" << endl;
cerr << "\t\toverlap A on the same strand." << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment