Skip to content
Snippets Groups Projects
Commit eab9849e authored by Aaron's avatar Aaron
Browse files

[ENH] add -mate1 option for -bedpe output in bamtobed

parent da34c62d
No related branches found
No related tags found
No related merge requests found
......@@ -33,7 +33,9 @@ Option Description
Lastly, when an end is unmapped, the chromosome and strand will
be set to "." and the start and end coordinates will be set
to -1. *By default, this is disabled and the output will be
reported in BED format*.
reported in BED format*.
**-mate1** When writing BEDPE (-bedpe) format,
always report mate one as the first BEDPE "block".
**-bed12** Write "blocked" BED (a.k.a. BED12) format. This will convert
"spliced" BAM alignments (denoted by the "N" CIGAR operation)
to BED12.
......
......@@ -42,7 +42,8 @@ void ConvertBamToBed(const string &bamFile, bool useEditDistance,
bool useCigar, bool useNovoalign,
bool useBWA);
void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);
void ConvertBamToBedpe(const string &bamFile,
const bool &useEditDistance, bool mate1First);
string PrintTag(const BamAlignment &bam, const string &tag);
......@@ -56,7 +57,7 @@ void PrintBed12(const BamAlignment &bam, const RefVector &refs,
string color = "255,0,0");
void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,
const RefVector &refs, bool useEditDistance);
const RefVector &refs, bool useEditDistance, bool mate1First);
void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,
vector<int> &blockEnds, int &alignmentEnd);
......@@ -85,6 +86,7 @@ int bamtobed_main(int argc, char* argv[]) {
bool useAlignmentScore = false;
bool useCigar = false;
bool obeySplits = false;
bool mate1First = false;
bool useNovoalign = false; // custom for Quinlan/Hall research
bool useBWA = false; // custom for Quinlan/Hall research
......@@ -137,6 +139,9 @@ int bamtobed_main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-bwa", 4, parameterLength)) {
useBWA = true;
}
else if(PARAMETER_CHECK("-mate1", 6, parameterLength)) {
mate1First = true;
}
else if(PARAMETER_CHECK("-color", 6, parameterLength)) {
if ((i+1) < argc) {
haveColor = true;
......@@ -194,15 +199,22 @@ int bamtobed_main(int argc, char* argv[]) {
<< "*****" << endl;
showHelp = true;
}
if (writeBedPE == false && mate1First == true) {
cerr << endl << "*****" << endl
<< "*****ERROR: Must use -mate1 with -bedpe." << endl
<< "*****" << endl;
showHelp = true;
}
// if there are no problems, let's convert BAM to BED or BEDPE
if (!showHelp) {
if (writeBedPE == false)
ConvertBamToBed(bamFile, useEditDistance,
tag, writeBed12,
obeySplits, color,
useCigar, useNovoalign, useBWA);
useCigar,
useNovoalign, useBWA);
else
ConvertBamToBedpe(bamFile, useEditDistance);
ConvertBamToBedpe(bamFile, useEditDistance, mate1First);
}
else {
bamtobed_help();
......@@ -221,29 +233,32 @@ void bamtobed_help(void) {
cerr << "Options: " << endl;
cerr << "\t-bedpe\t" << "Write BEDPE format." << endl;
cerr << "\t\t- Requires BAM to be grouped or sorted by query." << endl << endl;
cerr << "\t-bed12\t" << "Write \"blocked\" BED format (aka \"BED12\")." << endl << endl;
cerr << "\t\thttp://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" << endl << endl;
cerr << "\t-split\t" << "Report \"split\" BAM alignments as separate BED entries." << endl << endl;
cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl;
cerr << "\t\t- Default for BED is to use mapping quality." << endl;
cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl;
cerr << "\t\t the two mapping qualities for the pair." << endl;
cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl;
cerr << "\t\t distance from the two mates is reported." << endl << endl;
cerr << "\t-tag\t" << "Use other NUMERIC BAM alignment tag for BED score." << endl;
cerr << "\t\t- Default for BED is to use mapping quality." << endl;
cerr << "\t\t Disallowed with BEDPE output." << endl << endl;
cerr << "\t-color\t" << "An R,G,B string for the color used with BED12 format." << endl;
cerr << "\t\tDefault is (255,0,0)." << endl << endl;
cerr << "\t-cigar\t" << "Add the CIGAR string to the BED entry as a 7th column." << endl << endl;
cerr << "\t-bedpe\t" << "Write BEDPE format." << endl;
cerr << "\t\t- Requires BAM to be grouped or sorted by query." << endl << endl;
cerr << "\t-mate1\t" << "When writing BEDPE (-bedpe) format, " << endl;
cerr << "\t\talways report mate one as the first BEDPE \"block\"." << endl << endl;
cerr << "\t-bed12\t" << "Write \"blocked\" BED format (aka \"BED12\")." << endl << endl;
cerr << "\t\thttp://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" << endl << endl;
cerr << "\t-split\t" << "Report \"split\" BAM alignments as separate BED entries." << endl << endl;
cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl;
cerr << "\t\t- Default for BED is to use mapping quality." << endl;
cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl;
cerr << "\t\t the two mapping qualities for the pair." << endl;
cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl;
cerr << "\t\t distance from the two mates is reported." << endl << endl;
cerr << "\t-tag\t" << "Use other NUMERIC BAM alignment tag for BED score." << endl;
cerr << "\t\t- Default for BED is to use mapping quality." << endl;
cerr << "\t\t Disallowed with BEDPE output." << endl << endl;
cerr << "\t-color\t" << "An R,G,B string for the color used with BED12 format." << endl;
cerr << "\t\tDefault is (255,0,0)." << endl << endl;
cerr << "\t-cigar\t" << "Add the CIGAR string to the BED entry as a 7th column." << endl << endl;
// end the program here
......@@ -253,7 +268,7 @@ void bamtobed_help(void) {
void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &bamTag,
bool writeBed12, bool obeySplits, const string &color,
bool useCigar, bool useNovoalign, bool useBWA) {
bool useCigar, bool useNovoalign, bool useBWA) {
// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
......@@ -284,7 +299,10 @@ void ConvertBamToBed(const string &bamFile, bool useEditDistance, const string &
1. The BAM file is grouped/sorted by query name,
not alignment position
*/
void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance) {
void ConvertBamToBedpe(const string &bamFile,
const bool &useEditDistance,
bool mate1First)
{
// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
......@@ -313,10 +331,10 @@ void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance) {
bam1 = bam2;
reader.GetNextAlignment(bam2);
}
PrintBedPE(bam1, bam2, refs, useEditDistance);
PrintBedPE(bam1, bam2, refs, useEditDistance, mate1First);
}
else if (bam1.IsPaired() && bam2.IsPaired()) {
PrintBedPE(bam1, bam2, refs, useEditDistance);
PrintBedPE(bam1, bam2, refs, useEditDistance, mate1First);
}
}
reader.Close();
......@@ -527,7 +545,8 @@ void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDist
}
void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance) {
void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,
const RefVector &refs, bool useEditDistance, bool mate1First) {
// initialize BEDPE variables
string chrom1, chrom2, strand1, strand2;
......@@ -575,11 +594,24 @@ void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVec
}
// swap the ends if necessary
if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {
swap(chrom1, chrom2);
swap(start1, start2);
swap(end1, end2);
swap(strand1, strand2);
if (!mate1First)
{
if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {
swap(chrom1, chrom2);
swap(start1, start2);
swap(end1, end2);
swap(strand1, strand2);
}
}
// if -mate1First, make sure the first block is always mate 1
else
{
if (!bam1.IsFirstMate()) {
swap(chrom1, chrom2);
swap(start1, start2);
swap(end1, end2);
swap(strand1, strand2);
}
}
// report BEDPE using min mapQuality
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment