From 32793b98a0ed9f9c5bf879248d872f183e9474c9 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Mon, 1 Nov 2010 13:20:59 -0400 Subject: [PATCH] Added the -cigar option to bamToBed. --- src/bamToBed/bamToBed.cpp | 60 +++++++++++++++++++++++++++++------ src/pairToPair/pairToPair.cpp | 4 +-- src/utils/BamTools/BamAux.h | 4 +-- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp index c43ef20f..719d801c 100644 --- a/src/bamToBed/bamToBed.cpp +++ b/src/bamToBed/bamToBed.cpp @@ -36,16 +36,18 @@ using namespace std; void ShowHelp(void); void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag, - const bool &writeBed12, const bool &obeySplits, const string &color); + const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar); void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance); -void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits); +void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar); void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0"); void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance); void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts, vector<int> &blockEnds, int &alignmentEnd); +string BuildCigarString(const vector<CigarOp> &cigar); + bool IsCorrectMappingForBEDPE (const BamAlignment &bam); @@ -66,6 +68,7 @@ int main(int argc, char* argv[]) { bool writeBed12 = false; bool useEditDistance = false; bool useAlignmentScore = false; + bool useCigar = false; bool obeySplits = false; // check to see if we should print out some help @@ -104,6 +107,9 @@ int main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-ed", 3, parameterLength)) { useEditDistance = true; } + else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) { + useCigar = true; + } else if(PARAMETER_CHECK("-as", 3, parameterLength)) { useAlignmentScore = true; } @@ -140,6 +146,10 @@ int main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Cannot use -ed with -splits. Edit distances cannot be computed for each \'chunk\'." << endl << "*****" << endl; showHelp = true; } + if (useEditDistance == true && useCigar == true) { + cerr << endl << "*****" << endl << "*****ERROR: Cannot use -cigar with -splits. Not yet supported." << endl << "*****" << endl; + showHelp = true; + } if (useEditDistance == true && haveOtherTag == true) { cerr << endl << "*****" << endl << "*****ERROR: Cannot use -ed with -tag. Choose one or the other." << endl << "*****" << endl; showHelp = true; @@ -151,7 +161,7 @@ int main(int argc, char* argv[]) { // if there are no problems, let's convert BAM to BED or BEDPE if (!showHelp) { if (writeBedPE == false) - ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color); // BED or "blocked BED" + ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar); // BED or "blocked BED" else ConvertBamToBedpe(bamFile, useEditDistance); // BEDPE } @@ -194,6 +204,8 @@ void ShowHelp(void) { cerr << "\t-color\t" << "An R,G,B string for the color used with BED12 format." << endl; cerr << "\t\tDefault is (255,0,0)." << endl; + + cerr << "\t-cigar\t" << "Add the CIGAR string to the BED entry as a 7th column." << endl << endl; // end the program here @@ -202,7 +214,7 @@ void ShowHelp(void) { void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag, - const bool &writeBed12, const bool &obeySplits, const string &color) { + const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar) { // open the BAM file BamReader reader; reader.Open(bamFile); @@ -216,7 +228,7 @@ void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const s while (reader.GetNextAlignment(bam)) { if (bam.IsMapped() == true) { if (writeBed12 == false) // BED - PrintBed(bam, refs, useEditDistance, bamTag, obeySplits); + PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar); else //"blocked" BED PrintBed12(bam, refs, useEditDistance, bamTag, color); } @@ -299,7 +311,28 @@ void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts, vec } -void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits) { +string BuildCigarString(const vector<CigarOp> &cigar) { + + stringstream cigarString; + + for (size_t i = 0; i < cigar.size(); ++i) { + //cerr << cigar[i].Type << " " << cigar[i].Length << endl; + switch (cigar[i].Type) { + case ('M') : + case ('I') : + case ('D') : + case ('N') : + case ('S') : + case ('H') : + case ('P') : + cigarString << cigar[i].Length << cigar[i].Type; + } + } + return cigarString.str(); +} + + +void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar) { // set the strand string strand = "+"; @@ -317,13 +350,13 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista if (obeySplits == false) { // report the alignment in BED6 format. if (useEditDistance == false && bamTag == "") { - printf("%s\t%d\t%d\t\%s\t%d\t%s\n", refs.at(bam.RefID).RefName.c_str(), bam.Position, + printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str()); } else if (useEditDistance == true && bamTag == "") { uint32_t editDistance; if (bam.GetTag("NM", editDistance)) { - printf("%s\t%d\t%d\t\%s\t%u\t%s\n", refs.at(bam.RefID).RefName.c_str(), bam.Position, + printf("%s\t%d\t%d\t\%s\t%u\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, alignmentEnd, name.c_str(), editDistance, strand.c_str()); } else { @@ -334,7 +367,7 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista else if (useEditDistance == false && bamTag != "") { int32_t tagValue; if (bam.GetTag(bamTag, tagValue)) { - printf("%s\t%d\t%d\t\%s\t%d\t%s\n", refs.at(bam.RefID).RefName.c_str(), bam.Position, + printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, alignmentEnd, name.c_str(), tagValue, strand.c_str()); } else { @@ -342,6 +375,15 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista exit(1); } } + + // does the user want CIGAR as well? + if (useCigar == false) { + printf("\n"); + } + else { + string cigar = BuildCigarString(bam.CigarData); + printf("\t%s\n", cigar.c_str()); + } } // Report each chunk of the BAM alignment as a discrete BED entry // For example 10M100N10M would be reported as two seprate BED entries of length 10 diff --git a/src/pairToPair/pairToPair.cpp b/src/pairToPair/pairToPair.cpp index 16a73889..ca268d20 100644 --- a/src/pairToPair/pairToPair.cpp +++ b/src/pairToPair/pairToPair.cpp @@ -207,11 +207,11 @@ void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<BEDCOV> &quali map<unsigned int, vector<BEDCOV>, less<int> > hitsMap; for (vector<BEDCOV>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { - hitsMap[h->count].push_back(*h); + hitsMap[h->lineNum].push_back(*h); matchCount++; } for (vector<BEDCOV>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { - hitsMap[h->count].push_back(*h); + hitsMap[h->lineNum].push_back(*h); matchCount++; } diff --git a/src/utils/BamTools/BamAux.h b/src/utils/BamTools/BamAux.h index 73e88381..866ca15d 100644 --- a/src/utils/BamTools/BamAux.h +++ b/src/utils/BamTools/BamAux.h @@ -95,7 +95,7 @@ struct BamAlignment { void SetIsMateUnmapped(bool ok); // Sets "alignment's mate is mapped" flag void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag - void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag + void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag void SetIsSecondaryAlignment(bool ok); // Sets "position is primary alignment" flag void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag @@ -418,7 +418,7 @@ int BamAlignment::GetEndPosition(bool usePadded) const { if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) { alignEnd += (*cigarIter).Length; } - else if ( usePadded && cigarType == 'I' ) { + else if ( usePadded && cigarType == 'I' ) { alignEnd += (*cigarIter).Length; } } -- GitLab