From 1cfd53159f8b3f32b57f729d9b00a457e5587208 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Tue, 21 Sep 2010 13:08:56 -0400 Subject: [PATCH] Added -ubam support to intersectBed and bedToBam. --- src/bedToBam/bedToBam.cpp | 35 +++++++++--------------------- src/intersectBed/intersectMain.cpp | 4 +++- src/utils/bedFile/bedFile.cpp | 3 ++- 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/src/bedToBam/bedToBam.cpp b/src/bedToBam/bedToBam.cpp index 337bc6bf..b43ec38a 100644 --- a/src/bedToBam/bedToBam.cpp +++ b/src/bedToBam/bedToBam.cpp @@ -1,5 +1,5 @@ /***************************************************************************** - bamToBed.cpp + bedToBam.cpp (c) 2009 - Aaron Quinlan Hall Laboratory @@ -36,8 +36,7 @@ using namespace std; // function declarations void ShowHelp(void); -void DetermineBedInput(BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual); -void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual); +void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual, bool uncompressedBam); void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int> &chromToId, bool isBED12, int mapQual, int lineNum); void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header, map<string, int> &chromToInt); int reg2bin(int beg, int end); @@ -59,6 +58,7 @@ int main(int argc, char* argv[]) { bool haveGenome = false; bool haveMapQual = false; bool isBED12 = false; + bool uncompressedBam = false; for(int i = 1; i < argc; i++) { int parameterLength = (int)strlen(argv[i]); @@ -98,6 +98,9 @@ int main(int argc, char* argv[]) { } else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) { isBED12 = true; + } + else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) { + uncompressedBam = true; } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; @@ -124,7 +127,7 @@ int main(int argc, char* argv[]) { BedFile *bed = new BedFile(bedFile); GenomeFile *genome = new GenomeFile(genomeFile); - DetermineBedInput(bed, genome, isBED12, mapQual); + ProcessBed(cin, bed, genome, isBED12, mapQual, uncompressedBam); } else { ShowHelp(); @@ -150,6 +153,7 @@ void ShowHelp(void) { cerr << "\t-bed12\t" << "The BED file is in BED12 format. The BAM CIGAR" << endl; cerr << "\t\tstring will reflect BED \"blocks\"." << endl << endl; + cerr << "\t-ubam\t" << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl; cerr << "Notes: " << endl; cerr << "\t(1) BED files must be at least BED4 to be amenable to BAM (needs name field)." << endl << endl; @@ -160,26 +164,7 @@ void ShowHelp(void) { } -void DetermineBedInput(BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual) { - - // dealing with a proper file - if (bed->bedFile != "stdin") { - - ifstream bedStream(bed->bedFile.c_str(), ios::in); - if ( !bedStream ) { - cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - ProcessBed(bedStream, bed, genome, isBED12, mapQual); - } - // reading from stdin - else { - ProcessBed(cin, bed, genome, isBED12, mapQual); - } -} - - -void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual) { +void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual, bool uncompressedBam) { BamWriter *writer = new BamWriter(); @@ -190,7 +175,7 @@ void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED1 MakeBamHeader(genome->getGenomeFileName(), refs, bamHeader, chromToId); // open a BAM and add the reference headers to the BAM file - writer->Open("stdout", bamHeader, refs); + writer->Open("stdout", bamHeader, refs, uncompressedBam); // process each BED entry and convert to BAM diff --git a/src/intersectBed/intersectMain.cpp b/src/intersectBed/intersectMain.cpp index 1e7e2617..e2da5b46 100644 --- a/src/intersectBed/intersectMain.cpp +++ b/src/intersectBed/intersectMain.cpp @@ -222,7 +222,9 @@ void ShowHelp(void) { cerr << "Options: " << endl; cerr << "\t-abam\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl; - + + cerr << "\t-ubam\t" << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl; + cerr << "\t-bed\t" << "When using BAM input (-abam), write output as BED. The default" << endl; cerr << "\t\tis to write output in BAM when using -abam." << endl << endl; diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index 3560628b..634b7125 100644 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -197,7 +197,8 @@ BedLineStatus BedFile::GetNextBed(BED &bed, int &lineNum) { } -void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool forceStrand) { +void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, + string strand, vector<BED> &hits, bool forceStrand) { BIN startBin, endBin; startBin = (start >> _binFirstShift); -- GitLab