From bcd7d9f453ab2baedd8bd4598ada059c5a8a76ec Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Mon, 3 May 2010 22:00:40 -0400 Subject: [PATCH] Removed the seq and qual defaults from bedToBam. Thanks to James Ward. --- src/bedToBam/bedToBam.cpp | 104 +++++++++++++++++--------------- src/coverageBed/coverageBed.cpp | 3 +- src/utils/bedFile/bedFile.cpp | 3 +- 3 files changed, 59 insertions(+), 51 deletions(-) diff --git a/src/bedToBam/bedToBam.cpp b/src/bedToBam/bedToBam.cpp index b4f17703..1290e3cf 100755 --- a/src/bedToBam/bedToBam.cpp +++ b/src/bedToBam/bedToBam.cpp @@ -233,10 +233,13 @@ void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int, std::le // hard-code the sequence and qualities. int bedLength = bed.end - bed.start; - string query(bedLength, 'N'); - string quals(bedLength, 'H'); - bam.QueryBases = query; - bam.Qualities = quals; + + // set dummy seq and qual strings. the input is BED, + // so the sequence is inherently the same as it's + // reference genome. + // Thanks to James M. Ward for pointing this out. + bam.QueryBases = ""; + bam.Qualities = ""; // chrom and map quality bam.RefID = chromToId[bed.chrom]; @@ -259,58 +262,65 @@ void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int, std::le cOp.Length = bedLength; bam.CigarData.push_back(cOp); } - // precess each block. + // we're being told that the input is BED12. else { - - // extract the relevant BED fields to convert BED12 to BAM - // namely: thickStart, thickEnd, blockCount, blockStarts, blockEnds - // unsigned int thickStart = atoi(bed.otherFields[0].c_str()); - // unsigned int thickEnd = atoi(bed.otherFields[1].c_str()); - unsigned int blockCount = atoi(bed.otherFields[3].c_str()); - - vector<string> blockSizesString, blockStartsString; - vector<int> blockSizes, blockStarts; - Tokenize(bed.otherFields[4], blockSizesString, ","); - Tokenize(bed.otherFields[5], blockStartsString, ","); - for (unsigned int i = 0; i < blockCount; ++i) { - blockStarts.push_back(atoi(blockStartsString[i].c_str())); - blockSizes.push_back(atoi(blockSizesString[i].c_str())); - } + // does it smell like BED12? if so, process it. + if (bed.otherFields.size() == 6) { + + // extract the relevant BED fields to convert BED12 to BAM + // namely: blockCount, blockStarts, blockEnds + unsigned int blockCount = atoi(bed.otherFields[3].c_str()); + + vector<string> blockSizesString, blockStartsString; + vector<int> blockSizes, blockStarts; + Tokenize(bed.otherFields[4], blockSizesString, ","); + Tokenize(bed.otherFields[5], blockStartsString, ","); - // make sure this is a well-formed BED12 entry. - if ((blockSizes.size() != blockCount) || (blockSizes.size() != blockCount)) { - cerr << "Error: Number of BED blocks does not match blockCount at line: " << lineNum << ". Exiting!" << endl; - exit (1); - } - else { - // does the first block start after the bed.start? - // if so, we need to do some "splicing" - if (blockStarts[0] - bed.start > 0) { - CigarOp cOp; - cOp.Length = blockStarts[0] - bed.start; - cOp.Type = 'N'; - bam.CigarData.push_back(cOp); + for (unsigned int i = 0; i < blockCount; ++i) { + blockStarts.push_back(atoi(blockStartsString[i].c_str())); + blockSizes.push_back(atoi(blockSizesString[i].c_str())); } - // handle the "middle" blocks - for (unsigned int i = 0; i < blockCount - 1; ++i) { - CigarOp cOp; - cOp.Length = blockSizes[i]; - cOp.Type = 'M'; - bam.CigarData.push_back(cOp); - - if (blockStarts[i+1] > (blockStarts[i] + blockSizes[i])) { + + // make sure this is a well-formed BED12 entry. + if ((blockSizes.size() != blockCount) || (blockSizes.size() != blockCount)) { + cerr << "Error: Number of BED blocks does not match blockCount at line: " << lineNum << ". Exiting!" << endl; + exit (1); + } + else { + // does the first block start after the bed.start? + // if so, we need to do some "splicing" + if (blockStarts[0] - bed.start > 0) { CigarOp cOp; - cOp.Length = (blockStarts[i+1] - (blockStarts[i] + blockSizes[i])); + cOp.Length = blockStarts[0] - bed.start; cOp.Type = 'N'; bam.CigarData.push_back(cOp); } + // handle the "middle" blocks + for (unsigned int i = 0; i < blockCount - 1; ++i) { + CigarOp cOp; + cOp.Length = blockSizes[i]; + cOp.Type = 'M'; + bam.CigarData.push_back(cOp); + + if (blockStarts[i+1] > (blockStarts[i] + blockSizes[i])) { + CigarOp cOp; + cOp.Length = (blockStarts[i+1] - (blockStarts[i] + blockSizes[i])); + cOp.Type = 'N'; + bam.CigarData.push_back(cOp); + } + } + // handle the last block. + CigarOp cOp; + cOp.Length = blockSizes[blockCount - 1]; + cOp.Type = 'M'; + bam.CigarData.push_back(cOp); } - // handle the last block. - CigarOp cOp; - cOp.Length = blockSizes[blockCount - 1]; - cOp.Type = 'M'; - bam.CigarData.push_back(cOp); + } + // it doesn't smell like BED12. complain. + else { + cerr << "You've indicated that the input file is in BED12 format, yet the relevant fields cannot be found. Exiting." << endl << endl; + exit(1); } } } diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp index c0f6f64d..85939d67 100755 --- a/src/coverageBed/coverageBed.cpp +++ b/src/coverageBed/coverageBed.cpp @@ -80,7 +80,7 @@ void BedCoverage::CollectCoverageBam(string bamFile) { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedFileIntoMap(); - + // open the BAM file BamReader reader; reader.Open(bamFile); @@ -102,7 +102,6 @@ void BedCoverage::CollectCoverageBam(string bamFile) { a.start = bam.Position; a.end = bam.GetEndPosition(false); a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; - _bedB->countHits(a, _forceStrand); } } diff --git a/src/utils/bedFile/bedFile.cpp b/src/utils/bedFile/bedFile.cpp index e8921d45..e8e8bf45 100755 --- a/src/utils/bedFile/bedFile.cpp +++ b/src/utils/bedFile/bedFile.cpp @@ -669,12 +669,11 @@ void BedFile::loadBedFileIntoMap() { bedStatus = this->GetNextBed(bedEntry, lineNum); while (bedStatus != BED_INVALID) { // ignore headers and blank lines - if (bedStatus == BED_VALID) { + if (bedStatus == BED_VALID) { int bin = getBin(bedEntry.start, bedEntry.end); bedEntry.count = 0; bedEntry.minOverlapStart = INT_MAX; bedMap[bedEntry.chrom][bin].push_back(bedEntry); - bedEntry = nullBed; } bedStatus = this->GetNextBed(bedEntry, lineNum); -- GitLab