Skip to content
Snippets Groups Projects
Commit 017f6e61 authored by Aaron's avatar Aaron
Browse files

Added new multiIntersectBed tool. New relase to follow.

parent 8f8c5d82
No related branches found
No related tags found
No related merge requests found
......@@ -29,6 +29,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/maskFastaFromBed \
$(SRC_DIR)/mergeBed \
$(SRC_DIR)/multiBamCov \
$(SRC_DIR)/multiIntersectBed \
$(SRC_DIR)/nucBed \
$(SRC_DIR)/overlap \
$(SRC_DIR)/pairToBed \
......
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(UTILITIES_DIR)/lineFileUtilities/ \
-I$(UTILITIES_DIR)/genomeFile/ \
-I$(UTILITIES_DIR)/version/ \
-I$(UTILITIES_DIR)/gzstream/ \
-I$(UTILITIES_DIR)/fileType/ \
-I$(UTILITIES_DIR)/BamTools/include
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= multiIntersectBed.cpp multiIntersectBedMain.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= multiIntersectBed
all: $(PROGRAM)
.PHONY: all
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
@echo " * linking $(PROGRAM)"
@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
$(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedGraphFile/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
clean:
@echo "Cleaning up."
@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
.PHONY: clean
/*****************************************************************************
intervalItem.h
(c) 2010 - Assaf Gordon
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef INTERVALITEM_H
#define INTERVALITEM_H
#include <string>
#include <queue>
enum COORDINATE_TYPE {
START,
END
};
/*
An interval item in the priority queue.
An IntervalItem can mark either a START position or an END position.
*/
class IntervalItem
{
public:
int source_index; // which source BedGraph file this came from
COORDINATE_TYPE coord_type; // is this the start or the end position?
CHRPOS coord;
IntervalItem () :
source_index(-1),
coord_type(START),
coord(0)
{}
IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord) :
source_index(_index),
coord_type(_type),
coord(_coord)
{}
IntervalItem(const IntervalItem &other) :
source_index(other.source_index),
coord_type(other.coord_type),
coord(other.coord)
{}
bool operator< ( const IntervalItem& other ) const
{
return this->coord > other.coord;
}
};
// our priority queue
typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE;
#endif
/*****************************************************************************
unionBedGraphs.cpp
(c) 2010 - Assaf Gordon, CSHL
- Aaron Quinlan, UVA
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <algorithm>
#include "bedFile.h"
#include "multiIntersectBed.h"
using namespace std;
MultiIntersectBed::MultiIntersectBed(std::ostream& _output,
const vector<string>& _filenames,
const vector<string>& _titles,
bool _print_empty_regions,
const std::string& _genome_size_filename,
const std::string& _no_coverage_value ) :
filenames(_filenames),
titles(_titles),
output(_output),
current_non_zero_inputs(0),
print_empty_regions(_print_empty_regions),
genome_sizes(NULL),
no_coverage_value(_no_coverage_value)
{
if (print_empty_regions) {
assert(!_genome_size_filename.empty());
genome_sizes = new GenomeFile(_genome_size_filename);
}
}
MultiIntersectBed::~MultiIntersectBed() {
CloseFiles();
if (genome_sizes) {
delete genome_sizes;
genome_sizes = NULL ;
}
}
void MultiIntersectBed::MultiIntersect() {
OpenFiles();
// Add the first interval from each file
for(size_t i = 0;i < input_files.size(); ++i)
LoadNextItem(i);
// Chromosome loop - once per chromosome
do {
// Find the first chromosome to use
current_chrom = DetermineNextChrom();
// Populate the queue with initial values from all files
// (if they belong to the correct chromosome)
for(size_t i = 0; i < input_files.size(); ++i)
AddInterval(i);
CHRPOS current_start = ConsumeNextCoordinate();
// User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage
if (print_empty_regions && current_start > 0)
PrintEmptyCoverage(0,current_start);
// Intervals loop - until all intervals (of current chromosome) from all files are used.
do {
CHRPOS current_end = queue.top().coord;
PrintCoverage(current_start, current_end);
current_start = ConsumeNextCoordinate();
} while (!queue.empty());
// User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome
// print a dummy empty coverage
if (print_empty_regions) {
CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);
if (current_start < chrom_size)
PrintEmptyCoverage(current_start, chrom_size);
}
} while (!AllFilesDone());
}
CHRPOS MultiIntersectBed::ConsumeNextCoordinate() {
assert(!queue.empty());
CHRPOS new_position = queue.top().coord;
do {
IntervalItem item = queue.top();
UpdateInformation(item);
queue.pop();
} while (!queue.empty() && queue.top().coord == new_position);
return new_position;
}
void MultiIntersectBed::UpdateInformation(const IntervalItem &item) {
// Update the depth coverage for this file
// Which coordinate is it - start or end?
switch (item.coord_type)
{
case START:
current_depth[item.source_index] = 1;
current_non_zero_inputs++;
break;
case END:
//Read the next interval from this file
AddInterval(item.source_index);
current_depth[item.source_index] = 0;
current_non_zero_inputs--;
break;
default:
assert(0);
}
}
void MultiIntersectBed::AddInterval(int index) {
assert(static_cast<unsigned int>(index) < input_files.size());
//This file has no more intervals
if (current_item[index].chrom.empty())
return;
//If the next interval belongs to a different chrom, don't add it
if (current_item[index].chrom!=current_chrom)
return;
const BED &bed(current_item[index]);
IntervalItem start_item(index, START, bed.start);
IntervalItem end_item(index, END, bed.end);
queue.push(start_item);
queue.push(end_item);
LoadNextItem(index);
}
void MultiIntersectBed::PrintHeader() {
output << "chrom\tstart\tend" ;
for (size_t i=0;i<titles.size();++i)
output << "\t" <<titles[i];
output << endl;
}
void MultiIntersectBed::PrintCoverage(CHRPOS start, CHRPOS end) {
if ( current_non_zero_inputs == 0 && ! print_empty_regions )
return ;
output << current_chrom << "\t"
<< start << "\t"
<< end;
for (size_t i=0;i<current_depth.size();++i)
output << "\t" << current_depth[i] ;
output << endl;
}
void MultiIntersectBed::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {
output << current_chrom << "\t"
<< start << "\t"
<< end;
for (size_t i=0;i<current_depth.size();++i)
output << "\t0";
output << endl;
}
void MultiIntersectBed::LoadNextItem(int index) {
assert(static_cast<unsigned int>(index) < input_files.size());
current_item[index].chrom="";
BedFile *file = input_files[index];
BED merged_bed;
int lineNum = 0;
//
// TO DO: Do the mergeing on the fly. How best to do this?
//
// IDEA: Implement a Merge class with GetNextMerge element.
//
while (file->GetNextMergedBed(merged_bed, lineNum))
{
current_item[index] = merged_bed;
break;
}
}
bool MultiIntersectBed::AllFilesDone() {
for (size_t i=0;i<current_item.size();++i)
if (!current_item[i].chrom.empty())
return false;
return true;
}
string MultiIntersectBed::DetermineNextChrom() {
string next_chrom;
for (size_t i=0;i<current_item.size();++i) {
if (current_item[i].chrom.empty())
continue;
if (next_chrom.empty())
next_chrom = current_item[i].chrom;
else
if (current_item[i].chrom < next_chrom)
next_chrom = current_item[i].chrom ;
}
return next_chrom;
}
void MultiIntersectBed::OpenFiles() {
for (size_t i = 0; i < filenames.size(); ++i) {
BedFile *file = new BedFile(filenames[i]);
file->Open();
input_files.push_back(file);
current_depth.push_back(0);
}
current_item.resize(filenames.size());
}
void MultiIntersectBed::CloseFiles() {
for (size_t i=0; i < input_files.size(); ++i) {
BedFile *file = input_files[i];
delete file;
input_files[i] = NULL ;
}
input_files.clear();
}
/*****************************************************************************
multiIntersectBed.h
(c) 2010 - Aaron Quinlan, UVA
- Assaf Gordon, CSHL
Quinlan Laboratory
Department of Public Health Sciences
Center for Public Health Genomics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef MULTIINTERSECTBED_H
#define MULTIINTERSECTBED_H
#include <vector>
#include <string>
#include "bedFile.h"
#include "genomeFile.h"
#include "intervalItem.h"
class MultiIntersectBed
{
private:
vector<string> filenames;
vector<string> titles;
vector<BedFile*> input_files;
vector<int> current_depth;
vector<BED> current_item;
std::ostream &output;
INTERVALS_PRIORITY_QUEUE queue;
std::string current_chrom;
int current_non_zero_inputs;
bool print_empty_regions;
GenomeFile* genome_sizes;
std::string no_coverage_value;
public:
MultiIntersectBed(std::ostream& _output,
const vector<string>& _filenames,
const vector<string>& _titles,
bool _print_empty_regions,
const std::string& _genomeFileName,
const std::string& _no_coverage_value);
virtual ~MultiIntersectBed();
// Combines all interval files
void MultiIntersect();
// Print the header line: chrom/start/end + name of each bedgraph file.
void PrintHeader();
private:
// Open all input files, initialize "current_XXX" vectors
void OpenFiles();
// Close the input files.
void CloseFiles();
/*
Add an interval from BedGraph file 'index' into the queue.
will only be added if it belongs to the current chromosome.
If the interval was added (=consumed), the next interval will be read from the file
using 'LoadNextItem'
*/
void AddInterval(int index);
/*
Loads the next interval from Bed file 'index'.
Stores it in 'current_bed_item' vector.
*/
void LoadNextItem(int index);
/*
Scans the 'current_bedgraph_item' vector,
find the 'first' chromosome to use (different BedGraph files can start with different chromosomes).
*/
std::string DetermineNextChrom();
/*
Returns 'true' if ALL intervals from ALL BedGraph files were used
*/
bool AllFilesDone();
/*
Extract the next coordinate from the queue, and updates the current coverage information.
If multiple interval share the same coordinate values, all of them are handled.
If an END coordinate is consumed, the next interval (from the corresponding file) is read.
*/
CHRPOS ConsumeNextCoordinate();
/*
Updates the coverage information based on the given item.
Item can be a START coordinate or an END coordiante.
*/
void UpdateInformation(const IntervalItem &item);
/*
prints chrom/start/end and the current depth coverage values of all the files.
*/
void PrintCoverage(CHRPOS start, CHRPOS end);
/*
prints chrom/start/end and the ZERO depth coverage values of all the files.
*/
void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
void DebugPrintQueue();
};
#endif
/*****************************************************************************
unionBedGraphsMain.cpp
(c) 2010 - Assaf Gordon, CSHL
- Aaron Quinlan, UVA
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include <climits>
#include <cstring>
#include <cstdlib>
#include <vector>
#include <string>
#include <iostream>
#include <getopt.h>
#include <libgen.h> //for basename()
#include "version.h"
#include "genomeFile.h"
#include "MultiIntersectBed.h"
using namespace std;
// define our program name
#define PROGRAM_NAME "multiIntersectBed"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
//STLized version of basename()
// (because POSIX basename() modifies the input string pointer)
// Additionally: removes any extension the basename might have.
std::string stl_basename(const std::string& path);
// function declarations
void ShowHelp(void);
void ShowExamples(void);
int main(int argc, char* argv[])
{
bool haveFiles = false;
bool haveTitles = false;
bool haveGenome = false;
bool haveFiller = true;
bool printHeader = false;
bool printEmptyRegions = false;
bool showHelp = false;
string genomeFile;
string basePath;
string noCoverageValue = "0";
vector<string> inputFiles;
vector<string> inputTitles;
//Parse command line options
if(argc <= 1)
ShowHelp();
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp == true) {
ShowHelp();
exit(1);
}
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-i", 2, parameterLength)) {
if ((i+1) < argc) {
haveFiles = true;
i = i+1;
string file = argv[i];
while (file[0] != '-' && i < argc) {
inputFiles.push_back(file);
i++;
if (i < argc)
file = argv[i];
}
i--;
}
}
else if(PARAMETER_CHECK("-names", 6, parameterLength)) {
if ((i+1) < argc) {
haveTitles = true;
i = i+1;
string title = argv[i];
while (title[0] != '-' && i < argc) {
inputTitles.push_back(title);
i++;
if (i < argc)
title = argv[i];
}
i--;
}
}
else if(PARAMETER_CHECK("-g", 2, parameterLength)) {
if ((i+1) < argc) {
haveGenome = true;
genomeFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {
if ((i+1) < argc) {
haveFiller = true;
noCoverageValue = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
printHeader = true;
}
else if(PARAMETER_CHECK("-empty", 6, parameterLength)) {
printEmptyRegions = true;
}
else if(PARAMETER_CHECK("-examples", 9, parameterLength)) {
ShowHelp();
ShowExamples();
exit(1);
}
}
//Sanity checks
if (inputFiles.empty() == true) {
cerr << "Error: missing BedGraph file names (-i) to combine." << endl;
exit(1);
}
if (inputFiles.size() == 1) {
cerr << "Error: Only a single BedGraph file was specified. Nothing to combine, exiting." << endl;
exit(1);
}
if (printEmptyRegions && (genomeFile.empty() == true)) {
cerr << "Error: when using -empty, the genome sizes file (-g) must be specified using '-g FILE'." << endl;
exit(1);
}
if ((haveTitles == true) && (inputFiles.size() != inputTitles.size())) {
cerr << "Error: The number of file titles (-names) does not match the number of files (-i)." << endl;
exit(1);
}
MultiIntersectBed mbi(cout, inputFiles, inputTitles, printEmptyRegions, genomeFile, noCoverageValue);
if (printHeader)
mbi.PrintHeader();
mbi.MultiIntersect();
}
void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Authors: Assaf Gordon, CSHL" << endl;
cerr << " Aaron Quinlan (aaronquinlan@gmail.com)" << endl << endl;
cerr << "Summary: Combines multiple BedGraph files into a single file," << endl;
cerr << "\t allowing coverage comparisons between them." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i FILE1 FILE2 .. FILEn" << endl;
cerr << "\t Assumes that each BedGraph file is sorted by chrom/start " << endl;
cerr << "\t and that the intervals in each are non-overlapping." << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-header\t\t" << "Print a header line." << endl;
cerr << "\t\t\t(chrom/start/end + names of each file)." << endl << endl;
cerr << "\t-names\t\t" << "A list of names (one / file) to describe each file in -i." << endl;
cerr << "\t\t\tThese names will be printed in the header line." << endl << endl;
cerr << "\t-g\t\t" << "Use genome file to calculate empty regions." << endl;
cerr << "\t\t\t- STRING." << endl << endl;
cerr << "\t-empty\t\t" << "Report empty regions (i.e., start/end intervals w/o" << endl;
cerr << "\t\t\tvalues in all files)." << endl;
cerr << "\t\t\t- Requires the '-g FILE' parameter.\n" << endl;
cerr << "\t-filler TEXT\t" << "Use TEXT when representing intervals having no value." << endl;
cerr << "\t\t\t- Default is '0', but you can use 'N/A' or any other text." << endl << endl;
cerr << "\t-examples\t" << "Show detailed usage examples." << endl << endl;
}
void ShowExamples()
{
cerr << "Example usage:\n\n" \
"== Input files: ==\n" \
"\n" \
" $ cat 1.bg\n" \
" chr1 1000 1500 10\n" \
" chr1 2000 2100 20\n" \
"\n" \
" $ cat 2.bg\n" \
" chr1 900 1600 60\n" \
" chr1 1700 2050 50\n" \
"\n" \
" $ cat 3.bg\n" \
" chr1 1980 2070 80\n" \
" chr1 2090 2100 20\n" \
"\n" \
" $ cat sizes.txt\n" \
" chr1 5000\n" \
"\n" \
"== Union/combine the files: ==\n" \
"\n" \
" $ unionBedGraphs -i 1.bg 2.bg 3.bg\n" \
" chr1 900 1000 0 60 0\n" \
" chr1 1000 1500 10 60 0\n" \
" chr1 1500 1600 0 60 0\n" \
" chr1 1700 1980 0 50 0\n" \
" chr1 1980 2000 0 50 80\n" \
" chr1 2000 2050 20 50 80\n" \
" chr1 2050 2070 20 0 80\n" \
" chr1 2070 2090 20 0 0\n" \
" chr1 2090 2100 20 0 20\n" \
"\n" \
"== Union/combine the files, with a header line (titles are the file names): ==\n" \
"\n" \
" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\n" \
" chrom start end 1 2 3\n" \
" chr1 900 1000 0 60 0\n" \
" chr1 1000 1500 10 60 0\n" \
" chr1 1500 1600 0 60 0\n" \
" chr1 1700 1980 0 50 0\n" \
" chr1 1980 2000 0 50 80\n" \
" chr1 2000 2050 20 50 80\n" \
" chr1 2050 2070 20 0 80\n" \
" chr1 2070 2090 20 0 0\n" \
" chr1 2090 2100 20 0 20\n" \
"\n" \
"== Union/combine the files, with a header line and custom names: ==\n" \
"\n" \
" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\n" \
" chrom start end WT-1 WT-2 KO-1\n" \
" chr1 900 1000 0 60 0\n" \
" chr1 1000 1500 10 60 0\n" \
" chr1 1500 1600 0 60 0\n" \
" chr1 1700 1980 0 50 0\n" \
" chr1 1980 2000 0 50 80\n" \
" chr1 2000 2050 20 50 80\n" \
" chr1 2050 2070 20 0 80\n" \
" chr1 2070 2090 20 0 0\n" \
" chr1 2090 2100 20 0 20\n" \
"\n" \
"== Union/combine, showing empty regions (note, requires -g): ==\n" \
"\n" \
" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\n" \
" chrom start end 1 2 3\n" \
" chr1 0 900 0 0 0\n" \
" chr1 900 1000 0 60 0\n" \
" chr1 1000 1500 10 60 0\n" \
" chr1 1500 1600 0 60 0\n" \
" chr1 1600 1700 0 0 0\n" \
" chr1 1700 1980 0 50 0\n" \
" chr1 1980 2000 0 50 80\n" \
" chr1 2000 2050 20 50 80\n" \
" chr1 2050 2070 20 0 80\n" \
" chr1 2070 2090 20 0 0\n" \
" chr1 2090 2100 20 0 20\n" \
" chr1 2100 5000 0 0 0\n" \
"\n" \
;
}
std::string stl_basename(const std::string& path)
{
string result;
char* path_dup = strdup(path.c_str());
char* basename_part = basename(path_dup);
result = basename_part;
free(path_dup);
size_t pos = result.find_last_of('.');
if (pos != string::npos )
result = result.substr(0,pos);
return result;
}
......@@ -119,7 +119,10 @@ BedFile::BedFile(string &bedFile)
: bedFile(bedFile),
_isGff(false),
_isVcf(false),
_typeIsKnown(false)
_typeIsKnown(false),
_merged_start(-1),
_merged_end(-1),
_merged_chrom("")
{}
// Destructor
......@@ -193,6 +196,53 @@ BedLineStatus BedFile::GetNextBed(BED &bed, int &lineNum) {
}
bool BedFile::GetNextMergedBed(BED &merged_bed, int &lineNum) {
if (_bedStream->good()) {
BED bed;
BedLineStatus bedStatus;
while ((bedStatus = GetNextBed(bed, lineNum)) != BED_INVALID) {
if (bedStatus == BED_VALID) {
if (((int) bed.start - _merged_end > 0) ||
(_merged_end < 0) ||
(bed.chrom != _merged_chrom))
{
if (_merged_start >= 0) {
merged_bed.chrom = _merged_chrom;
merged_bed.start = _merged_start;
merged_bed.end = _merged_end;
_merged_chrom = bed.chrom;
_merged_start = bed.start;
_merged_end = bed.end;
return true;
}
else {
_merged_start = bed.start;
_merged_chrom = bed.chrom;
_merged_end = bed.end;
}
}
else if ((int) bed.end > _merged_end)
{
_merged_end = bed.end;
}
}
}
// handle the last merged block in the file.
if (bedStatus == BED_INVALID)
{
merged_bed.chrom = _merged_chrom;
merged_bed.start = _merged_start;
merged_bed.end = _merged_end;
return true;
}
}
return false;
}
void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end,
string strand, vector<BED> &hits, bool sameStrand, bool diffStrand) {
......
......@@ -421,6 +421,10 @@ public:
// Get the next BED entry in an opened BED file.
BedLineStatus GetNextBed (BED &bed, int &lineNum);
// Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file
// NOTE: assumes input file is sorted by chrom then start
bool GetNextMergedBed(BED &merged_bed, int &lineNum);
// load a BED file into a map keyed by chrom, then bin. value is vector of BEDs
void loadBedFileIntoMap();
......@@ -487,6 +491,9 @@ private:
istream *_bedStream;
string _bedLine;
vector<string> _bedFields;
int _merged_start;
int _merged_end;
string _merged_chrom;
void setZeroBased(bool zeroBased);
void setGff (bool isGff);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment