diff --git a/Makefile b/Makefile index 735ceb9cdeb995a0cb812bd99de9445692fb5c24..167fc1e8035bfdb8c9ebbc15abfd9f1d7080fe7a 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,8 @@ SUBDIRS = $(SRC_DIR)/annotateBed \ $(SRC_DIR)/subtractBed \ $(SRC_DIR)/tagBam \ $(SRC_DIR)/unionBedGraphs \ - $(SRC_DIR)/windowBed + $(SRC_DIR)/windowBed \ + $(SRC_DIR)/windowMaker UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \ $(SRC_DIR)/utils/bedFile \ diff --git a/scripts/makeBashScripts.py b/scripts/makeBashScripts.py index 2345bbf50510884435a62b6337e3566e07017ddb..410f5ffb77c474772c0650c069560801fe80fe6f 100644 --- a/scripts/makeBashScripts.py +++ b/scripts/makeBashScripts.py @@ -37,7 +37,8 @@ def main(): 'subtract': 'subtractBed', 'tag': 'tagBam', 'unionbedg': 'unionBedGraphs', - 'window': 'windowBed'} + 'window': 'windowBed', + 'makewindows': 'windowMaker'} # create a BASH script for each old tool, mapping to the new CLI command. for tool in tool_map: diff --git a/src/bedtools.cpp b/src/bedtools.cpp index 05f849a81a293f56d623dec528741a40ba4db43a..0bcc5c2a2efe28e0d2da6bce7c6dd256c4888a08 100644 --- a/src/bedtools.cpp +++ b/src/bedtools.cpp @@ -63,6 +63,7 @@ int subtract_main(int argc, char* argv[]); // int tagbam_main(int argc, char* argv[]);// int unionbedgraphs_main(int argc, char* argv[]);// int window_main(int argc, char* argv[]); // +int windowmaker_main(int argc, char* argv[]); // int bedtools_help(void); int bedtools_faq(void); @@ -116,6 +117,7 @@ int main(int argc, char *argv[]) else if (sub_cmd == "overlap") return getoverlap_main(argc-1, argv+1); else if (sub_cmd == "igv") return bedtoigv_main(argc-1, argv+1); else if (sub_cmd == "links") return links_main(argc-1, argv+1); + else if (sub_cmd == "makewindows") return windowmaker_main(argc-1, argv+1); // help else if (sub_cmd == "-h" || sub_cmd == "--help" || @@ -134,14 +136,16 @@ int main(int argc, char *argv[]) // verison information else if (sub_cmd == "-contact" || sub_cmd == "--contact") { - cout << "For further help, please email the bedtools mailing list: " << endl; - cout << "bedtools-discuss@googlegroups.com" << endl << endl; + cout << endl; + cout << "- For further help, or to report a bug, please " << endl; + cout << " email the bedtools mailing list: " << endl; + cout << " bedtools-discuss@googlegroups.com" << endl << endl; - cout << "Stable releases of bedtools can be found at: " << endl; - cout << "http://bedtools.googlecode.com" << endl << endl; + cout << "- Stable releases of bedtools can be found at: " << endl; + cout << " http://bedtools.googlecode.com" << endl << endl; - cout << "The development repository can be found at: " << endl; - cout << "https://github.com/arq5x/bedtools" << endl << endl; + cout << "- The development repository can be found at: " << endl; + cout << " https://github.com/arq5x/bedtools" << endl << endl; } // unknown else { @@ -208,11 +212,12 @@ int bedtools_help(void) cout << " overlap " << "Computes the amount of overlap from two intervals.\n"; cout << " igv " << "Create an IGV snapshot batch script.\n"; cout << " links " << "Create a HTML page of links to UCSC locations.\n"; + cout << " makewindows " << "Make interval \"windows\" across a genome.\n"; cout << endl; cout << " -General help:\n"; cout << " --help " << "Print this help menu.\n"; - cout << " --faq " << "Frequently asked questions.\n"; + //cout << " --faq " << "Frequently asked questions.\n"; TODO cout << " --version " << "What version of bedtools are you using?.\n"; cout << " --contact " << "Feature requests, bugs, mailing lists, etc.\n"; diff --git a/src/windowMaker/Makefile b/src/windowMaker/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ea27b299e58af92305f3964327436873556721ae --- /dev/null +++ b/src/windowMaker/Makefile @@ -0,0 +1,37 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/genomeFile/ \ + -I$(UTILITIES_DIR)/BamTools/include + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= windowMakerMain.cpp windowMaker.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=genomeFile.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= windowMaker + + +all: $(BUILT_OBJECTS) + +.PHONY: all + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean diff --git a/src/windowMaker/windowMaker.cpp b/src/windowMaker/windowMaker.cpp new file mode 100644 index 0000000000000000000000000000000000000000..31c9cba307a283e388ceab4cb8366b3452d130c3 --- /dev/null +++ b/src/windowMaker/windowMaker.cpp @@ -0,0 +1,46 @@ +/***************************************************************************** + windowMaker.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "windowMaker.h" + +WindowMaker::WindowMaker(string &genomeFile, uint32_t size, uint32_t step) + : _genomeFile(genomeFile) + , _size(size) + , _step(step) +{ + _genome = new GenomeFile(genomeFile); + MakeWindows(); +} + +WindowMaker::~WindowMaker(void) {} + + +void WindowMaker::MakeWindows() { + + // get a list of the chroms in the user's genome + vector<string> chromList = _genome->getChromList(); + + // process each chrom in the genome + for (size_t c = 0; c < chromList.size(); ++c) { + string chrom = chromList[c]; + uint32_t chrom_size = _genome->getChromSize(chrom); + + for (uint32_t start = 0; start <= chrom_size; start += _step) { + if ((start + _size) <= chrom_size) { + cout << chrom << "\t" << start << "\t" << start + _size << endl; + } + else if (start < chrom_size) { + cout << chrom << "\t" << start << "\t" << chrom_size << endl; + } + } + } +} + diff --git a/src/windowMaker/windowMaker.h b/src/windowMaker/windowMaker.h new file mode 100644 index 0000000000000000000000000000000000000000..29430cab8fc7121633be90d396cd8b5633d7162e --- /dev/null +++ b/src/windowMaker/windowMaker.h @@ -0,0 +1,37 @@ +/***************************************************************************** + windowMaker.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "genomeFile.h" + +using namespace std; + + +//************************************************ +// Class methods and elements +//************************************************ +class WindowMaker { + +public: + + // constructor + WindowMaker(string &genomeFile, uint32_t size, uint32_t step); + + // destructor + ~WindowMaker(void); + + void MakeWindows(); + +private: + string _genomeFile; + GenomeFile *_genome; + uint32_t _size; + uint32_t _step; +}; diff --git a/src/windowMaker/windowMakerMain.cpp b/src/windowMaker/windowMakerMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19caf42bec3c6439e896ca663fc2a1f6b51c2f90 --- /dev/null +++ b/src/windowMaker/windowMakerMain.cpp @@ -0,0 +1,130 @@ +/***************************************************************************** + windowMakerMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "windowMaker.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "bedtools makewindows" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void windowmaker_help(void); + +int windowmaker_main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string genomeFile; + + // parms + uint32_t size = 0; + uint32_t step = 0; + + bool haveGenome = false; + bool haveSize = false; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) windowmaker_help(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-g", 2, parameterLength)) { + if ((i+1) < argc) { + haveGenome = true; + genomeFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-w", 2, parameterLength)) { + if ((i+1) < argc) { + haveSize = true; + size = atoi(argv[i + 1]); + step = size; + i++; + } + } + else if(PARAMETER_CHECK("-s", 2, parameterLength)) { + if ((i+1) < argc) { + step = atoi(argv[i + 1]); + i++; + } + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveGenome || !haveSize) { + cerr << endl << "*****" << endl << "*****ERROR: Need -g (genome file) and -w (window size). " << endl << "*****" << endl; + showHelp = true; + } + if (!showHelp) { + WindowMaker *wm = new WindowMaker(genomeFile, size, step); + delete wm; + } + else { + windowmaker_help(); + } + return 0; +} + +void windowmaker_help(void) { + + cerr << "\nTool: bedtools makewindows" << endl; + + cerr << "Summary: Makes adjacent and/or sliding windows across a genome." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -g <genome> -w <window_size>" << endl << endl; + + cerr << "Options: " << endl; + + cerr << "\t-s\t" << "Step size: i.e., how many base pairs to step before" << endl; + cerr << "\t\tcreating a new window. Used to create \"sliding\" windows." << endl; + cerr << "\t\t- Defaults to -w (non-sliding windows)." << endl << endl; + + cerr << "Notes: " << endl; + cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; + cerr << "\t <chromName><TAB><chromSize>" << endl << endl; + cerr << "\tFor example, Human (hg19):" << endl; + cerr << "\tchr1\t249250621" << endl; + cerr << "\tchr2\t243199373" << endl; + cerr << "\t..." << endl; + cerr << "\tchr18_gl000207_random\t4262" << endl << endl; + + cerr << "Tips: " << endl; + cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; + cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; + cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; + cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; + + exit(1); + +}