Skip to content
Snippets Groups Projects
Commit 574b9349 authored by Aaron's avatar Aaron
Browse files

Added -C (case-insensitive) matching to nucBed.

parent dd037684
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,9 @@
NucBed::NucBed(string &dbFile, string &bedFile, bool printSeq,
bool hasPattern, const string &pattern, bool forceStrand) {
bool hasPattern, const string &pattern,
bool forceStrand, bool ignoreCase)
{
_dbFile = dbFile;
_bedFile = bedFile;
......@@ -22,7 +24,7 @@ NucBed::NucBed(string &dbFile, string &bedFile, bool printSeq,
_hasPattern = hasPattern;
_pattern = pattern;
_forceStrand = forceStrand;
_ignoreCase = ignoreCase;
_bed = new BedFile(_bedFile);
// Compute the DNA content in each BED/GFF/VCF interval
......@@ -42,7 +44,7 @@ void NucBed::ReportDnaProfile(const BED& bed, const string &sequence, int seqLen
getDnaContent(sequence,a,c,g,t,n,other);
if (_hasPattern)
userPatternCount = countPattern(sequence, _pattern);
userPatternCount = countPattern(sequence, _pattern, _ignoreCase);
// report the original interval
......
......@@ -31,7 +31,7 @@ public:
// constructor
NucBed(string &dbFile, string &bedFile, bool printSeq,
bool hasPattern, const string &pattern,
bool forceStrand);
bool forceStrand, bool ignoreCase);
// destructor
~NucBed(void);
......@@ -45,6 +45,7 @@ private:
bool _hasPattern;
string _pattern;
bool _forceStrand;
bool _ignoreCase;
// instance of a bed file class.
BedFile *_bed;
......
......@@ -40,6 +40,7 @@ int nuc_main(int argc, char* argv[]) {
bool printSeq = false;
bool hasPattern = false;
bool forceStrand = false;
bool ignoreCase = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -80,6 +81,9 @@ int nuc_main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else if(PARAMETER_CHECK("-C", 2, parameterLength)) {
ignoreCase = true;
}
else if(PARAMETER_CHECK("-pattern", 8, parameterLength)) {
if ((i+1) < argc) {
hasPattern = true;
......@@ -99,7 +103,8 @@ int nuc_main(int argc, char* argv[]) {
if (!showHelp) {
NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq, hasPattern, pattern, forceStrand);
NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq,
hasPattern, pattern, forceStrand, ignoreCase);
delete nuc;
}
else {
......@@ -117,13 +122,19 @@ void nuc_help(void) {
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-fi\tInput FASTA file" << endl << endl;
cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl << endl;
cerr << "\t-s\tProfile the sequence according to strand." << endl << endl;
cerr << "\t-seq\tPrint the extracted sequence" << endl << endl;
cerr << "\t-pattern\tReport the number of times a user-defined sequence" << endl;
cerr << "\t\t\tis observed (case-insensitive)." << endl << endl;
cerr << "\t\t\tis observed (case-sensitive)." << endl << endl;
cerr << "\t-C\tIgore case when matching -pattern. By defaulty, case matters." << endl << endl;
cerr << "Output format: " << endl;
cerr << "\tThe following information will be reported after each BED entry:" << endl;
......
......@@ -110,17 +110,24 @@ void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, in
}
int countPattern(const string &seq, const string &pattern)
int countPattern(const string &seq, const string &pattern, bool ignoreCase)
{
// swap the bases
int patternLength = pattern.size();
string s = seq;
string p = pattern;
// standardize the seq and the pattern
// if case should be ignored
if (ignoreCase) {
toUpperCase(s);
toUpperCase(p);
}
int patternLength = p.size();
int patternCount = 0;
for(unsigned int i = 0; i < seq.length(); i++) {
if (seq.substr(i,patternLength) == pattern) {
for(unsigned int i = 0; i < s.length(); i++) {
if (s.substr(i,patternLength) == p) {
patternCount++;
}
}
return patternCount;
}
......@@ -22,6 +22,6 @@ void toUpperCase(string &seq);
// Calculates the number of a, c, g, t, n, and other bases found in a sequence
void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other);
int countPattern(const string &seq, const string &pattern);
int countPattern(const string &seq, const string &pattern, bool ignoreCase);
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment