Skip to content
Snippets Groups Projects
Commit 0783e741 authored by Aaron Quinlan
Browse files

Merge pull request #25 from brentp/groupby-columns

Groupby columns specified like cut (but retaining order)
parents 56af1979 2e766670
No related branches found
No related tags found
No related merge requests found
......@@ -16,7 +16,7 @@ INCLUDES = -I$(UTILITIES_DIR)/tabFile/ \
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= groupBy.cpp
SOURCES= groupBy.cpp $(UTILITIES_DIR)/lineFileUtilities/lineFileUtilities.h
OBJECTS= groupBy.o
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
......
......@@ -172,10 +172,10 @@ int groupby_main(int argc, char* argv[]) {
// Split the column string sent by the user into discrete column numbers
// A comma separated string is expected.
vector<int> groupColumnsInt;
Tokenize(groupColumnsString, groupColumnsInt, ',');
TokenizeColumns(groupColumnsString, groupColumnsInt);
vector<int> opColumnsInt;
Tokenize(opsColumnString, opColumnsInt, ',');
TokenizeColumns(opsColumnString, opColumnsInt);
// sanity check the group columns
for(size_t i = 0; i < groupColumnsInt.size(); ++i) {
......
......@@ -46,5 +46,40 @@ void Tokenize(const string &str, vector<int> &elems, char delimiter = '\t')
}
}
// Expand a cut-style column specification into a list of integer columns.
//
// str   : comma-separated items; each item is either a single column ("4")
//         or a dash-separated range ("1-3").  A descending range ("3-1") is
//         expanded in descending order, so the order requested by the caller
//         is retained.
// elems : output; the expanded column numbers are appended to it.
//
// Items are converted with atoi(), so text that is not a number yields 0.
// If a range item contains more than two dash-separated pieces, only the
// first two are used.
inline
void TokenizeColumns(const string &str, vector<int> &elems)
{
    vector<string> col_sets;
    Tokenize(str, col_sets, ',');
    for( size_t i = 0; i < col_sets.size(); i++ ) {
        // bind by reference: no need to copy each item
        const string &col_set = col_sets[i];
        if( string::npos == col_set.find('-') ){
            // plain single column
            elems.push_back(atoi(col_set.c_str()));
            continue;
        }

        vector<string> ends;
        Tokenize(col_set, ends, '-');
        if( ends.size() < 2 ){
            // Malformed range (e.g. "3-" or "-"): the split did not produce
            // two endpoints, so reading ends[1] would run past the vector.
            // Fall back to converting the item the same way a single column
            // is converted instead of invoking undefined behaviour.
            elems.push_back(atoi(col_set.c_str()));
            continue;
        }

        const int start = atoi(ends[0].c_str());
        const int end   = atoi(ends[1].c_str());
        if( start <= end ){
            // ascending range, both endpoints included
            for( int col = start; col <= end; col++ ){
                elems.push_back(col);
            }
        }
        else {
            // descending range, both endpoints included
            for( int col = start; col >= end; col-- ){
                elems.push_back(col);
            }
        }
    }
}
#endif /* LINEFILEUTILITIES_H */
# Print the number of output lines groupBy produces for one -g column
# specification ($1); all other options are fixed (collapse column 4 of
# ../map/values3.bed).
count_grouped_lines(){
    ../../bin/groupBy -g "$1" -o collapse -c 4 -i ../map/values3.bed | wc -l
}
# Four spellings of the same three grouping columns: a descending range,
# an ascending range, an explicit list, and a range mixed with a list.
lines_a=$(count_grouped_lines 3-1)
lines_b=$(count_grouped_lines 1-3)
lines_c=$(count_grouped_lines 1,2,3)
lines_d=$(count_grouped_lines 1-2,3)
# Compare two values as strings and report a test failure when they differ.
#   $1, $2 : the values to compare
# Prints "fail groupby" and returns non-zero on a mismatch; prints nothing
# and returns zero when the values are equal.
check(){
    if [ "$1" != "$2" ]; then
        # The message must be echoed: a bare quoted string would be run as
        # a command named "fail groupby" and only produce "command not found".
        echo "fail groupby"
        return 1
    fi
}
# Each alternative column specification must give the same line count as
# the "3-1" run stored in lines_a.
for other_count in $lines_b $lines_c $lines_d; do
    check $lines_a $other_count
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment