Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bedtools2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
R3
legacy
bedtools2
Commits
444d5bce
Commit
444d5bce
authored
13 years ago
by
Aaron
Browse files
Options
Downloads
Patches
Plain Diff
Added expand tool.
parent
64190f57
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
Makefile
+1
-0
1 addition, 0 deletions
Makefile
src/bedtools.cpp
+3
-0
3 additions, 0 deletions
src/bedtools.cpp
src/expand/Makefile
+36
-0
36 additions, 0 deletions
src/expand/Makefile
src/expand/expand.cpp
+265
-0
265 additions, 0 deletions
src/expand/expand.cpp
with
305 additions
and
0 deletions
Makefile
+
1
−
0
View file @
444d5bce
...
...
@@ -29,6 +29,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(
SRC_DIR
)
/clusterBed
\
$(
SRC_DIR
)
/complementBed
\
$(
SRC_DIR
)
/coverageBed
\
$(
SRC_DIR
)
/expand
\
$(
SRC_DIR
)
/fastaFromBed
\
$(
SRC_DIR
)
/flankBed
\
$(
SRC_DIR
)
/genomeCoverageBed
\
...
...
This diff is collapsed.
Click to expand it.
src/bedtools.cpp
+
3
−
0
View file @
444d5bce
...
...
@@ -44,6 +44,7 @@ int closest_main(int argc, char* argv[]); //
int
cluster_main
(
int
argc
,
char
*
argv
[]);
//
int
complement_main
(
int
argc
,
char
*
argv
[]);
//
int
coverage_main
(
int
argc
,
char
*
argv
[]);
//
int
expand_main
(
int
argc
,
char
*
argv
[]);
//
int
fastafrombed_main
(
int
argc
,
char
*
argv
[]);
//
int
flank_main
(
int
argc
,
char
*
argv
[]);
//
int
genomecoverage_main
(
int
argc
,
char
*
argv
[]);
//
...
...
@@ -126,6 +127,7 @@ int main(int argc, char *argv[])
else
if
(
sub_cmd
==
"links"
)
return
links_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"makewindows"
)
return
windowmaker_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"groupby"
)
return
groupby_main
(
argc
-
1
,
argv
+
1
);
else
if
(
sub_cmd
==
"expand"
)
return
expand_main
(
argc
-
1
,
argv
+
1
);
// help
else
if
(
sub_cmd
==
"-h"
||
sub_cmd
==
"--help"
||
...
...
@@ -225,6 +227,7 @@ int bedtools_help(void)
cout
<<
" links "
<<
"Create a HTML page of links to UCSC locations.
\n
"
;
cout
<<
" makewindows "
<<
"Make interval
\"
windows
\"
across a genome.
\n
"
;
cout
<<
" groupby "
<<
"Group by common cols. & summarize oth. cols. (~ SQL
\"
groupBy
\"
)
\n
"
;
cout
<<
" expand "
<<
"Replicate lines based on lists of values in columns.
\n
"
;
cout
<<
endl
;
cout
<<
"[ General help ]"
<<
endl
;
...
...
This diff is collapsed.
Click to expand it.
src/expand/Makefile
0 → 100644
+
36
−
0
View file @
444d5bce
# Build rules for the "expand" tool: compiles expand.cpp into an object file
# under $(OBJ_DIR). No linking happens here; this file only produces expand.o.

# Relative paths to the shared utility sources and to the build output
# directories. BIN_DIR is defined but not referenced anywhere in this file.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
# One -I entry per utility directory whose headers are made visible to expand.cpp.
INCLUDES = -I$(UTILITIES_DIR)/tabFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/VectorOps/ \
           -I$(UTILITIES_DIR)/version/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= expand.cpp
OBJECTS= expand.o
# Prefix every object name with $(OBJ_DIR)/ so objects are written to the
# shared object directory rather than next to the sources.
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

# Default target: build all objects.
all: $(BUILT_OBJECTS)

.PHONY: all

# Compile step. $(*F) is the file-name part of the target's stem, so
# "$(*F).cpp" names the source matching the object being built.
# NOTE(review): $(LDFLAGS) is passed on a compile-only (-c) command line, where
# linker flags are normally unused -- confirm this is intentional.
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Remove the object produced by this Makefile (-f: no error if it is missing).
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/expand.o

.PHONY: clean
This diff is collapsed.
Click to expand it.
src/expand/expand.cpp
0 → 100644
+
265
−
0
View file @
444d5bce
/*****************************************************************************
expand.cpp
(c) 2009, 2010, 2011 - Aaron Quinlan
Center for Public Health Genomics
University of Virginia
aaronquinlan@gmail.com
Licenced under the MIT license.
******************************************************************************/
#include
<vector>
#include
<map>
#include
<numeric>
#include
<algorithm>
#include
<iterator>
#include
<iostream>
#include
<iomanip>
#include
<fstream>
#include
<sstream>
#include
<stdlib.h>
#include
<math.h>
#include
<limits.h>
#include
<string.h>
#include
<exception>
#include
<stdexcept>
// out_of_range exception
#include
"version.h"
#include
"lineFileUtilities.h"
#include
"tabFile.h"
#include
"VectorOps.h"
using
namespace
std
;
// Program name as printed on the "Usage:" line of the help text.
#define PROGRAM_NAME "bedtools expand"

// PARAMETER_CHECK(param, paramLen, actualLen)
//   Evaluates to true when the argument currently being parsed matches the
//   option literal `param`:
//     - the first min(actualLen, paramLen) characters compare equal, and
//     - actualLen == paramLen.
//   The macro reads `argv[i]` directly, so it can only be used where both
//   `argv` and the loop index `i` are in scope. `actualLen` is expected to be
//   strlen(argv[i]). `paramLen` must be the exact length of `param`: if it is
//   shorter, only that shortened prefix of the option can ever match.
#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], param, min(actualLen, paramLen))== 0) && \
                                                     (actualLen == paramLen))

// True when `string` is a non-empty C string starting with '-'. Used to detect
// that the token following an option is another option rather than a value.
#define LOOKS_LIKE_A_PARAM(string) (strlen(string)>0 && string[0]=='-')

// function declarations

// Prints the tool's help text.
void expand_help(void);

// Replicates each line of `inFile` according to the comma-separated values
// found in the 1-based columns listed in `expColumns`.
void Expand(const string &inFile, const vector<int> &expColumns);
/*
    expand_main: entry point for the "bedtools expand" sub-command.

    Recognised arguments:
        -h, --help       print the help text (expand_help() ends the program)
        -i <file>        input file; "stdin" is used when omitted
        -c <cols>        comma-separated, 1-based list of columns to expand
        -expcols <cols>  long alias for -c

    argv[0] is the sub-command name itself, so parsing starts at index 1.
    Returns 0 after a successful run; every error path ends in expand_help(),
    which terminates the process.
*/
int expand_main(int argc, char* argv[]) {

    // input file; "stdin" unless -i is given
    string inFile = "stdin";
    // raw, still comma-separated, column list taken from -c / -expcols
    string expColumnString;

    // our configuration variables
    bool showHelp       = false;
    bool haveExpColumns = false;

    // no arguments at all: show the help
    if (argc <= 1) showHelp = true;

    // first pass: look for an explicit help request anywhere on the command line
    for (int i = 1; i < argc; i++) {
        int parameterLength = static_cast<int>(strlen(argv[i]));

        // the length handed to PARAMETER_CHECK must be the exact length of the
        // literal ("--help" is 6 characters), otherwise the option never matches
        if ((PARAMETER_CHECK("-h", 2, parameterLength)) ||
            (PARAMETER_CHECK("--help", 6, parameterLength))) {
            showHelp = true;
        }
    }

    if (showHelp) expand_help();

    // second pass: do some parsing (all of these parameters require 2 strings)
    for (int i = 1; i < argc; i++) {

        int parameterLength = static_cast<int>(strlen(argv[i]));

        if (PARAMETER_CHECK("-i", 2, parameterLength)) {
            if ((i + 1) >= argc || LOOKS_LIKE_A_PARAM(argv[i + 1])) {
                cerr << endl << "*****ERROR: -i parameter requires a value." << endl << endl;
                expand_help();
                break;
            }
            else {
                inFile = argv[i + 1];
                i++;    // skip over the value just consumed
            }
        }
        // "-expcols" is 8 characters long
        else if (PARAMETER_CHECK("-expcols", 8, parameterLength) ||
                 PARAMETER_CHECK("-c", 2, parameterLength)) {
            if ((i + 1) >= argc || LOOKS_LIKE_A_PARAM(argv[i + 1])) {
                cerr << endl << "*****ERROR: -c parameter requires a value." << endl << endl;
                expand_help();
                break;
            }
            else {
                haveExpColumns  = true;
                expColumnString = argv[i + 1];
                i++;    // skip over the value just consumed
            }
        }
        else {
            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
            showHelp = true;
        }
    }

    // the list of columns to expand is mandatory
    if (!haveExpColumns) {
        cerr << endl << "*****" << endl << "*****ERROR: Need -c." << endl << "*****" << endl;
        showHelp = true;
    }

    if (!showHelp) {

        // split the comma-separated column list into integers
        vector<int> expColumns;
        Tokenize(expColumnString, expColumns, ',');

        // sanity check the exp columns: column numbers are 1-based
        for (size_t i = 0; i < expColumns.size(); ++i) {
            int expCol = expColumns[i];
            if (expCol < 1) {
                cerr << endl << "*****" << endl << "*****ERROR: expansion columns must be >=1. " << endl << "*****" << endl;
                expand_help();
            }
        }
        Expand(inFile, expColumns);
    }
    else {
        expand_help();
    }
    return 0;
}
/*
    expand_help: write the usage text for "bedtools expand" to stderr and
    terminate the program with exit status 1. This function never returns.
*/
void expand_help(void) {

    cerr << "\nTool: bedtools expand " << endl;
    cerr << "Version: " << VERSION << "\n";
    cerr << "Summary: Replicate lines in a file based on columns of comma-separated values." << endl << endl;

    cerr << "Usage:\t" << PROGRAM_NAME << " -c [COLS] " << endl;

    cerr << "Options: " << endl;
    cerr << "\t-i\t" << "Input file. Assumes \"stdin\" if omitted." << endl << endl;

    // -c selects the columns to expand; -expcols is accepted as an alias
    cerr << "\t-c\t" << "Specify the column(s) (1-based) that should be expanded." << endl;
    cerr << "\t\t- Required. Also accepted as -expcols." << endl;

    // the examples pass the file with -i: a bare file name is rejected
    // by the argument parser as an unrecognized parameter
    cerr << "Examples: " << endl;
    cerr << " $ cat test.txt" << endl;
    cerr << " chr1 10 20 1,2,3 10,20,30" << endl;
    cerr << " chr1 40 50 4,5,6 40,50,60" << endl << endl;

    cerr << " $ bedtools expand -i test.txt -c 5" << endl;
    cerr << " chr1 10 20 1,2,3 10" << endl;
    cerr << " chr1 10 20 1,2,3 20" << endl;
    cerr << " chr1 10 20 1,2,3 30" << endl;
    cerr << " chr1 40 50 4,5,6 40" << endl;
    cerr << " chr1 40 50 4,5,6 50" << endl;
    cerr << " chr1 40 50 4,5,6 60" << endl << endl;

    cerr << " $ bedtools expand -i test.txt -c 4,5" << endl;
    cerr << " chr1 10 20 1 10" << endl;
    cerr << " chr1 10 20 2 20" << endl;
    cerr << " chr1 10 20 3 30" << endl;
    cerr << " chr1 40 50 4 40" << endl;
    cerr << " chr1 40 50 5 50" << endl;
    cerr << " chr1 40 50 6 60" << endl;

    // end the program here
    exit(1);
}
void
Expand
(
const
string
&
inFile
,
const
vector
<
int
>
&
expColumns
)
{
// current line number
int
lineNum
=
0
;
// string representing current line
string
inLine
;
// vector of strings holding the tokenized current line
vector
<
string
>
inFields
;
inFields
.
reserve
(
20
);
// build a map of the columns to be expanded
// to allow quic lookups to test if a column is
// "normal" or whether it is one of the columns
// that is being expaded
map
<
int
,
bool
>
expColMap
;
for
(
size_t
c
=
0
;
c
<
expColumns
.
size
();
c
++
)
expColMap
[
expColumns
[
c
]]
=
true
;
// open a new tab file, loop through it line by line
// and expand each line into multiple lines according to the
// columns the user has requested.
//
TabLineStatus
tabLineStatus
;
TabFile
*
_tab
=
new
TabFile
(
inFile
);
_tab
->
Open
();
while
((
tabLineStatus
=
_tab
->
GetNextTabLine
(
inFields
,
lineNum
))
!=
TAB_INVALID
)
{
lineNum
++
;
if
(
tabLineStatus
==
TAB_VALID
)
{
// a list containing the expanded values (inner) for each column (outer)
vector
<
vector
<
string
>
>
expandedCols
;
// expand each requested column into a vector
int
prev_size
=
-
1
;
for
(
size_t
c
=
0
;
c
<
expColumns
.
size
();
c
++
)
{
vector
<
string
>
expansion
;
if
((
expColumns
[
c
]
-
1
)
>=
(
int
)
inFields
.
size
())
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Requested column number exceeds number of columns."
<<
endl
<<
"***** This was violated at line: "
<<
lineNum
<<
endl
<<
"*****"
<<
endl
;
exit
(
1
);
}
// expand the requested column into a vector
Tokenize
(
inFields
[
expColumns
[
c
]
-
1
],
expansion
,
','
);
if
((
int
)
expansion
.
size
()
!=
prev_size
&&
prev_size
>=
0
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Each expanded column must have the same number of elements."
<<
endl
<<
"***** This was violated at line: "
<<
lineNum
<<
endl
<<
"*****"
<<
endl
;
exit
(
1
);
}
else
{
expandedCols
.
push_back
(
expansion
);
}
prev_size
=
expansion
.
size
();
}
// now replicate/expand the original line based on the
// values in the requested columns
size_t
totalCols
=
inFields
.
size
();
for
(
size_t
n
=
0
;
n
<
expandedCols
[
0
].
size
();
n
++
)
{
int
numExpColsSeen
=
0
;
for
(
size_t
c
=
0
;
c
<
totalCols
;
c
++
)
{
// normal column, print as-is
if
(
!
expColMap
[
c
+
1
])
{
printf
(
"%s"
,
inFields
[
c
].
c_str
());
}
// expanded column, grab relevant value from expanded vector.
else
{
printf
(
"%s"
,
expandedCols
[
numExpColsSeen
][
n
].
c_str
());
numExpColsSeen
++
;
}
// add a tab if not the very last value
if
(
c
<
totalCols
-
1
)
printf
(
"
\t
"
);
}
printf
(
"
\n
"
);
}
}
inFields
.
clear
();
}
_tab
->
Close
();
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment