Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bedtools2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
R3
legacy
bedtools2
Commits
45306c09
Commit
45306c09
authored
14 years ago
by
Aaron
Browse files
Options
Downloads
Patches
Plain Diff
Created an "either" mode for pairToPair.
parent
74b0da44
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/pairToPair/pairToPair.cpp
+113
-51
113 additions, 51 deletions
src/pairToPair/pairToPair.cpp
src/pairToPair/pairToPair.h
+18
-10
18 additions, 10 deletions
src/pairToPair/pairToPair.h
src/pairToPair/pairToPairMain.cpp
+31
-5
31 additions, 5 deletions
src/pairToPair/pairToPairMain.cpp
with
162 additions
and
66 deletions
src/pairToPair/pairToPair.cpp
+
113
−
51
View file @
45306c09
...
...
@@ -17,13 +17,15 @@
Constructor
*/
PairToPair
::
PairToPair
(
string
&
bedAFilePE
,
string
&
bedBFilePE
,
float
&
overlapFraction
,
string
searchType
,
bool
ignoreStrand
)
{
string
searchType
,
bool
ignoreStrand
,
int
slop
,
bool
strandedSlop
)
{
_bedAFilePE
=
bedAFilePE
;
_bedBFilePE
=
bedBFilePE
;
_overlapFraction
=
overlapFraction
;
_searchType
=
searchType
;
_ignoreStrand
=
ignoreStrand
;
_slop
=
slop
;
_strandedSlop
=
strandedSlop
;
_bedA
=
new
BedFilePE
(
bedAFilePE
);
_bedB
=
new
BedFilePE
(
bedBFilePE
);
...
...
@@ -58,7 +60,7 @@ void PairToPair::IntersectPairs() {
while
((
bedStatus
=
_bedA
->
GetNextBedPE
(
a
,
lineNum
))
!=
BED_INVALID
)
{
if
(
bedStatus
==
BED_VALID
)
{
// identify overlaps b/w the pairs
FindOverlaps
(
a
,
hitsA1B1
,
hitsA1B2
,
hitsA2B1
,
hitsA2B2
,
_searchType
);
FindOverlaps
(
a
,
hitsA1B1
,
hitsA1B2
,
hitsA2B1
,
hitsA2B2
);
// reset space for next BEDPE
hitsA1B1
.
clear
();
hitsA1B2
.
clear
();
hitsA2B1
.
clear
();
hitsA2B2
.
clear
();
...
...
@@ -72,7 +74,7 @@ void PairToPair::IntersectPairs() {
void
PairToPair
::
FindOverlaps
(
const
BEDPE
&
a
,
vector
<
BEDCOV
>
&
hitsA1B1
,
vector
<
BEDCOV
>
&
hitsA1B2
,
vector
<
BEDCOV
>
&
hitsA2B1
,
vector
<
BEDCOV
>
&
hitsA2B2
,
string
type
)
{
vector
<
BEDCOV
>
&
hitsA2B1
,
vector
<
BEDCOV
>
&
hitsA2B2
)
{
// list of hits on each end of BEDPE
// that exceed the requested overlap fraction
...
...
@@ -87,70 +89,92 @@ void PairToPair::FindOverlaps(const BEDPE &a, vector<BEDCOV> &hitsA1B1, vector<B
int
numOverlapsA1B2
=
0
;
int
numOverlapsA2B1
=
0
;
int
numOverlapsA2B2
=
0
;
// add the appropriate slop to the starts and ends
CHRPOS
start1
=
a
.
start1
;
CHRPOS
end1
=
a
.
end1
;
CHRPOS
start2
=
a
.
start2
;
CHRPOS
end2
=
a
.
end2
;
if
(
_strandedSlop
==
true
)
{
if
(
a
.
strand1
==
"+"
)
end1
+=
_slop
;
else
start1
-=
_slop
;
if
(
a
.
strand2
==
"+"
)
end2
+=
_slop
;
else
start2
-=
_slop
;
}
else
{
start1
-=
_slop
;
start2
-=
_slop
;
end1
+=
_slop
;
end2
+=
_slop
;
}
// Find the _potential_ hits between each end of A and B
_bedB
->
FindOverlapsPerBin
(
1
,
a
.
chrom1
,
a
.
start1
,
a
.
end1
,
a
.
strand1
,
hitsA1B1
,
!
(
_ignoreStrand
));
// hits between A1 to B1
_bedB
->
FindOverlapsPerBin
(
1
,
a
.
chrom2
,
a
.
start2
,
a
.
end2
,
a
.
strand2
,
hitsA2B1
,
!
(
_ignoreStrand
));
// hits between A2 to B1
_bedB
->
FindOverlapsPerBin
(
2
,
a
.
chrom1
,
a
.
start1
,
a
.
end1
,
a
.
strand1
,
hitsA1B2
,
!
(
_ignoreStrand
));
// hits between A1 to B2
_bedB
->
FindOverlapsPerBin
(
2
,
a
.
chrom2
,
a
.
start2
,
a
.
end2
,
a
.
strand2
,
hitsA2B2
,
!
(
_ignoreStrand
));
// hits between A2 to B2
_bedB
->
FindOverlapsPerBin
(
1
,
a
.
chrom1
,
start1
,
end1
,
a
.
strand1
,
hitsA1B1
,
!
(
_ignoreStrand
));
// hits between A1 to B1
_bedB
->
FindOverlapsPerBin
(
1
,
a
.
chrom2
,
start2
,
end2
,
a
.
strand2
,
hitsA2B1
,
!
(
_ignoreStrand
));
// hits between A2 to B1
_bedB
->
FindOverlapsPerBin
(
2
,
a
.
chrom1
,
start1
,
end1
,
a
.
strand1
,
hitsA1B2
,
!
(
_ignoreStrand
));
// hits between A1 to B2
_bedB
->
FindOverlapsPerBin
(
2
,
a
.
chrom2
,
start2
,
end2
,
a
.
strand2
,
hitsA2B2
,
!
(
_ignoreStrand
));
// hits between A2 to B2
// Now, reduce to the set of hits on each end of A and B that meet the required overlap fraction and orientation.
FindQualityHitsBetweenEnds
(
a
,
1
,
hitsA1B1
,
qualityHitsA1B1
,
numOverlapsA1B1
);
// quality hits between A1 to B1
FindQualityHitsBetweenEnds
(
a
,
1
,
hitsA1B2
,
qualityHitsA1B2
,
numOverlapsA1B2
);
// quality hits between A2 to B1
FindQualityHitsBetweenEnds
(
a
,
2
,
hitsA2B1
,
qualityHitsA2B1
,
numOverlapsA2B1
);
// quality hits between A1 to B2
FindQualityHitsBetweenEnds
(
a
,
2
,
hitsA2B2
,
qualityHitsA2B2
,
numOverlapsA2B2
);
// quality hits between A2 to B2
// Now, reduce to the set of hits on each end of A and B
// that meet the required overlap fraction and orientation.
FindQualityHitsBetweenEnds
(
start1
,
end1
,
start2
,
end2
,
hitsA1B1
,
qualityHitsA1B1
,
numOverlapsA1B1
);
// quality hits between A1 to B1
FindQualityHitsBetweenEnds
(
start1
,
end1
,
start2
,
end2
,
hitsA1B2
,
qualityHitsA1B2
,
numOverlapsA1B2
);
// quality hits between A2 to B1
FindQualityHitsBetweenEnds
(
start1
,
end1
,
start2
,
end2
,
hitsA2B1
,
qualityHitsA2B1
,
numOverlapsA2B1
);
// quality hits between A1 to B2
FindQualityHitsBetweenEnds
(
start1
,
end1
,
start2
,
end2
,
hitsA2B2
,
qualityHitsA2B2
,
numOverlapsA2B2
);
// quality hits between A2 to B2
int
matchCount1
=
0
;
int
matchCount2
=
0
;
if
((
numOverlapsA1B1
>
0
)
||
(
numOverlapsA2B2
>
0
))
{
FindHitsOnBothEnds
(
a
,
qualityHitsA1B1
,
qualityHitsA2B2
,
matchCount1
);
if
(
_searchType
==
"neither"
||
_searchType
==
"both"
)
{
if
((
numOverlapsA1B1
>
0
)
||
(
numOverlapsA2B2
>
0
))
FindHitsOnBothEnds
(
a
,
qualityHitsA1B1
,
qualityHitsA2B2
,
matchCount1
);
if
((
numOverlapsA1B2
>
0
)
||
(
numOverlapsA2B1
>
0
))
FindHitsOnBothEnds
(
a
,
qualityHitsA2B1
,
qualityHitsA1B2
,
matchCount2
);
// report the fact that no hits were found iff _searchType is neither.
if
((
matchCount1
==
0
)
&&
(
matchCount2
==
0
)
&&
(
_searchType
==
"neither"
))
{
_bedA
->
reportBedPENewLine
(
a
);
}
}
if
((
numOverlapsA1B2
>
0
)
||
(
numOverlapsA2B1
>
0
))
{
FindHitsOnBothEnds
(
a
,
qualityHitsA2B1
,
qualityHitsA1B2
,
matchCount2
);
}
if
((
matchCount1
==
0
)
&&
(
matchCount2
==
0
)
&&
(
_searchType
==
"neither"
))
{
_bedA
->
reportBedPENewLine
(
a
);
else
if
(
_searchType
==
"either"
)
{
FindHitsOnEitherEnd
(
a
,
qualityHitsA1B1
,
qualityHitsA2B2
,
matchCount1
);
FindHitsOnEitherEnd
(
a
,
qualityHitsA2B1
,
qualityHitsA1B2
,
matchCount2
);
}
}
void
PairToPair
::
FindQualityHitsBetweenEnds
(
const
BEDPE
&
a
,
int
end
,
const
vector
<
BEDCOV
>
&
hits
,
vector
<
BEDCOV
>
&
qualityHits
,
int
&
numOverlaps
)
{
void
PairToPair
::
FindQualityHitsBetweenEnds
(
CHRPOS
start1
,
CHRPOS
end
1
,
CHRPOS
start2
,
CHRPOS
end2
,
const
vector
<
BEDCOV
>
&
hits
,
vector
<
BEDCOV
>
&
qualityHits
,
int
&
numOverlaps
)
{
if
(
end
==
1
)
{
vector
<
BEDCOV
>::
const_iterator
h
=
hits
.
begin
();
vector
<
BEDCOV
>::
const_iterator
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
int
s
=
max
(
a
.
start1
,
h
->
start
);
int
e
=
min
(
a
.
end1
,
h
->
end
);
// is there enough overlap (default ~ 1bp)
if
(
((
float
)(
e
-
s
)
/
(
float
)(
a
.
end1
-
a
.
start1
))
>=
_overlapFraction
)
{
numOverlaps
++
;
qualityHits
.
push_back
(
*
h
);
}
// end 1
vector
<
BEDCOV
>::
const_iterator
h
=
hits
.
begin
();
vector
<
BEDCOV
>::
const_iterator
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
int
s
=
max
(
start1
,
h
->
start
);
int
e
=
min
(
end1
,
h
->
end
);
// is there enough overlap (default ~ 1bp)
if
(
((
float
)(
e
-
s
)
/
(
float
)(
end1
-
start1
))
>=
_overlapFraction
)
{
numOverlaps
++
;
qualityHits
.
push_back
(
*
h
);
}
}
else
if
(
end
==
2
)
{
vector
<
BEDCOV
>::
const_iterator
h
=
hits
.
begin
();
vector
<
BEDCOV
>::
const_iterator
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
int
s
=
max
(
a
.
start2
,
h
->
start
);
int
e
=
min
(
a
.
end2
,
h
->
end
);
// is there enough overlap (default ~ 1bp)
if
(
((
float
)(
e
-
s
)
/
(
float
)(
a
.
end2
-
a
.
start2
))
>=
_overlapFraction
)
{
numOverlaps
++
;
qualityHits
.
push_back
(
*
h
);
}
// end 2
h
=
hits
.
begin
();
hitsEnd
=
hits
.
end
();
for
(;
h
!=
hitsEnd
;
++
h
)
{
int
s
=
max
(
start2
,
h
->
start
);
int
e
=
min
(
end2
,
h
->
end
);
// is there enough overlap (default ~ 1bp)
if
(
((
float
)(
e
-
s
)
/
(
float
)(
end2
-
start2
))
>=
_overlapFraction
)
{
numOverlaps
++
;
qualityHits
.
push_back
(
*
h
);
}
}
}
...
...
@@ -187,3 +211,41 @@ void PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<BEDCOV> &qualit
}
}
void
PairToPair
::
FindHitsOnEitherEnd
(
const
BEDPE
&
a
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd1
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd2
,
int
&
matchCount
)
{
map
<
unsigned
int
,
vector
<
BEDCOV
>
,
less
<
int
>
>
hitsMap
;
for
(
vector
<
BEDCOV
>::
const_iterator
h
=
qualityHitsEnd1
.
begin
();
h
!=
qualityHitsEnd1
.
end
();
++
h
)
{
hitsMap
[
h
->
count
].
push_back
(
*
h
);
matchCount
++
;
}
for
(
vector
<
BEDCOV
>::
const_iterator
h
=
qualityHitsEnd2
.
begin
();
h
!=
qualityHitsEnd2
.
end
();
++
h
)
{
hitsMap
[
h
->
count
].
push_back
(
*
h
);
matchCount
++
;
}
for
(
map
<
unsigned
int
,
vector
<
BEDCOV
>
,
less
<
unsigned
int
>
>::
iterator
m
=
hitsMap
.
begin
();
m
!=
hitsMap
.
end
();
++
m
)
{
if
(
m
->
second
.
size
()
>=
1
)
{
if
((
m
->
second
.
size
())
==
2
)
{
BEDCOV
b1
=
m
->
second
[
0
];
BEDCOV
b2
=
m
->
second
[
1
];
_bedA
->
reportBedPETab
(
a
);
printf
(
"%s
\t
%d
\t
%d
\t
%s
\t
%d
\t
%d
\t
%s
\t
%s
\t
%s
\t
%s
\n
"
,
b1
.
chrom
.
c_str
(),
b1
.
start
,
b1
.
end
,
b2
.
chrom
.
c_str
(),
b2
.
start
,
b2
.
end
,
b1
.
name
.
c_str
(),
b1
.
score
.
c_str
(),
b1
.
strand
.
c_str
(),
b2
.
strand
.
c_str
());
}
else
{
BEDCOV
b1
=
m
->
second
[
0
];
_bedA
->
reportBedPETab
(
a
);
printf
(
"%s
\t
%d
\t
%d
\t
.
\t
-1
\t
-1
\t
%s
\t
%s
\t
%s
\t
.
\n
"
,
b1
.
chrom
.
c_str
(),
b1
.
start
,
b1
.
end
,
b1
.
name
.
c_str
(),
b1
.
score
.
c_str
(),
b1
.
strand
.
c_str
());
}
}
}
}
This diff is collapsed.
Click to expand it.
src/pairToPair/pairToPair.h
+
18
−
10
View file @
45306c09
...
...
@@ -28,22 +28,14 @@ class PairToPair {
public:
// constructor
PairToPair
(
string
&
,
string
&
,
float
&
,
string
,
bool
);
PairToPair
(
string
&
bedAFilePE
,
string
&
bedBFilePE
,
float
&
overlapFraction
,
string
searchType
,
bool
ignoreStrand
,
int
slop
,
bool
strandedSlop
);
// destructor
~
PairToPair
(
void
);
void
IntersectPairs
();
void
FindOverlaps
(
const
BEDPE
&
a
,
vector
<
BEDCOV
>
&
hitsA1B1
,
vector
<
BEDCOV
>
&
hitsA1B2
,
vector
<
BEDCOV
>
&
hitsA2B1
,
vector
<
BEDCOV
>
&
hitsA2B2
,
string
type
);
void
FindQualityHitsBetweenEnds
(
const
BEDPE
&
a
,
int
end
,
const
vector
<
BEDCOV
>
&
hits
,
vector
<
BEDCOV
>
&
qualityHits
,
int
&
numOverlaps
);
void
FindHitsOnBothEnds
(
const
BEDPE
&
a
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd1
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd2
,
int
&
matchCount
);
private:
...
...
@@ -53,12 +45,28 @@ private:
float
_overlapFraction
;
string
_searchType
;
bool
_ignoreStrand
;
int
_slop
;
bool
_strandedSlop
;
// instance of a paired-end bed file class.
BedFilePE
*
_bedA
;
// instance of a bed file class.
BedFilePE
*
_bedB
;
// methods
void
FindOverlaps
(
const
BEDPE
&
a
,
vector
<
BEDCOV
>
&
hitsA1B1
,
vector
<
BEDCOV
>
&
hitsA1B2
,
vector
<
BEDCOV
>
&
hitsA2B1
,
vector
<
BEDCOV
>
&
hitsA2B2
);
void
FindQualityHitsBetweenEnds
(
CHRPOS
start1
,
CHRPOS
end1
,
CHRPOS
start2
,
CHRPOS
end2
,
const
vector
<
BEDCOV
>
&
hits
,
vector
<
BEDCOV
>
&
qualityHits
,
int
&
numOverlaps
);
void
FindHitsOnBothEnds
(
const
BEDPE
&
a
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd1
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd2
,
int
&
matchCount
);
void
FindHitsOnEitherEnd
(
const
BEDPE
&
a
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd1
,
const
vector
<
BEDCOV
>
&
qualityHitsEnd2
,
int
&
matchCount
);
};
#endif
/* PAIRTOPAIR_H */
This diff is collapsed.
Click to expand it.
src/pairToPair/pairToPairMain.cpp
+
31
−
5
View file @
45306c09
...
...
@@ -34,6 +34,7 @@ int main(int argc, char* argv[]) {
// input arguments
float
overlapFraction
=
1E-9
;
int
slop
=
0
;
string
searchType
=
"both"
;
// flags to track parameters
...
...
@@ -41,8 +42,9 @@ int main(int argc, char* argv[]) {
bool
haveBedB
=
false
;
bool
haveSearchType
=
false
;
bool
haveFraction
=
false
;
bool
ignoreStrand
=
false
;
bool
ignoreStrand
=
false
;
bool
haveSlop
=
false
;
bool
strandedSlop
=
false
;
// check to see if we should print out some help
if
(
argc
<=
1
)
showHelp
=
true
;
...
...
@@ -90,6 +92,16 @@ int main(int argc, char* argv[]) {
i
++
;
}
}
else
if
(
PARAMETER_CHECK
(
"-slop"
,
5
,
parameterLength
))
{
if
((
i
+
1
)
<
argc
)
{
haveSlop
=
true
;
slop
=
atoi
(
argv
[
i
+
1
]);
i
++
;
}
}
else
if
(
PARAMETER_CHECK
(
"-ss"
,
3
,
parameterLength
))
{
strandedSlop
=
true
;
}
else
if
(
PARAMETER_CHECK
(
"-is"
,
3
,
parameterLength
))
{
ignoreStrand
=
true
;
}
...
...
@@ -106,14 +118,19 @@ int main(int argc, char* argv[]) {
showHelp
=
true
;
}
if
(
haveSearchType
&&
(
searchType
!=
"neither"
)
&&
(
searchType
!=
"both"
))
{
if
(
haveSearchType
&&
(
searchType
!=
"neither"
)
&&
(
searchType
!=
"both"
)
&&
(
searchType
!=
"either"
)
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Request
\"
both
\"
or
\"
neither
\"
"
<<
endl
<<
"*****"
<<
endl
;
showHelp
=
true
;
}
if
(
strandedSlop
==
true
&&
haveSlop
==
false
)
{
cerr
<<
endl
<<
"*****"
<<
endl
<<
"*****ERROR: Need a -slop value if requesting -ss."
<<
endl
<<
"*****"
<<
endl
;
showHelp
=
true
;
}
if
(
!
showHelp
)
{
PairToPair
*
bi
=
new
PairToPair
(
bedAFile
,
bedBFile
,
overlapFraction
,
searchType
,
ignoreStrand
);
PairToPair
*
bi
=
new
PairToPair
(
bedAFile
,
bedBFile
,
overlapFraction
,
searchType
,
ignoreStrand
,
slop
,
strandedSlop
);
delete
bi
;
return
0
;
}
...
...
@@ -139,9 +156,18 @@ void ShowHelp(void) {
cerr
<<
"
\t
-type
\t
"
<<
"Approach to reporting overlaps between A and B."
<<
endl
<<
endl
;
cerr
<<
"
\t\t
neither
\t
Report overlaps if neither end of A overlaps B."
<<
endl
;
cerr
<<
"
\t\t
either
\t
Report overlaps if either ends of A overlap B."
<<
endl
;
cerr
<<
"
\t\t
both
\t
Report overlaps if both ends of A overlap B."
<<
endl
;
cerr
<<
"
\t\t
- Default."
<<
endl
<<
endl
;
cerr
<<
"
\t\t\t
- Default = both."
<<
endl
<<
endl
;
cerr
<<
"
\t
-slop
\t
"
<<
"The amount of slop (in b.p.). to be added to each footprint."
<<
endl
<<
endl
;
cerr
<<
"
\t
-ss
\t
"
<<
"Add slop based to each BEDPE footprint based on strand."
<<
endl
;
cerr
<<
"
\t\t
- If strand is
\"
+
\"
, slop is only added to the end coordinates."
<<
endl
<<
endl
;
cerr
<<
"
\t\t
- If strand is
\"
-
\"
, slop is only added to the start coordinates."
<<
endl
<<
endl
;
cerr
<<
"
\t\t
- By default, slop is added in both directions."
<<
endl
<<
endl
;
cerr
<<
"
\t
-is
\t
"
<<
"Ignore strands when searching for overlaps."
<<
endl
;
cerr
<<
"
\t\t
- By default, strands are enforced."
<<
endl
<<
endl
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment