diff --git a/test/intersect/chromsOutOfOrder.bed b/test/intersect/chromsOutOfOrder.bed new file mode 100644 index 0000000000000000000000000000000000000000..5d0fe5154840012dcfc59ceb0ffdb2e57a222681 --- /dev/null +++ b/test/intersect/chromsOutOfOrder.bed @@ -0,0 +1,4 @@ +chr1 100 200 +chr1 150 250 +chr2 100 200 +chr1 200 300 diff --git a/test/intersect/new_test-intersect.sh b/test/intersect/new_test-intersect.sh index 1c226dcddc644a875030c753c16f187d9cc38d91..0774fe1877fa202d0275654187abf5653feb17b4 100755 --- a/test/intersect/new_test-intersect.sh +++ b/test/intersect/new_test-intersect.sh @@ -553,4 +553,39 @@ check obs aWithHeaderVsB.txt rm obs +########################################################### +# Test enforcement of sort order when using -sorted option. +# Show that detect chrom "jumping", i.e. chr1, chr2, then +# back to chr1 +############################################################ +echo " intersect.new.t48...\c" +echo "Error: Sorted input specified" > exp +$BT intersect -a chromsOutOfOrder.bed -b b.bed -sorted 2>&1 > /dev/null | grep "Error: Sorted input specified, " | cut -f1 --delimiter=',' > obs +check obs exp +rm obs exp + +########################################################### +# Test enforcement of sort order when using -sorted option. +# Show that detect chrom "jumping", i.e. chr1, chr2, then +# back to chr1 +############################################################ +echo " intersect.new.t49...\c" +echo "Error: Sorted input specified" > exp +$BT intersect -a recordsOutOfOrder.bed -b b.bed -sorted 2>&1 > /dev/null | grep "Error: Sorted input specified, " | cut -f1 --delimiter=',' > obs +check obs exp +rm obs exp + +########################################################### +# Test that we throw an error for non-existant files +############################################################ +echo " intersect.new.t50...\c" +echo "Error: Unable to open file nonExistantFile.bed. Exiting." > exp +$BT intersect -a nonExistantFile.bed -b b.bed -sorted 2>&1 > /dev/null | cat - >obs +check obs exp +rm obs exp + + + + + diff --git a/test/intersect/performanceTest.sh b/test/intersect/performanceTest.sh new file mode 100755 index 0000000000000000000000000000000000000000..6e9109343ee1044af7bdc66a90ae2353ae0c1390 --- /dev/null +++ b/test/intersect/performanceTest.sh @@ -0,0 +1,138 @@ +################################################################# +# +# PERFORMANCE TESTS FOR VARIOUS INPUT METHODS. +# +# We'll test uncompressed data, gzip, bgzip, and Bam files. +# Each will be tried as a file, and as the 3 types of stdin: +# redirects, pipes, and fifos. +# +# We'll run each one twice in a row to check for improvement +# due to a cache warm up. +# +################################################################# + + +BT=${BT-../../bin/bedtools} + + +################################################################# +# Start by generating data, if desired +# +################################################################# +if true; then + echo "generating data..." + mkdir perfData + cd perfData + ../$BT random -l 1000 -n 20000000 -g ../human.hg19.genome | sort -k1,1 -k2,2n > a10M.bed + ../$BT random -l 1000 -n 10000000 -g ../human.hg19.genome | sort -k1,1 -k2,2n > b10M.bed + cp a10M.bed a10M_gzipped.bed + gzip a10M_gzipped.bed + cp a10M.bed a10M_bgzipped.bed + bgzip a10M_bgzipped.bed + ../$BT bedtobam -i a10M.bed -g ../human.hg19.genome > a10M.bam + cd .. +fi + +################################################################### +# Begin Tests +################################################################### +echo -e "Test 1 of 16...." +echo "Test 1: Intersect a10M, ten million records, uncompressed from file with b10M" > runLog.txt +runit $BT intersect -a perfData/a10M.bed -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 1 for possible cache speed up." >> runLog.txt +runit $BT intersect -a perfData/a10M.bed -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 2 of 16...." +echo -e "\n\nTest 2: Intersect a10M, ten million records, gzipped from file with b10M" >> runLog.txt +runit $BT intersect -a perfData/a10M_gzipped.bed.gz -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 2 for possible cache speed up." >> runLog.txt +runit $BT intersect -a perfData/a10M_gzipped.bed.gz -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 3 of 16...." +echo -e "\n\nTest 3: Intersect a10M, ten million records, bgzipped from file with b10M" >> runLog.txt +runit $BT intersect -a perfData/a10M_bgzipped.bed.gz -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 3 for possible cache speed up." >> runLog.txt +runit $BT intersect -a perfData/a10M_bgzipped.bed.gz -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 4 of 16...." +echo -e "\n\nTest 4: Intersect a10M, ten million records, bam from file with b10M" >> runLog.txt +runit $BT intersect -a perfData/a10M.bam -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 4 for possible cache speed up." >> runLog.txt +runit $BT intersect -a perfData/a10M.bam -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + + +echo -e "Test 5 of 16...." +echo -e "\n\nTest 5: Intersect a10M, ten million records, uncompressed from redirect with b10M" >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M.bed 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 5 for possible cache speed up." >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M.bed 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 6 of 16...." +echo -e "\n\nTest 6: Intersect a10M, ten million records, gzipped from redirect with b10M" >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M_gzipped.bed.gz 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 6 for possible cache speed up." >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M_gzipped.bed.gz 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 7 of 16...." +echo -e "\n\nTest 7: Intersect a10M, ten million records, bgzipped from redirect with b10M" >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M_bgzipped.bed.gz 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 7 for possible cache speed up." >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M_bgzipped.bed.gz 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 8 of 16...." +echo -e "\n\nTest 8: Intersect a10M, ten million records, bam from redirect with b10M" >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M.bam 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 8 for possible cache speed up." >> runLog.txt +runit $BT intersect -a - -b perfData/b10M.bed -sorted < perfData/a10M.bam 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 9 of 16...." +echo -e "\n\nTest 9: Intersect a10M, ten million records, uncompressed from pipe with b10M" >> runLog.txt +cat perfData/a10M.bed | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 9 for possible cache speed up." >> runLog.txt +cat perfData/a10M.bed | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 10 of 16...." +echo -e "\n\nTest 10: Intersect a10M, ten million records, gzip from pipe with b10M" >> runLog.txt +cat perfData/a10M_gzipped.bed.gz | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 10 for possible cache speed up." >> runLog.txt +cat perfData/a10M_gzipped.bed.gz | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 11 of 16...." +echo -e "\n\nTest 11: Intersect a10M, ten million records, bgzip from pipe with b10M" >> runLog.txt +cat perfData/a10M_bgzipped.bed.gz | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 11 for possible cache speed up." >> runLog.txt +cat perfData/a10M_bgzipped.bed.gz | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 12 of 16...." +echo -e "\n\nTest 12: Intersect a10M, ten million records, bam from pipe with b10M" >> runLog.txt +cat perfData/a10M.bam | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 12 for possible cache speed up." >> runLog.txt +cat perfData/a10M.bam | runit $BT intersect -a - -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 13 of 16...." +echo -e "\n\nTest 13: Intersect a10M, ten million records, uncompressed from fifo with b10M" >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M.bed) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 13 for possible cache speed up." >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M.bed) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 14 of 16...." +echo -e "\n\nTest 14: Intersect a10M, ten million records, gzipped from fifo with b10M" >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M_gzipped.bed.gz) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 14 for possible cache speed up." >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M_gzipped.bed.gz) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 15 of 16...." +echo -e "\n\nTest 15: Intersect a10M, ten million records, bgzipped from fifo with b10M" >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M_bgzipped.bed.gz) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 15 for possible cache speed up." >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M_bgzipped.bed.gz) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + +echo -e "Test 16 of 16...." +echo -e "\n\nTest 16: Intersect a10M, ten million records, bam from fifo with b10M" >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M.bam) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt +echo -e "\nRe-do test 16 for possible cache speed up." >> runLog.txt +runit $BT intersect -a <(cat perfData/a10M.bam) -b perfData/b10M.bed -sorted 2>&1 >/dev/null | grep -e "user" -e maxrss >> runLog.txt + + +echo "Tests completed." +rm -rf perfData diff --git a/test/intersect/recordsOutOfOrder.bed b/test/intersect/recordsOutOfOrder.bed new file mode 100644 index 0000000000000000000000000000000000000000..8ff0d0bdedd77f214e850ff278f573e6606bd771 --- /dev/null +++ b/test/intersect/recordsOutOfOrder.bed @@ -0,0 +1,4 @@ +chr1 20 30 +chr1 40 50 +chr1 15 60 +chr1 70 80