Skip to content
Snippets Groups Projects
Commit c3ab3d1e authored by Brent Pedersen's avatar Brent Pedersen
Browse files

scripts for comparing to shuffled

parent d6544e0e
No related branches found
No related tags found
No related merge requests found
set -eo pipefail
echo "fisher,shuffled"
for i in $(seq 1000); do
fisher=$(python ./sim.py | tail -1 | cut -f 2)
shuffle=$(bash shuf.sh "bedtools intersect -a taa.bed -b tbb.bed" "wc -l" tgg.genome)
echo "$fisher,$shuffle"
done
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
fig, axs = plt.subplots(3, figsize=(4, 12))
df = pd.read_csv(sys.argv[1])
axs[0].scatter(df.fisher, df.shuffled, s=4)
axs[0].set_xlim(0, 1)
axs[0].set_ylim(0, 1)
axs[0].set_xlabel('fisher p-value')
axs[0].set_ylabel('shuffled p-value')
axs[0].plot([0, 1], [0, 1], ls='--')
x = -np.log10(df.fisher)
y = -np.log10(df.shuffled)
m = int(max(x.max(), y.max())) + 1
axs[1].scatter(x, y, s=4)
axs[1].set_xlim(0, m)
axs[1].set_ylim(0, m)
axs[1].set_xlabel('-log10(fisher p-value)')
axs[1].set_ylabel('-log10(shuffled p-value)')
axs[1].plot([0, m], [0, m], ls='--')
x = -np.log10(1 - np.minimum(1-1e-6, df.fisher))
y = -np.log10(1 - np.minimum(1-1e-6, df.shuffled))
m = int(max(x.max(), y.max())) + 1
axs[2].scatter(x, y, s=4)
axs[2].set_xlim(0, m)
axs[2].set_ylim(0, m)
axs[2].set_xlabel('-log10(1 - fisher p-value)')
axs[2].set_ylabel('-log10(1 - shuffled p-value)')
axs[2].plot([0, m], [0, m], ls='--')
plt.tight_layout()
plt.savefig(sys.argv[1].replace('.txt', '') + '.png')
fig.show()
set -eo pipefail
obs=$(bedtools intersect -wo -a taa.bed -b tbb.bed | wc -l)
seq 1000 | xargs -P 7 -n 1 bash -c "bedtools intersect -wo -a taa.bed -b <(bedtools shuffle -allowBeyondChromEnd -i tbb.bed -g tgg.genome) | wc -l " | awk -vobs=$obs '{s += ($1 > obs)}END{print s / NR}'
p=$(seq 100 | xargs -P 7 -n 1 bash -c "bedtools intersect -wo -a taa.bed -b <(bedtools shuffle -allowBeyondChromEnd -i tbb.bed -g tgg.genome) | wc -l " | awk -vobs=$obs '{s += ($1 > obs)}END{print (1 + s) / (1 + NR)}')
if [ '1' -eq $(echo $p'< 0.1' | bc -l) ] || [ '1' -eq $(echo $p'> 0.9' | bc -l) ]; then
p=$(seq 1000 | xargs -P 7 -n 1 bash -c "bedtools intersect -wo -a taa.bed -b <(bedtools shuffle -allowBeyondChromEnd -i tbb.bed -g tgg.genome) | wc -l " | awk -vobs=$obs '{s += ($1 > obs)}END{print (1 + s) / (1 + NR)}')
fi
if [ '1' -eq $(echo $p'< 0.01' | bc -l) ] || [ '1' -eq $(echo $p'> 0.99' | bc -l) ]; then
p=$(seq 5000 | xargs -P 7 -n 1 bash -c "bedtools intersect -wo -a taa.bed -b <(bedtools shuffle -allowBeyondChromEnd -i tbb.bed -g tgg.genome) | wc -l " | awk -vobs=$obs '{s += ($1 > obs)}END{print (1 + s) / (1 + NR)}')
fi
echo $p
......@@ -3,11 +3,11 @@ from random import randint
from subprocess import check_output
genome_size = 25000000
nA = 210
nB = 3390
nA = 12100
nB = 2000
minA, maxA = (20, 2000)
minB, maxB = (20, 1250)
minA, maxA = (20, 100)
minB, maxB = (20, 2250)
for f, imin, imax, n in (
('taa.bed', minA, maxA, nA),
......@@ -24,4 +24,5 @@ for f, imin, imax, n in (
print >> open('tgg.genome', 'w'), ("chr1\t%i" % genome_size)
print check_output("../../bin/bedtools fisher -a taa.bed -b tbb.bed -g tgg.genome", shell=True)
# NOTE: add -m here to make merged output
print check_output("../../bin/bedtools fisher -a taa.bed -b tbb.bed -g tgg.genome", shell=True).strip()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment