Skip to content

Instantly share code, notes, and snippets.

@pjvandehaar
Created October 5, 2016 20:21
Show Gist options
  • Save pjvandehaar/0714070abac7a26019e358612b4e7727 to your computer and use it in GitHub Desktop.
Save pjvandehaar/0714070abac7a26019e358612b4e7727 to your computer and use it in GitHub Desktop.
# resulting graph is at https://www.dropbox.com/s/vepdi2deoeejly0/dhs_nreads.png?dl=0
import pybedtools
import pysam
import itertools
import matplotlib.pyplot as plt
def avg(iterator):
total, n = 0,0
for it in iterator:
total += it
n += 1
return float(total) / n
bedfile = pybedtools.BedTool("ENCFF001UWQ.bed.gz")
avg_DHS_length = int(avg(DHS.end - DHS.start for DHS in bedfile))
samfile = pysam.AlignmentFile("ENCFF000BXW.bam", "rb")
bedfile = pybedtools.BedTool("ENCFF001UWQ.bed.gz")
out_data = {
'+': {},
'-': {},
}
for strand in '-+':
out_data[strand]['dnstream'] = [0] * 500
out_data[strand]['upstream'] = [0] * 500
out_data[strand]['DHS'] = [0] * avg_DHS_length
#bedfile = itertools.islice(bedfile, 0, 1000) # make it faster while debugging
for DHS in bedfile:
chrom = DHS.chrom.encode('ascii')
upstream500_range = (chrom, DHS.start-501, DHS.start+1)
DHS_range = (chrom, DHS.start-1, DHS.end+1)
dnstream500_range = (chrom, DHS.end-1, DHS.end + 501)
for read in samfile.fetch(*upstream500_range):
assert read.rlen > 0
pos = read.pos if not read.is_reverse else read.pos + read.rlen
relative_pos = pos - upstream500_range[1]
if 0 <= relative_pos < 500:
strand = '+-'[read.is_reverse]
out_data[strand]['upstream'][relative_pos] += 1
for read in samfile.fetch(*DHS_range):
assert read.rlen > 0
pos = read.pos if not read.is_reverse else read.pos + read.rlen
relative_pos = pos - DHS_range[1]
scaled_pos = float(relative_pos) / (DHS_range[2]-DHS_range[1])
scaled_pos = int(scaled_pos * avg_DHS_length)
if 0 <= scaled_pos < avg_DHS_length:
strand = '+-'[read.is_reverse]
out_data[strand]['DHS'][scaled_pos] += 1
for read in samfile.fetch(*dnstream500_range):
assert read.rlen > 0
pos = read.pos if not read.is_reverse else read.pos + read.rlen
relative_pos = pos - dnstream500_range[1]
if 0 <= relative_pos < 500:
strand = '+-'[read.is_reverse]
out_data[strand]['dnstream'][relative_pos] += 1
plt.rcParams['figure.figsize'] = (10, 6)
plt.axvline(0, color='lightgrey')
plt.axvline(avg_DHS_length, color='lightgrey')
strand = '+'
plt.plot([i-500 for i in range(500*2+avg_DHS_length)],
out_data[strand]['upstream'] + out_data[strand]['DHS'] + out_data[strand]['dnstream'],
color='blue')
strand = '-'
plt.plot([i-500 for i in range(500*2+avg_DHS_length)],
[i for i in out_data[strand]['upstream'] + out_data[strand]['DHS'] + out_data[strand]['dnstream']],
color='red')
plt.plot([i-500 for i in range(500*2+avg_DHS_length)], [0] * (500*2+avg_DHS_length), color='black')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment