Skip to content

Instantly share code, notes, and snippets.

@dwinter
Created November 6, 2014 19:15
Show Gist options
  • Save dwinter/940905ea35d34b11193d to your computer and use it in GitHub Desktop.
Save dwinter/940905ea35d34b11193d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import subprocess
import collections
import random
from StringIO import StringIO
from Bio import SeqIO
from Bio.Emboss import Primer3
from Bio.Emboss.Applications import Primer3Commandline
class Chromosome(object):
""" Represents one chromosome from a BAM header """
def __init__(self, name, start, end):
self.name = name
self.start= start
self.end = end
def __repr__(self):
return "Chromosome({0}, {1}, {2})".format(self.name, self.start,
self.end)
def make_bed_line(self, index):
pos = index - self.start
return("{0}\t{1}\t{2}\n".format(self.name, pos, pos+1))
class PrimerTarget:
"""
Represents a primer target region, with functions to run primer3
"""
def __init__(self, chrom, pos, target, seq_record):
self.chrom = chrom
self.pos = pos
self.target = target
self.sequence = seq_record
def __repr__(self):
s = "PrimerTarget(chrom={0}, pos={1}, target={2}, seq_record=SeqRecord(..) )"
return s.format(self.chrom, self.pos, self.target)
def _parse_primers(self, primer_text):
rec = Primer3.read(StringIO(primer_text))
return rec
def find_primers(self, **kwargs):
""" """
SeqIO.write(self.sequence, "temp.fasta", "fasta")
call= Primer3Commandline(cmd="eprimer32",
sequence = "temp.fasta",
stdout=True, auto=True,
target = "{0},{1}".format(*self.target),
**kwargs)
out, err = call()
if err:
print err
return out,err
return self._parse_primers(out)
def slice_seq(chrom, centre, idx, flanking =500):
"""Take a region of a chromosome """
rec = idx[chrom]
if centre < flanking:
start = 0
else:
start = centre - flanking
if centre + flanking > len(rec):
end = len(rec)
else:
end = centre + flanking
target = centre-start
return(PrimerTarget(chrom, centre, (target-30,target+31), rec[start:end]))
def parse_header(fname):
"""Create a set of chromomsome objects from Bam header """
cmd = "samtools view -H {0}".format(fname)
child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=(sys.platform!="win32"))
header, err = child.communicate()
genome = []
cummulative_len = 0
for line in header.split("\n"):
if line.startswith("@SQ"):
chrom, L = [e.split(":")[1] for e in line.split()[1:] ]
genome.append(Chromosome(chrom, cummulative_len+1, int(L) + cummulative_len))
cummulative_len += int(L)
genome.sort(key=lambda x : x.start)
return(genome)
def design_primers(target_region, gc_slide = [20,18,15]):
""" """
name = "{0}_{1}".format(target_region.chrom, target_region.pos)
#Does it work by default?
recs = target_region.find_primers(numreturn=1, mintm=57, maxtm=63, maxdifftm=5,)
if recs.primers:
return (name, recs.primers[0])
#OK, how about allowing more and more AT rich
for gc in gc_slide:
recs = target_region.find_primers(numreturn=1, mingc=gc, mintm=57, maxtm=63,maxdifftm=5)
if recs.primers:
return (name, recs.primers[0])
#last desperate attempt
recs = target_region.find_primers(numreturn=1, mingc=15, mintm=48, maxtm=70,maxdifftm=5)
if recs.primers:
return (name, recs.primers[0])
#We just didn't get one, I guess
return None
def main():
try:
n, ref, bam, out_name = sys.argv[1:]
except ValueError:
print ("Need exactly four arguments: number of replicates, reference genome and bam (header) file name of our file")
return(-1)
print("====Sampling the genome====\n")
genome = parse_header(bam)
bases = [random.randint(0, genome[-1].end) for _ in range(int(n))]
sites = []
for b in bases:
i = 0
while genome[i].end < b:
i += 1
sites.append( (genome[i].name, b - genome[i].start) )
print("====Designing Primers=====")
ref_idx = SeqIO.index(ref, "fasta")
seqs = [slice_seq(*x, idx=ref_idx) for x in sites]
print(len(seqs))
primers = (design_primers(t) for t in seqs) #generator
counter = 0
with open(out_name, "w") as out:
for site_name,p in primers:
if p:
out.write("{0} {1} {2}\n".format(site_name, p.forward_seq, p.reverse_seq))
counter += 1
print("Wrote primers for {0} out of {1} sites".format(counter, n))
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment