Skip to content

Instantly share code, notes, and snippets.

@brantfaircloth
Created August 24, 2013 21:12
Show Gist options
  • Save brantfaircloth/6330442 to your computer and use it in GitHub Desktop.
Save brantfaircloth/6330442 to your computer and use it in GitHub Desktop.
Virtual restriction digests and BioPython
from Bio import SeqIO
seq = SeqIO.read(open('chr1.fas','rU'),'fasta')
len(seq)
30427671
from Bio import Restriction
Restriction.HindIII.search(seq.seq)
sites = Restriction.HindIII.search(seq.seq)
# get the number of fragments produced by cutting
len(sites)
16963
# sort the sites by position (just to be sure)
sites.sort()
# get the distance between sites
dist = [sites[r] - sites[r-1] for r in xrange(1,len(sites))]
# generate some summary stats
import numpy
import math
dist_a = numpy.array(dist)
# mean fragment size
numpy.mean(dist_a)
1793.6787525056006
# 95 % Confidence invterval around mean
1.96 * numpy.std(dist_a, ddof=1)/math.sqrt(len(dist_a))
33.572399955020472
# get number of fragments btw. 100 and 500 bp - this could likely use some #
# cleanup, but I was in a hurry
less_than_five_bool = dist_a < 500
less_than_five = dist_a[less_than_five_bool]
greater_than_one_bool = less_than_five > 100
one_to_five_hundred_bp = less_than_five[greater_than_one_bool]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment