Skip to content

Instantly share code, notes, and snippets.

@philippmuench
Last active October 1, 2017 19:38
Show Gist options
  • Save philippmuench/07a0f52fcbb682b4ce1776c8dd9a865b to your computer and use it in GitHub Desktop.
Save philippmuench/07a0f52fcbb682b4ce1776c8dd9a865b to your computer and use it in GitHub Desktop.
simulate CRISPR sequences
#!/usr/bin/env python
import random
import sys
# spacer sequence characteristics (bounds are inclusive, used with random.randint)
setsize_min = 2 # min number of spacers generated per simulated CRISPR
setsize_max = 8 # max number of spacers generated per simulated CRISPR
minlength = 30 # min length of a single spacer, in bases
maxlength = 40 # max length of a single spacer, in bases
# palindromic repeat characteristics: requested repeat length bounds, in bases
repeat_length_min = 15
repeat_length_max = 35
# neighboring (flanking) sequence: requested length bounds, in bases
neightbor_min = 1000
neightbor_max = 1500
def simulate_random_sequence(length):
    """Return a random DNA string of ``length`` bases.

    Each base is drawn independently and uniformly from ``A``, ``C``,
    ``G`` and ``T`` using the module-level ``random`` generator.

    Args:
        length: Number of bases to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A ``str`` made up only of the characters ``ACGT``.
    """
    # str.join assembles the result in one pass instead of growing a
    # string with ``+=`` once per base.
    return ''.join(random.choice('ACGT') for _ in range(length))
def simulate_repeat(length):
    """Return a random palindromic DNA string of exactly ``length`` bases.

    A random first half is generated and then mirrored by plain string
    reversal (not a reverse complement), so the result reads the same
    forwards and backwards. For an odd ``length`` one extra random base
    is placed in the middle; without it the result would be one base
    shorter than requested.

    Args:
        length: Requested length in bases. Non-integer values are
            truncated; zero or negative values yield an empty string.

    Returns:
        A ``str`` over ``ACGT`` with ``len(result) == length`` that is
        equal to its own reverse.
    """
    length = int(length)
    if length <= 0:
        return ''
    half = ''.join(random.choice('ACGT') for _ in range(length // 2))
    # An odd length needs a centre base to reach the requested size.
    middle = random.choice('ACGT') if length % 2 else ''
    return half + middle + half[::-1]
def simulate_crispr(num, file):
    """Write ``num`` simulated CRISPR arrays to a text file.

    For each array one palindromic repeat is drawn, with a requested
    length between ``repeat_length_min`` and ``repeat_length_max``, plus
    between ``setsize_min`` and ``setsize_max`` random spacers of
    ``minlength`` to ``maxlength`` bases each. Every spacer is written
    on its own line, prefixed with the repeat of its array.

    Args:
        num: Number of CRISPR arrays to simulate. Values <= 0 produce
            an empty file.
        file: Path of the output file; it is created or truncated.
    """
    # NOTE(review): the block's indentation was lost in SOURCE, so the
    # write is assumed to happen once per spacer; confirm against the
    # intended output format.
    # NOTE(review): flanking sequences (neightbor_min..neightbor_max)
    # were previously generated here but never written; that unused
    # work has been dropped.
    # ``with`` guarantees the handle is closed even if generation fails.
    with open(file, 'w') as handle:
        # range(num), not range(1, num), so exactly ``num`` arrays result.
        for _array in range(num):
            repeat = simulate_repeat(
                random.randint(repeat_length_min, repeat_length_max))
            spacer_count = random.randint(setsize_min, setsize_max)
            spacers = [
                simulate_random_sequence(random.randint(minlength, maxlength))
                for _spacer in range(spacer_count)
            ]
            handle.writelines(repeat + spacer + '\n' for spacer in spacers)
# Script entry: run the simulation and write the result to crisprs.txt
# in the current working directory.
simulate_crispr(num=100000, file='crisprs.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment