Skip to content

Instantly share code, notes, and snippets.

@soh-i
Last active November 3, 2015 09:48
Show Gist options
  • Select an option

  • Save soh-i/b41e47bac996afa02866 to your computer and use it in GitHub Desktop.

Select an option

Save soh-i/b41e47bac996afa02866 to your computer and use it in GitHub Desktop.
Random sequence generator and basic regexp
import random
import re
def generate_random_sequence(seq_length=20, n=10):
    """
    Random sequence generator.

    Every base is drawn independently from the four letters A, T, C and G
    with the module-level ``random`` generator.

    Args:
        seq_length: Int #sequence length
        n: Int #total number of sequences
    Returns:
        barcode_seqs: List<Str> #n strings, each seq_length characters long
    """
    bases = "ATCG"
    # Build each sequence with a single join rather than repeated string
    # concatenation, and pick bases directly instead of going through an
    # index -> base lookup table.
    return [
        "".join(random.choice(bases) for _ in range(seq_length))
        for _ in range(n)
    ]
if __name__ == "__main__":
    # generate 500 random sequences (20-mer)
    seqs = generate_random_sequence(n=500)

    # Anchored patterns: each one describes the complete 20-base sequence
    # (^ ... $), so they are applied with match().
    head_ttt_p = re.compile(r"^TTT[ATGC]{17}$")
    tail_ttt_p = re.compile(r"^[ATGC]{17}TTT$")
    A_at_10_p = re.compile(r"^[ATGC]{9}[A][ATGC]{10}$")

    # Unanchored motifs: they may occur anywhere in the sequence, so they
    # must be applied with search(); match() would only test position 0.
    GCGC_p = re.compile(r"(?:GC){2}")
    GCGC_or_ATAT_p = re.compile(r"(?:AT){2}|(?:GC){2}")

    for seq in seqs:
        # A sequence can satisfy several patterns, so every check is an
        # independent `if` rather than an if/elif chain.
        if head_ttt_p.match(seq):
            print("TTT in the head -> %s" % (seq))
        if tail_ttt_p.match(seq):
            print("TTT in the tail -> %s" % (seq))
        if A_at_10_p.match(seq):
            print("A that is located at position 10 -> %s" % (seq))
        if GCGC_p.search(seq):
            print("GCGC in any position in the sequence -> %s" % (seq))
        if GCGC_or_ATAT_p.search(seq):
            print("GCGC or ATAT in any position in the sequence -> %s" % (seq))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment