Last active
November 3, 2015 09:48
-
-
Save soh-i/b41e47bac996afa02866 to your computer and use it in GitHub Desktop.
Random sequence generator and basic regexp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| import re | |
def generate_random_sequence(seq_length=20, n=10):
    """
    Build a list of random nucleotide strings.

    Every position of every sequence is drawn independently with
    random.randint(0, 3) and mapped to one of the bases A, T, C, G.

    Args:
        seq_length: Int #number of bases in each sequence
        n: Int #how many sequences to produce

    Returns:
        barcode_seqs: List<Str> #n strings, each seq_length characters long
    """
    # The index of each base matches the integer drawn for it (0 -> A ... 3 -> G).
    bases = ("A", "T", "C", "G")
    return [
        "".join(bases[random.randint(0, 3)] for _ in range(seq_length))
        for _ in range(n)
    ]
if __name__ == "__main__":
    # Demo: generate 500 random 20-mers and report which ones match a few
    # simple motif patterns. A sequence may be reported more than once.
    seqs = generate_random_sequence(n=500)

    # Whole-sequence patterns: anchored with ^...$ and sized for 20-mers.
    head_ttt_p = re.compile(r"^TTT[ATGC]{17}$")
    tail_ttt_p = re.compile(r"^[ATGC]{17}TTT$")
    A_at_10_p = re.compile(r"^[ATGC]{9}[A][ATGC]{10}$")
    # Unanchored motifs: meant to be found anywhere inside the sequence.
    GCGC_p = re.compile(r"(?:GC){2}")
    GCGC_or_ATAT_p = re.compile(r"(?:AT){2}|(?:GC){2}")

    for seq in seqs:
        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 (parenthesized expression) and Python 3 (function call).
        if head_ttt_p.match(seq):
            print("TTT in the head -> %s" % seq)
        if tail_ttt_p.match(seq):
            print("TTT in the tail -> %s" % seq)
        if A_at_10_p.match(seq):
            print("A that is located at position 10 -> %s" % seq)
        # match() only tests the start of the string; search() is required
        # to detect the motif at any position, as the messages state.
        if GCGC_p.search(seq):
            print("GCGC in any position in the sequence -> %s" % seq)
        if GCGC_or_ATAT_p.search(seq):
            print("GCGC or ATAT in any position in the sequence -> %s" % seq)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment