Created
March 18, 2016 12:17
-
-
Save Swarchal/c4ea4a1e2092218dfead to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"# Open reading frames" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from Bio.Seq import Seq\n", | |
"from Bio import SeqIO\n", | |
"from Bio.Alphabet import generic_rna\n", | |
"import re\n", | |
"\n", | |
"def get_seq(path):\n", | |
" 'get single sequence from fasta file'\n", | |
" fasta = SeqIO.parse(open(path), \"fasta\")\n", | |
" for i in fasta: s = str(i.seq)\n", | |
" return s\n", | |
"\n", | |
"def get_orf(s):\n", | |
" \"\"\"\n", | |
" Given a DNA sequence, return\n", | |
" 6 possible ORFS as a list\n", | |
" \"\"\"\n", | |
" revc = lambda s: str(Seq(s).reverse_complement())\n", | |
" out = []\n", | |
" out.extend((s, s[1:], s[2:]))\n", | |
" revc_s = revc(s)\n", | |
" out.extend((revc_s, revc_s[1:], revc_s[2:])) \n", | |
" assert len(out) == 6\n", | |
" return out\n", | |
" \n", | |
"def translate(seq):\n", | |
" 'nucleotides -> aa'\n", | |
" out = Seq(str(seq), generic_rna).translate(to_stop=True)\n", | |
" return str(out)\n", | |
"\n", | |
"def trim_m(s):\n", | |
" \"\"\"\n", | |
" trim aa sequence before start aa,\n", | |
" if no M is present, return None\n", | |
" \"\"\"\n", | |
" start_aa = 'M'\n", | |
" if start_aa not in s:\n", | |
" return None\n", | |
" else:\n", | |
" return s[s.index(s):]\n", | |
"\n", | |
"def split_short(s):\n", | |
" \"\"\" e.g:\n", | |
" in : 'MAAMBBMCC'\n", | |
" out: ['MAAMBBMCC, MBBMCC, MCC]\n", | |
" \"\"\"\n", | |
" o = ['M' + i for i in s.split('M')[1:]]\n", | |
" new = []\n", | |
" new.append(''.join(o))\n", | |
" for i in range(1, len(o)):\n", | |
" new.append(''.join(o[i:len(o)]))\n", | |
" return new" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['MGMTPRLGLESLLE', 'MTPRLGLESLLE', 'M', 'MLLGSFRLIPKETLIQVAGSSPCNLS']\n" | |
] | |
} | |
], | |
"source": [ | |
"path_test = 'rosalind_orf_test.txt'\n", | |
"\n", | |
"def open_reading_frame(path):\n", | |
" seq = get_seq(path)\n", | |
" orfs = get_orf(seq)\n", | |
" trans = [translate(i) for i in orfs]\n", | |
" trimd = [trim_m(j) for j in trans]\n", | |
" no_none = [x for x in trimd if x is not None]\n", | |
" out = [split_short(k) for k in no_none]\n", | |
" flatten_out = [v for s in out for v in s] #WTF python\n", | |
" return flatten_out\n", | |
"\n", | |
"\n", | |
"print open_reading_frame(path_test)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment