Skip to content

Instantly share code, notes, and snippets.

@Swarchal
Created March 18, 2016 12:17
Show Gist options
  • Save Swarchal/c4ea4a1e2092218dfead to your computer and use it in GitHub Desktop.
Save Swarchal/c4ea4a1e2092218dfead to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Open reading frames"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from Bio.Seq import Seq\n",
"from Bio import SeqIO\n",
"from Bio.Alphabet import generic_rna\n",
"import re\n",
"\n",
"def get_seq(path):\n",
" 'get single sequence from fasta file'\n",
" fasta = SeqIO.parse(open(path), \"fasta\")\n",
" for i in fasta: s = str(i.seq)\n",
" return s\n",
"\n",
"def get_orf(s):\n",
" \"\"\"\n",
" Given a DNA sequence, return\n",
" 6 possible ORFS as a list\n",
" \"\"\"\n",
" revc = lambda s: str(Seq(s).reverse_complement())\n",
" out = []\n",
" out.extend((s, s[1:], s[2:]))\n",
" revc_s = revc(s)\n",
" out.extend((revc_s, revc_s[1:], revc_s[2:])) \n",
" assert len(out) == 6\n",
" return out\n",
" \n",
"def translate(seq):\n",
" 'nucleotides -> aa'\n",
" out = Seq(str(seq), generic_rna).translate(to_stop=True)\n",
" return str(out)\n",
"\n",
"def trim_m(s):\n",
" \"\"\"\n",
" trim aa sequence before start aa,\n",
" if no M is present, return None\n",
" \"\"\"\n",
" start_aa = 'M'\n",
" if start_aa not in s:\n",
" return None\n",
" else:\n",
" return s[s.index(s):]\n",
"\n",
"def split_short(s):\n",
" \"\"\" e.g:\n",
" in : 'MAAMBBMCC'\n",
" out: ['MAAMBBMCC, MBBMCC, MCC]\n",
" \"\"\"\n",
" o = ['M' + i for i in s.split('M')[1:]]\n",
" new = []\n",
" new.append(''.join(o))\n",
" for i in range(1, len(o)):\n",
" new.append(''.join(o[i:len(o)]))\n",
" return new"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['MGMTPRLGLESLLE', 'MTPRLGLESLLE', 'M', 'MLLGSFRLIPKETLIQVAGSSPCNLS']\n"
]
}
],
"source": [
"path_test = 'rosalind_orf_test.txt'\n",
"\n",
"def open_reading_frame(path):\n",
" seq = get_seq(path)\n",
" orfs = get_orf(seq)\n",
" trans = [translate(i) for i in orfs]\n",
" trimd = [trim_m(j) for j in trans]\n",
" no_none = [x for x in trimd if x is not None]\n",
" out = [split_short(k) for k in no_none]\n",
" flatten_out = [v for s in out for v in s] #WTF python\n",
" return flatten_out\n",
"\n",
"\n",
"print open_reading_frame(path_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment