Created
February 16, 2021 17:17
-
-
Save gregcaporaso/b7206060986f36737b7826b55096ce8d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Codon table: three-letter DNA codon -> one-letter amino acid symbol.\n", | |
"# The codons grouped under '*' carry the value translate checks for to stop.\n", | |
"# Adapted from\n", | |
"# https://gitlab.com/RebelCoder/dna-toolset/-/blob/90e8d9249c6dcc82a850aa854da4baabef98ee6f/structures.py\n", | |
"genetic_code = {\n", | |
"    codon: amino_acid\n", | |
"    for amino_acid, codons in (\n", | |
"        # (symbol, space-separated codons that encode it)\n", | |
"        (\"A\", \"GCT GCC GCA GCG\"),\n", | |
"        (\"C\", \"TGT TGC\"),\n", | |
"        (\"D\", \"GAT GAC\"),\n", | |
"        (\"E\", \"GAA GAG\"),\n", | |
"        (\"F\", \"TTT TTC\"),\n", | |
"        (\"G\", \"GGT GGC GGA GGG\"),\n", | |
"        (\"H\", \"CAT CAC\"),\n", | |
"        (\"I\", \"ATA ATT ATC\"),\n", | |
"        (\"K\", \"AAA AAG\"),\n", | |
"        (\"L\", \"TTA TTG CTT CTC CTA CTG\"),\n", | |
"        (\"M\", \"ATG\"),\n", | |
"        (\"N\", \"AAT AAC\"),\n", | |
"        (\"P\", \"CCT CCC CCA CCG\"),\n", | |
"        (\"Q\", \"CAA CAG\"),\n", | |
"        (\"R\", \"CGT CGC CGA CGG AGA AGG\"),\n", | |
"        (\"S\", \"TCT TCC TCA TCG AGT AGC\"),\n", | |
"        (\"T\", \"ACT ACC ACA ACG\"),\n", | |
"        (\"V\", \"GTT GTC GTA GTG\"),\n", | |
"        (\"W\", \"TGG\"),\n", | |
"        (\"Y\", \"TAT TAC\"),\n", | |
"        (\"*\", \"TAA TAG TGA\"),\n", | |
"    )\n", | |
"    for codon in codons.split()\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# str.index returns the position of the first occurrence of the substring.\n", | |
"'TTTTATGATGT'.index('ATG')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[4, 7, 10, 13, 16, 19, 22]" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# A step of 3 yields every third position starting at 4 and stopping before 25.\n", | |
"list(range(4, 25, 3))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"T\n" | |
] | |
} | |
], | |
"source": [ | |
"# A slice that extends past the end of the string is truncated instead of\n", | |
"# raising an error, so only the single remaining base comes back.\n", | |
"test_sequence = 'TTTTATGACCT'\n", | |
"print(test_sequence[10:10+3])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'R'" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Look up the amino acid symbol stored for a single codon key.\n", | |
"genetic_code['AGA']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# On strings, `not in` is a substring test; 'AAA' occurs in 'ACGAAAT', so this is False.\n", | |
"'AAA' not in 'ACGAAAT'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def translate(sequence, genetic_code):\n", | |
"    \"\"\"Translate the reading frame that starts at the first ATG in a sequence.\n", | |
"\n", | |
"    Codons are read three bases at a time from the first 'ATG' until a codon\n", | |
"    mapped to '*' is reached or the sequence runs out. A trailing fragment of\n", | |
"    one or two bases is ignored.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    sequence : str\n", | |
"        Sequence to translate. The search for 'ATG' is case-sensitive.\n", | |
"    genetic_code : dict\n", | |
"        Maps each three-letter codon to an amino acid symbol, with '*' as the\n", | |
"        value for stop codons.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    list\n", | |
"        The looked-up amino acid symbols in reading order, starting with the\n", | |
"        one for 'ATG'. The stop symbol is not included. Empty if the sequence\n", | |
"        contains no 'ATG'.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    KeyError\n", | |
"        If a complete codon in the reading frame is not a key of genetic_code.\n", | |
"    \"\"\"\n", | |
"    # A single scan both detects and locates the start codon (-1 means absent).\n", | |
"    first_codon_position = sequence.find('ATG')\n", | |
"    if first_codon_position == -1:\n", | |
"        return []\n", | |
"\n", | |
"    protein = []\n", | |
"    # Stopping two short of the end guarantees every slice is a full codon, so\n", | |
"    # a trailing one- or two-base fragment is never looked up.\n", | |
"    for codon_start in range(first_codon_position, len(sequence) - 2, 3):\n", | |
"        codon = sequence[codon_start:codon_start + 3]\n", | |
"        try:\n", | |
"            amino_acid = genetic_code[codon]\n", | |
"        except KeyError:\n", | |
"            # The message already names the codon, so suppress the original\n", | |
"            # lookup error instead of showing a second, redundant traceback.\n", | |
"            raise KeyError(codon + \" is not present in the genetic code.\") from None\n", | |
"        if amino_acid == '*':\n", | |
"            break\n", | |
"        protein.append(amino_acid)\n", | |
"\n", | |
"    return protein" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"'ACU is not present in the genetic code.'\n" | |
] | |
} | |
], | |
"source": [ | |
"# This sequence contains a U, so one codon in the reading frame (ACU) is not a\n", | |
"# key of genetic_code. Catch the resulting KeyError and show its message so the\n", | |
"# notebook still runs from top to bottom.\n", | |
"try:\n", | |
"    print(translate(\"AGGTACGTGGAACGTACGTGACCGATGGACCACACUCATTGAGTGTGTACACACACGTGTGTGTGACACAACAAC\", genetic_code))\n", | |
"except KeyError as error:\n", | |
"    print(error)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment