Skip to content

Instantly share code, notes, and snippets.

@chilampoon
Created May 1, 2021 21:22
Show Gist options
  • Save chilampoon/ac3e7b473773708fc4a3053479e22fa1 to your computer and use it in GitHub Desktop.
Save chilampoon/ac3e7b473773708fc4a3053479e22fa1 to your computer and use it in GitHub Desktop.
2-state Hidden Markov Model
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2-state Hidden Markov Model\n",
"_Chi-Lam Poon_\n",
"\n",
"This program implements a 2-state HMM for detecting G+C-rich regions in the Vibrio cholerae IEC224 chromosome II sequence. Conceptually, state 1 in the HMM would correspond to the more frequent 'A+T rich' state, whereas state 2 would correspond to the less frequent G+C rich state.\n",
"\n",
"The starting parameter values should be as follows:\n",
"\n",
"- Transition probabilities a_11 = .999, a_12 = .001, a_21 = .01, a_22 =\n",
".99.\n",
"- Initiation probabilities for each state (i.e. the transition probabilities from the 'begin' state into state 1 or 2) should be .996 for state 1, and .004 for state 2; these should be held fixed throughout the Viterbi training\n",
"- Emission probabilities (which should also be held fixed) are \n",
" 1. e_A = e_T = .291, e_G = e_C = .209 for state1\n",
" 2. e_A = e_T = .169, e_G = e_C = .331 for state2"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import random \n",
"\n",
"class Edge(object):\n",
" def __init__(self, start, end):\n",
" self.start = start\n",
" self.end = end\n",
" \n",
" def __str__(self):\n",
" return str(self.start) + \"->\" + str(self.end)\n",
"\n",
" \n",
"class Node(object):\n",
" '''equals to state; stateName,{emissions}'''\n",
" def __init__(self, name):\n",
" self.name = name\n",
" self.edges = []\n",
" \n",
" def AddEdge(self, edge):\n",
" print(\"Adding to \" + self.name)\n",
" self.edges.append(edge)\n",
" \n",
" def __eq__(self, other):\n",
" return self.name == other.name\n",
" \n",
" def __hash__(self):\n",
" return hash(self.name)\n",
"\n",
" \n",
"class Transition(Edge):\n",
" '''Connect between states, = transition'''\n",
" def __init__(self, start, end, prob):\n",
" Edge.__init__(self, start, end)\n",
" self.prob = prob\n",
"\n",
"\n",
"class State(Node):\n",
" def __init__(self, name, emissions, initial = False):\n",
" Node.__init__(self, name)\n",
" self.emissions = emissions\n",
" self.is_initial = initial\n",
" \n",
" def GetEmission(self):\n",
" r = random.random()\n",
" current_prob = 0.0\n",
" for state, prob in self.emissions.items():\n",
" current_prob += prob\n",
" if current_prob > r:\n",
" return state\n",
" return None\n",
" \n",
" def GetTransition(self):\n",
" r = random.random()\n",
" current_prob = 0.0\n",
" for edge in self.edges:\n",
" current_prob += edge.prob\n",
" if current_prob > r:\n",
" return edge.end\n",
" return None\n",
"\n",
" \n",
"class Graph(object):\n",
" def __init__(self):\n",
" self.nodes = {}\n",
" self.edges = {}\n",
" \n",
" def add_node(self, node):\n",
" self.nodes[node] = node\n",
" self.edges[node] = []\n",
" \n",
" def add_edge(self, edge):\n",
" self.nodes[edge.start].AddEdge(edge)\n",
" if edge.start not in self.edges.keys():\n",
" self.edges[edge.start] = []\n",
" self.edges[edge.start].append(edge) \n",
" \n",
" def get_edges(self, node):\n",
" return self.edges[node]\n",
" \n",
" def __str__(self):\n",
" return \"Nodes: \" + str(self.nodes) + \"\\n\" + \"Edges: \" + str(self.edges) "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from math import log\n",
"\n",
" \n",
"class HMM(Graph):\n",
" def __init__(self):\n",
" Graph.__init__(self)\n",
" self.current_state = None\n",
" \n",
" def MakeStep(self):\n",
" self.MakeTransition()\n",
" self.MakeEmission()\n",
" \n",
" def MakeEmission(self):\n",
" print(self.current_state.GetEmission())\n",
" \n",
" def MakeTransition(self):\n",
" new_state = self.nodes[self.current_state.GetTransition()]\n",
" print(\"From \" + self.current_state.name + \" to \" + new_state.name)\n",
" self.current_state = new_state\n",
" \n",
" def RandomWalk(self, start_node, length):\n",
" self.current_state = self.nodes[start_node]\n",
" for i in range(length):\n",
" self.MakeStep()\n",
" \n",
" def Viterbi(self, genome):\n",
" matrix = {}\n",
" backtrack = {}\n",
" # initiation\n",
" for name, state in self.nodes.items():\n",
" if not state.is_initial:\n",
" matrix[name] = [float('-inf') for i in range(len(genome) + 1)]\n",
" backtrack[name] = [\"\" for i in range(len(genome) + 1)]\n",
" \n",
" \n",
" for name, state in self.nodes.items():\n",
" if state.is_initial:\n",
" for edge in state.edges:\n",
" matrix[edge.end][0] = log(edge.prob)\n",
" backtrack[edge.end][0] = \"initial\"\n",
" \n",
" \n",
" for s in range(1, len(genome)+1):\n",
" nt = genome[s-1]\n",
" final_state = ''\n",
" final_state_val = float('-inf')\n",
" for curr_name, curr_state in self.nodes.items():\n",
" if curr_state.is_initial: \n",
" continue \n",
" \n",
" if s == 1:\n",
" matrix[curr_name][s] = mlog(log(curr_state.emissions[nt]), matrix[curr_name][s-1])\n",
" backtrack[curr_name][s] = curr_name.name\n",
" continue\n",
" \n",
" for prev_name, prev_state in self.nodes.items():\n",
" if prev_state.is_initial:\n",
" continue\n",
" \n",
" # get the transition probability \n",
" tmp_a = [edge.prob for edge in prev_state.edges if edge.end == curr_name][0]\n",
" s_curr = mlog(log(curr_state.emissions[nt]), mlog(matrix[prev_name][s-1], log(tmp_a)))\n",
" \n",
" # get the max value\n",
" if s_curr > matrix[curr_name][s]:\n",
" matrix[curr_name][s] = s_curr\n",
" backtrack[curr_name][s] = prev_name.name\n",
" \n",
" if s == len(genome) and matrix[curr_name][s] > final_state_val:\n",
" final_state_val = matrix[curr_name][s]\n",
" final_state = curr_name.name\n",
"\n",
" \n",
" # termination\n",
" states_seq = [final_state]\n",
" \n",
" # backtracking\n",
" for rs in reversed(range(1, len(genome))):\n",
" prev_state = [backtrack[name][rs+1] for name in backtrack if name.name == states_seq[0]][0]\n",
" states_seq.insert(0, prev_state)\n",
" \n",
" self.numOfStates(states_seq)\n",
" segments, switch = self.numOfSegments(states_seq)\n",
" new_probs = self.newProbs(switch)\n",
" \n",
" return states_seq, segments, new_probs\n",
"\n",
"\n",
" def numOfStates(self, states_seq):\n",
" for name, state in self.nodes.items():\n",
" if not state.is_initial:\n",
" print('State ' + name.name + \"'s occurence = \" + str(states_seq.count(name.name)))\n",
" \n",
" def numOfSegments(self, states_seq):\n",
" segments = {}\n",
" seg_start = 0\n",
" switch = {}\n",
" \n",
" state = states_seq[0]\n",
" segments[state] = []\n",
" switch[(state, state)] = 0\n",
"\n",
" for i in range(1, len(states_seq)):\n",
" new_state = states_seq[i]\n",
"\n",
" if new_state != state:\n",
" # transition occurred\n",
" if new_state not in segments:\n",
" segments[new_state] = []\n",
" if (state, new_state) not in switch:\n",
" switch[(state, new_state)] = 0\n",
" if (new_state, new_state) not in switch:\n",
" switch[(new_state, new_state)] = 0\n",
" \n",
" segments[state].append((seg_start, i-1))\n",
" switch[(state, new_state)] += 1\n",
" seg_start = i\n",
" state = new_state\n",
" else:\n",
" switch[(state, new_state)] += 1\n",
" if i == len(states_seq) - 1:\n",
" # end at the end if nothing happened\n",
" segments[state].append((seg_start, i))\n",
" \n",
" # print the info\n",
" for state in segments:\n",
" print('Num. of segments in state ' + state + ' = ' + str(len(segments[state])))\n",
" \n",
" return segments,switch\n",
"\n",
" def newProbs(self, switch):\n",
" # switch is like {('1', '1'): 33, ('1', '2'): 1, ('2', '2'): 37, ('2', '1'): 1}\n",
" probs = {}\n",
" \n",
" for name, state in self.nodes.items():\n",
" if not state.is_initial:\n",
" probs[name.name] = {}\n",
" # count total switches for one node\n",
" node_total = 0\n",
" for next_name, next_state in self.nodes.items():\n",
" if not next_state.is_initial:\n",
" if (name.name, next_name.name) in switch:\n",
" node_total += switch[(name.name, next_name.name)]\n",
" else:\n",
" node_total += 0\n",
" # calculate\n",
" for next_name, next_state in self.nodes.items():\n",
" if not next_state.is_initial:\n",
" if (name.name, next_name.name) in switch: \n",
" prob = switch[(name.name, next_name.name)] / node_total\n",
" print(str((name.name, next_name.name)) + \"'s new prob = \" + str(round(prob, 4)))\n",
" probs[name.name][next_name.name] = prob\n",
" else:\n",
" probs[name.name][next_name.name] = 0\n",
" return probs\n",
"\n",
"def mlog(x, y):\n",
" return x + y"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Add parameters to the HMM model"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n"
]
}
],
"source": [
"g = HMM()\n",
"g.add_node(State(\"initial\", {}, True))\n",
"# emissions\n",
"g.add_node(State(\"1\", {\"A\" : 0.291, \"T\" : 0.291, \"G\" : 0.209, \"C\" : 0.209}))\n",
"g.add_node(State(\"2\", {\"A\" : 0.169, \"T\" : 0.169, \"G\" : 0.331, \"C\" : 0.331}))\n",
"# initial transitions\n",
"g.add_edge(Transition(State(\"initial\", {}), State(\"1\", {}), 0.996))\n",
"g.add_edge(Transition(State(\"initial\", {}), State(\"2\", {}), 0.004))\n",
"\n",
"g.add_edge(Transition(State(\"1\", {}), State(\"2\", {}), 0.001))\n",
"g.add_edge(Transition(State(\"1\", {}), State(\"1\", {}), 0.999))\n",
"g.add_edge(Transition(State(\"2\", {}), State(\"1\", {}), 0.01))\n",
"g.add_edge(Transition(State(\"2\", {}), State(\"2\", {}), 0.99))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State 1's occurence = 35\n",
"State 2's occurence = 36\n",
"Num. of segments in state 1 = 2\n",
"Num. of segments in state 2 = 1\n",
"('1', '1')'s new prob = 0.9706\n",
"('1', '2')'s new prob = 0.0294\n",
"('2', '1')'s new prob = 0.0278\n",
"('2', '2')'s new prob = 0.9722\n"
]
}
],
"source": [
"# have a try\n",
"seq = 'GGAAATTGTTAATATATATGGGGGCGCGCCCACCGGGGCGCGGCTGCGGCGCGCGTTATATATATTCTTTT'\n",
"states_seq, segments, new_probs = g.Viterbi(seq)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'1': {'1': 0.9705882352941176, '2': 0.029411764705882353},\n",
" '2': {'1': 0.027777777777777776, '2': 0.9722222222222222}}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_probs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parse the fasta\n",
"\n",
"Downloaded from https://www.ncbi.nlm.nih.gov/nuccore/CP003331"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File name: ./IEC224_chr2.fasta\n",
">CP003331.1 Vibrio cholerae IEC224 chromosome II, complete sequence\n"
]
}
],
"source": [
"def parse_fasta(fasta_file):\n",
" file = open(fasta_file, 'r')\n",
" fasta = ''\n",
" \n",
" for line in file:\n",
" if line.startswith('>') == False:\n",
" fasta += line.replace('\\n','')\n",
" else:\n",
" print(line.replace('\\n',''))\n",
" return fasta\n",
"\n",
"\n",
"fasta_file = './IEC224_chr2.fasta'\n",
"print('File name: ' + fasta_file)\n",
"fa = parse_fasta(fasta_file)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration 1...\n",
"State 1's occurence = 1061417\n",
"State 2's occurence = 10719\n",
"Num. of segments in state 1 = 48\n",
"Num. of segments in state 2 = 47\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0044\n",
"('2', '2')'s new prob = 0.9956\n",
"{'1': {'1': 0.9999557195293833, '2': 4.428047061661027e-05}, '2': {'1': 0.004384737382218491, '2': 0.9956152626177815}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 2...\n",
"State 1's occurence = 1067281\n",
"State 2's occurence = 4855\n",
"Num. of segments in state 1 = 12\n",
"Num. of segments in state 2 = 11\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0023\n",
"('2', '2')'s new prob = 0.9977\n",
"{'1': {'1': 0.9999896934262799, '2': 1.0306573720110937e-05}, '2': {'1': 0.0022657054582904223, '2': 0.9977342945417096}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 3...\n",
"State 1's occurence = 1068265\n",
"State 2's occurence = 3871\n",
"Num. of segments in state 1 = 8\n",
"Num. of segments in state 2 = 7\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0018\n",
"('2', '2')'s new prob = 0.9982\n",
"{'1': {'1': 0.9999934473126493, '2': 6.5526873506923385e-06}, '2': {'1': 0.0018083182640144665, '2': 0.9981916817359855}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 4...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 5...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 6...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 7...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 8...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n",
"Iteration 9...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n",
"Adding to initial\n",
"Adding to initial\n",
"Adding to 1\n",
"Adding to 1\n",
"Adding to 2\n",
"Adding to 2\n",
"\n",
"\n"
]
}
],
"source": [
"for i in range(1, 10):\n",
" print('Iteration ' + str(i) + '...')\n",
" states_seq, segments, new_probs = g.Viterbi(fa)\n",
" print(new_probs)\n",
" \n",
" g = HMM()\n",
" g.add_node(State(\"initial\", {}, True))\n",
" # emissions\n",
" g.add_node(State(\"1\", {\"A\" : 0.291, \"T\" : 0.291, \"G\" : 0.209, \"C\" : 0.209}))\n",
" g.add_node(State(\"2\", {\"A\" : 0.169, \"T\" : 0.169, \"G\" : 0.331, \"C\" : 0.331}))\n",
" # initial transitions\n",
" g.add_edge(Transition(State(\"initial\", {}), State(\"1\", {}), 0.996))\n",
" g.add_edge(Transition(State(\"initial\", {}), State(\"2\", {}), 0.004))\n",
" \n",
" g.add_edge(Transition(State(\"1\", {}), State(\"2\", {}), new_probs[\"1\"][\"2\"]))\n",
" g.add_edge(Transition(State(\"1\", {}), State(\"1\", {}), new_probs[\"1\"][\"1\"]))\n",
" g.add_edge(Transition(State(\"2\", {}), State(\"1\", {}), new_probs[\"2\"][\"1\"]))\n",
" g.add_edge(Transition(State(\"2\", {}), State(\"2\", {}), new_probs[\"2\"][\"2\"]))\n",
" \n",
" print('\\n')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration 10...\n",
"State 1's occurence = 1068256\n",
"State 2's occurence = 3880\n",
"Num. of segments in state 1 = 7\n",
"Num. of segments in state 2 = 6\n",
"('1', '1')'s new prob = 1.0\n",
"('1', '2')'s new prob = 0.0\n",
"('2', '1')'s new prob = 0.0015\n",
"('2', '2')'s new prob = 0.9985\n",
"{'1': {'1': 0.9999943833635228, '2': 5.616636477245601e-06}, '2': {'1': 0.0015463917525773195, '2': 0.9984536082474227}}\n"
]
}
],
"source": [
"# one more time\n",
"for i in range(10, 11):\n",
" print('Iteration ' + str(i) + '...')\n",
" states_seq, segments, new_probs = g.Viterbi(fa)\n",
" print(new_probs)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'1': [(0, 275666),\n",
" (276364, 666684),\n",
" (667662, 669450),\n",
" (670313, 746323),\n",
" (746563, 952633),\n",
" (953264, 1023943),\n",
" (1024419, 1072135)],\n",
" '2': [(275667, 276363),\n",
" (666685, 667661),\n",
" (669451, 670312),\n",
" (746324, 746562),\n",
" (952634, 953263),\n",
" (1023944, 1024418)]}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"segments"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"GCCAACGAGCCAGTGACGATGCGCAGCCCGAAGGCGTGGCGGGGAAGCGGATAGTGAGATCGGCCGCGCTGGATTGTTCAATCCACGGTGTGCTGGAAGGGGCCGAAATCAGCAGCGCCATGCGCTGCTCGCCCAGCGTGAGCCAATAGACACCTGCGGCATACGGCGAGAGAAGATCGCGACTTTCCACTTCGGCGTAATCGGCAGGCGCAAGCTCTGCTGCTAACAAGGTTGCACCTTGCGAAGATGCCAGACGCACCGCCAGCGGCTGGCTGAGGTTTTCGGCGCTCAGCCTCACTTGGTAGCCTTGAAAGGTCTGATTGAACTTATGAATAAAAGTCACCGCAATCGCAAGATCGTTAAGCGATGCACTTGGCAGTGCTTGTCCACATTGGGTTTGACTGAGGATGGCCTCTTTAATTGGTTGCCAAAAAGGCTCGGCTACGGGCGTGGTAGACAGTGTGTGGATGAGCTCGTGCCACGCTTGCGTGGGATGCCCCTCAACCAATAGCTGGTAACTGCTTTGCAGCGGTGTGGTGCGAAACCAAGCATGGGTGGGGCTTGGCGGCGTTTCCGCTATCGCGGCGCCGCTTAGTAATGCGCTGAGTATGCCAAGTCGCGCAATGTGATTGCGAAAAGAGGCGCAGATCATACGGAGGCTCCTTTGAGGCGATAACCGACGCCGCGCAGGGTCTC\n",
"\n",
"GCGGGGCCACTGCGCGTTAACGGCTCTCATGCGCAAGGGGACTATTTGGTTCCGCTGGCCACCACAGAAGCGGCGTTAGTGGCGTCATACCATCGTGGTAGCCAACTCATCACGGCAGCAGGCGGCGCAAGTGCGCTACTGCTCAATGAAGGGGTGACTCGCACACCCGTGTTTGCTTTTCTTTCGCTGGCGCAAGCTGGGCAATTTGTCGGTTGGGTGACCAGCCAATTTGAGCAGATGAAAGAGATAGCGCAGTCCACCACCGCGCACGGCAAACTGAAAGATATTCAGGTCAATATCGAAGGGAATCACGTCTATCTGGTGTTTGAATACACCACCGGCGATGCCAGCGGTCAAAATATGGTGACGATCGCCACTCACGCTGTTTTTGAGTTCATCATGCGCCACTCACCGGTTGCGCCAGTGCAAGCCTTTCTCGATGGCAATCTCTCCGGTGACAAAAAAGCCAATAGCTATACGCTGCGCAGCGTGCGCGGCAAAAAAGTCAGCGCTGAGGTGCATCTCTCTGCCGAGTTAGTCAAAAAGTATCTCCACACCACGCCAGAGCAGATGGTGCAGTTTGGGCAAATGACGACAGTGGGCGGCGCACTGAGCGGCGCGATTGGTGTGAATGCTCATTACGCCAATGCCCTTGCCGCGCTTTATATTGCGTGTGGGCAAGATGCCGCGTGTGTAGCCGAATCCGCAATAGGGATGACGCGCATGGAGATCCACCCTCACGGCGGTTTATACGCCAGCGTCACCCTGCCTAATTTGATGGTCGGAACCGTTGGTGGTGGCACCCATCTCCCTAGCCAACACGCCTGTTTATCGCTGATGGGATTGGCCGGGCAAGGCCACGCGCGAGCCTTAGCTGAAGTGGCCGCAGCCCTCTGTTTAGCTGGTGAATTATCGATCGTCGGGGCATTTTGCGCGGGACATTTTTCCCGTGCGCACCATAAGCTCGCGCGCTGAG\n",
"\n",
"GGTGCGCGCCAGCACCCACGCCAGCACAATGCCTATCGGCAGCAACCACAAGGTCGCATACCCAGCCACTTTTAAACTCAGCGCCAGCGCCGCCCATTCGTAATCGGAGAGACCCATCAATTACTCACTCACCGACTCAAAACCAAAACGTTGCAAGATTTGCTGCGCGGCGTCTGAGCGTAAATACGCCACCCATTCGGCGCTCGCCGCTTTATCATTCAATTGCGCCAGCGGATAGCGGATCGGCTGGTGAGACTGAGCTGAAAATGCCGTCACGATCGTCACTTTATCACTGAGCAGCGCATCGGTTTTGTACACGATGCCAAGCGGCGATTCACCACGCTCGACCAACGCTAGCGCCAAACGCACGTTATTGGTTTGCGCAAGGCGTGATTCCAGCTCAGGCCACACCCCCGCATGTTGTAGCGCTTGCTTGGCATACATGCCCGCCGGCACCGCATCCACTTGCGCAACCGCCAAGCGAGAATCGGCCAAGGCCGTTTGCCAAGCCGCGGCATCTTGCAGCTCAAAACTTGCCACCGGCTGTGCAGTCGGGCGAATCAGCACCAAACTATTCGCCGCCAACGTCACCACCTTGTTCGGTTTCACTAAGCCTTTCTCGACTAAGTAATTCGCCCACTCTTCATTGGCGGAAATAAACAGATCCGCAGGTGCGCCCGCTTCAATTTGACGCGCCAGCGAAGAAGAGCCGCCGTACACCGTCACCAGCTTCACATCATGCTGCTGTGAGTAGTCGGCCACTAACGCATTGACGGCATTGGTCATCGACGAGGCCGCGTAAATGTGCAGTGTCTCTTTCGCGCTCGCCAGCGGCGCGTTAAGCGCCACACACAGCAGGAG\n",
"\n",
"CGCGACGACGGCTATGGCCACGCTCACCACGGTAGCTACCACTGCTCTCACCACTACGGTTGCGCTCAAAACGACGCTCACCTTCACGGAAAGGACGGCCTTCGCCACCACGGCCACGACCACCTTCGCGGCCGCCTTCACGACCACCACGGAAACCGCCTTCACGGTTGCCTTCTGGACGACGACCACCTTCGCGACGTGGACCACCTTCACGACGAGCACCACCACGAGATTCGCG\n",
"\n",
"CGCCCCCGGAGACACACGCAGCGCAGTGCGCTCTGCGCCAATACGAGCGATCACCGCATCCACTACTTCCAAAGCAAAACGCGCCATATTGGCCGGCGTTTCACCGTACTCATCGCTGCGTTGGTTAGAATCAAAATGCAGGAACTGGTCGATGAGATAGCCGTTCGCGCCATGAATCTCGACTCCATCAAAACCCGCCAAGCGTGCATTTTCCGCCGCTTGTGCGTAATCGGCCACCAGTTGCTTGATCTCCGCTTGGCTCGCCGCTTTCGGCACTGTGTACTGCAATTCGCGGCGGCGTGGCACGCTGCCTTCGACACCGAGAGCTGAAGGCGCTAGAACGTATTCACCCGCAAAAAAGGCAGGGTGAGCCACGCGTCCGGTATGCCATAACTGAGCGAAAATCTTACCGCCATTGGCATGCACGGCGTCCGTTACTTTTTTCCAGCCGGCAATTTGCGCTTGCGTAAACAGCCCCGGAGTGTTCGGGTAGCCTTGAGCATCGGGGCGAATAATGGTGGCTTCGGAAATGATCAGCCCAGCATCGGCACGGCGTGCGTAATACGCCACCATGGCATCGGTCGGCACCAAATCGTCATCAGCCATGCAGCGCGTCAGTGGCGCCATCG\n",
"\n",
"GCACACGCAGTGCCGAACAGCGTTGGCCAGCAGAGGCAAAAGCGGAACGCAGCACATCACGCACCACTTGCTCTGGCAGCGCGGTACTGTCGACAATCATGGCGTTTTGACCGCCAGTTTCTGCAATAAATGGTACCGGAGCCGCTTCACGTTGCGCTAACGTTTGGTTAATGCGCTGCGCCGTGGCGGTCGAGCCAGTAAACGCCACACCCGCGATGGCCGGATGAGAAGTGAGTGCGCTGCCGATATCGGCACCGCGTCCCGGCAAAAGTTGGATGGTTCCAGCAGGGAAACCCGCTTCTTGCATCAGCTCAACCGCGCGATACGCGATCAAACTGGTTTGCTCGGCAGGTTTGGCAATCACGGTATTTCCGGCCACCAAGGCGGCACTGATTTGGCCAAGGAAAATCGCCAGCGGGAAGTTCCACGGACTAATACACACAAACACTCCGCGACCTTGGCGGCTGACACGAC\n",
"\n"
]
}
],
"source": [
"for region in segments['2']:\n",
" print(fa[region[0]:region[1]]+'\\n')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment