Last active
August 22, 2025 04:53
-
-
Save kevinah95/f6f9d16ebd17a3a28208471f4e4bb878 to your computer and use it in GitHub Desktop.
Here is the gist to update this question http://stackoverflow.com/q/43691093/4752488
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import matplotlib.pyplot as plt | |
| import matplotlib.ticker as ticker | |
| import numpy as np | |
| from Bio.Seq import Seq | |
| #------------------------------ | |
| seq1 = Seq("GATCCA") | |
| seq2 = Seq("GTGCCT") | |
| match_award = 1 | |
| mismatch_penalty = -1 | |
| gap_penalty = -2 | |
| arrows = None | |
| def match_score(alpha, beta): | |
| if alpha == beta: | |
| return match_award | |
| else: | |
| return mismatch_penalty | |
| def needleman_wunsch(seq1, seq2): | |
| m, n = len(seq2), len(seq1) # length of two sequences | |
| #--------- | |
| dt = np.dtype([('diagonal', np.str, 1), | |
| ('up', np.str, 1), ('left', np.str, 1)]) | |
| """ | |
| Pointer Matrix size: (m + 1) x (n + 1) | |
| Datatype: 3 tuple to save direction of the path | |
| For more info: https://docs.scipy.org/doc/numpy/user/basics.rec.html | |
| """ | |
| pt_mat = np.zeros((m + 1, n + 1), dtype=dt) | |
| #--------- | |
| score_matrix = np.zeros((m + 1, n + 1), dtype=int) # the DP table | |
| # Calculate DynamicProgramming table | |
| for i in range(0, m + 1): | |
| score_matrix[i][0] = gap_penalty * i | |
| pt_mat[i][0]['up'] = 'U' | |
| for j in range(0, n + 1): | |
| score_matrix[0][j] = gap_penalty * j | |
| pt_mat[0][j]['left'] = 'L' | |
| for i in range(1, m + 1): | |
| for j in range(1, n + 1): | |
| diagonal = score_matrix[i - 1][j - 1] + \ | |
| match_score(seq2[i - 1], seq1[j - 1]) | |
| up = score_matrix[i - 1][j] + gap_penalty | |
| left = score_matrix[i][j - 1] + gap_penalty | |
| max_pointer = max(diagonal, up, left) | |
| score_matrix[i][j] = max_pointer | |
| if (diagonal == max_pointer): | |
| pt_mat[i][j]['diagonal'] = 'D' | |
| if (up == max_pointer): | |
| pt_mat[i][j]['up'] = 'U' | |
| if (left == max_pointer): | |
| pt_mat[i][j]['left'] = 'L' | |
| # print(score_matrix) | |
| # print(pt_mat) | |
| # Traceback and compute the alignment | |
| align1, align2 = '', '' | |
| i, j = m, n # start from the bottom right cell | |
| a = np.array([[0, 0, 0, 0]]) | |
| first = True | |
| global arrows | |
| while i > 0 and j > 0: # end toching the top or the left edge | |
| score_current = score_matrix[i][j] | |
| score_diagonal = score_matrix[i - 1][j - 1] | |
| score_up = score_matrix[i][j - 1] | |
| score_left = score_matrix[i - 1][j] | |
| #print("current ",i,j,end='') | |
| a[:, :2] = j, i | |
| if score_current == score_diagonal + match_score(seq2[i - 1], seq1[j - 1]): | |
| align1 += seq2[i - 1] | |
| align2 += seq1[j - 1] | |
| i -= 1 | |
| j -= 1 | |
| elif score_current == score_left + gap_penalty: | |
| align1 += seq2[i - 1] | |
| align2 += '-' | |
| i -= 1 | |
| elif score_current == score_up + gap_penalty: | |
| align1 += '-' | |
| align2 += seq1[j - 1] | |
| j -= 1 | |
| a[:, 2:] = j, i | |
| if first: | |
| # First loop copy a | |
| arrows = np.copy(a) | |
| first = False | |
| else: | |
| # Concatenate origin-target | |
| arrows = np.concatenate((arrows, a), axis=0) | |
| a[:, :2] = a[:, 2:] | |
| # Finish tracing up to the top left cell | |
| while i > 0: | |
| align1 += seq2[i - 1] | |
| align2 += '-' | |
| i -= 1 | |
| a[:, 2:] = j, i | |
| arrows = np.concatenate((arrows, a), axis=0) | |
| while j > 0: | |
| align1 += '-' | |
| align2 += seq1[j - 1] | |
| j -= 1 | |
| a[:, 2:] = j, i | |
| arrows = np.concatenate((arrows, a), axis=0) | |
| return score_matrix, pt_mat | |
| #------------------------------- | |
| plt.rcParams["figure.figsize"] = 4, 5 | |
| param = {"grid.linewidth": 1.6, | |
| "grid.color": "lightgray", | |
| "axes.linewidth": 1.6, | |
| "axes.edgecolor": "lightgray"} | |
| plt.rcParams.update(param) | |
| # Data | |
| headh = seq1 | |
| headv = seq2 | |
| v, pt_mat = needleman_wunsch(seq1, seq2) | |
| # Plot | |
| fig, ax = plt.subplots() | |
| ax.set_xlim(-1.5, v.shape[1] - .5) | |
| ax.set_ylim(-1.5, v.shape[0] - .5) | |
| ax.invert_yaxis() | |
| for i in range(v.shape[0]): | |
| for j in range(v.shape[1]): | |
| ax.text(j, i, v[i, j], ha="center", va="center") | |
| for i, l in enumerate(headh): | |
| ax.text(i + 1, -1, l, ha="center", va="center", fontweight="semibold") | |
| for i, l in enumerate(headv): | |
| ax.text(-1, i + 1, l, ha="center", va="center", fontweight="semibold") | |
| ax.xaxis.set_minor_locator(ticker.FixedLocator( | |
| np.arange(-1.5, v.shape[1] - .5, 1))) | |
| ax.yaxis.set_minor_locator(ticker.FixedLocator( | |
| np.arange(-1.5, v.shape[1] - .5, 1))) | |
| plt.tick_params(axis='both', which='both', bottom='off', top='off', | |
| left="off", right="off", labelbottom='off', labelleft='off') | |
| ax.grid(True, which='minor') | |
| arrowprops = dict(facecolor='blue', alpha=0.5, lw=0, | |
| shrink=0.2, width=2, headwidth=7, headlength=7) | |
| # all paths | |
| for i in range(1, pt_mat.shape[0]): | |
| for j in range(1, pt_mat.shape[1]): | |
| if(pt_mat[i][j]['left'] != ''): | |
| ax.annotate("", xy=(j - 1, i), | |
| xytext=(j, i), arrowprops=arrowprops) | |
| if(pt_mat[i][j]['diagonal'] != ''): | |
| ax.annotate("", xy=(j - 1, i - 1), | |
| xytext=(j, i), arrowprops=arrowprops) | |
| if(pt_mat[i][j]['up'] != ''): | |
| ax.annotate("", xy=(j, i - 1), | |
| xytext=(j, i), arrowprops=arrowprops) | |
| # optimal path | |
| arrowprops.update(facecolor='crimson') | |
| for i in range(arrows.shape[0]): | |
| ax.annotate("", xy=arrows[i, 2:], | |
| xytext=arrows[i, :2], arrowprops=arrowprops) | |
| plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment