Created
January 18, 2025 13:13
-
-
Save YektaDev/c4b2b631f7d456c19118002dcf2b0e68 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# ---------------------------------------------------------------------------------------------------------------------- | |
# Implementation of the Needleman-Wunsch Algorithm with Variable Gap Penalty + Breast Cancer Sample | |
# ---------------------------------------------------------------------------------------------------------------------- | |
import numpy as np | |
def nw(x: str, y: str, match: int = 3, mismatch: int = -3, gap_start: int = -5, gap_extend: int = -3): | |
nx = len(x) | |
ny = len(y) | |
optimal = np.zeros((nx + 1, ny + 1)) | |
# Init first column | |
optimal[0][0] = 0 | |
for i in range(1, nx + 1): | |
optimal[i][0] = gap_start + (i - 1) * gap_extend | |
# Init first row | |
for j in range(1, ny + 1): | |
optimal[0][j] = gap_start + (j - 1) * gap_extend | |
ptrs = np.zeros((nx + 1, ny + 1)) | |
ptrs[:, 0] = 3 | |
ptrs[0, :] = 4 | |
for i in range(nx): | |
for j in range(ny): | |
t0 = optimal[i, j] + (match if x[i] == y[j] else mismatch) | |
# Calculate t1 (gap in x) | |
if ptrs[i, j + 1] in [3, 5, 7, 9]: | |
t1 = optimal[i, j + 1] + gap_extend | |
else: | |
t1 = optimal[i, j + 1] + gap_start | |
# Calculate t2 (gap in y) | |
if ptrs[i + 1, j] in [4, 6, 7, 9]: | |
t2 = optimal[i + 1, j] + gap_extend | |
else: | |
t2 = optimal[i + 1, j] + gap_start | |
tmax = max(t0, t1, t2) | |
optimal[i + 1, j + 1] = tmax | |
# Update pointers | |
if t0 == tmax: | |
ptrs[i + 1, j + 1] += 2 | |
if t1 == tmax: | |
ptrs[i + 1, j + 1] += 3 | |
if t2 == tmax: | |
ptrs[i + 1, j + 1] += 4 | |
# Find an optimal alignment | |
i = nx | |
j = ny | |
rx = [] | |
ry = [] | |
while i > 0 or j > 0: | |
p = ptrs[i, j] | |
if p in [2, 5, 6, 9]: | |
rx.append(x[i - 1]) | |
ry.append(y[j - 1]) | |
i -= 1 | |
j -= 1 | |
elif p in [3, 5, 7, 9]: | |
rx.append(x[i - 1]) | |
ry.append('-') | |
i -= 1 | |
else: | |
rx.append('-') | |
ry.append(y[j - 1]) | |
j -= 1 | |
rx = ''.join(rx[::-1]) | |
ry = ''.join(ry[::-1]) | |
return '\n'.join([rx, ry, "Score: " + str(optimal[nx, ny])]) | |
# Random sub-samples of Breast Cancer Genes - From: https://www.ncbi.nlm.nih.gov/ | |
BRCA1 = "AAATTTAGCTTATCTGGTATAAAAATCATACGAGAAGCACTGTTAAATGTAAAATGGTATTTGGCTTTCT" | |
BRCA2 = "AAATTTATTCCCTGTAATAAAGCATCAAAAAGCAAAGTACCTGTTATATATTATCTCAGCATGACATGGA" | |
print(nw(BRCA1, BRCA2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment