Skip to content

Instantly share code, notes, and snippets.

@YektaDev
Created January 18, 2025 13:13
Show Gist options
  • Save YektaDev/c4b2b631f7d456c19118002dcf2b0e68 to your computer and use it in GitHub Desktop.
Save YektaDev/c4b2b631f7d456c19118002dcf2b0e68 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# ----------------------------------------------------------------------------------------------------------------------
# Implementation of the Needleman-Wunsch Algorithm with Variable Gap Penalty + Breast Cancer Sample
# ----------------------------------------------------------------------------------------------------------------------
import numpy as np
def nw(x: str, y: str, match: int = 3, mismatch: int = -3, gap_start: int = -5, gap_extend: int = -3):
nx = len(x)
ny = len(y)
optimal = np.zeros((nx + 1, ny + 1))
# Init first column
optimal[0][0] = 0
for i in range(1, nx + 1):
optimal[i][0] = gap_start + (i - 1) * gap_extend
# Init first row
for j in range(1, ny + 1):
optimal[0][j] = gap_start + (j - 1) * gap_extend
ptrs = np.zeros((nx + 1, ny + 1))
ptrs[:, 0] = 3
ptrs[0, :] = 4
for i in range(nx):
for j in range(ny):
t0 = optimal[i, j] + (match if x[i] == y[j] else mismatch)
# Calculate t1 (gap in x)
if ptrs[i, j + 1] in [3, 5, 7, 9]:
t1 = optimal[i, j + 1] + gap_extend
else:
t1 = optimal[i, j + 1] + gap_start
# Calculate t2 (gap in y)
if ptrs[i + 1, j] in [4, 6, 7, 9]:
t2 = optimal[i + 1, j] + gap_extend
else:
t2 = optimal[i + 1, j] + gap_start
tmax = max(t0, t1, t2)
optimal[i + 1, j + 1] = tmax
# Update pointers
if t0 == tmax:
ptrs[i + 1, j + 1] += 2
if t1 == tmax:
ptrs[i + 1, j + 1] += 3
if t2 == tmax:
ptrs[i + 1, j + 1] += 4
# Find an optimal alignment
i = nx
j = ny
rx = []
ry = []
while i > 0 or j > 0:
p = ptrs[i, j]
if p in [2, 5, 6, 9]:
rx.append(x[i - 1])
ry.append(y[j - 1])
i -= 1
j -= 1
elif p in [3, 5, 7, 9]:
rx.append(x[i - 1])
ry.append('-')
i -= 1
else:
rx.append('-')
ry.append(y[j - 1])
j -= 1
rx = ''.join(rx[::-1])
ry = ''.join(ry[::-1])
return '\n'.join([rx, ry, "Score: " + str(optimal[nx, ny])])
# Random sub-samples of Breast Cancer Genes - From: https://www.ncbi.nlm.nih.gov/
BRCA1 = "AAATTTAGCTTATCTGGTATAAAAATCATACGAGAAGCACTGTTAAATGTAAAATGGTATTTGGCTTTCT"
BRCA2 = "AAATTTATTCCCTGTAATAAAGCATCAAAAAGCAAAGTACCTGTTATATATTATCTCAGCATGACATGGA"
print(nw(BRCA1, BRCA2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment