mzdravkov · October 10, 2022 13:12
diff --git a/needleman-wunsch.py b/needleman-wunsch.py
 import sys
 import pprint
 from collections import defaultdict


 if len(sys.argv) != 3:
    print("Please pass two sequences as separate arguments. E.g. ./program TCAT CGAT")
    exit(1)

 seq1 = sys.argv[1].upper()
 seq2 = sys.argv[2].upper()

 # Matrix to keep the scores in.
 matrix = []

 # Substitution matrix for DNA
 sub_matrix = {
    'A': {
        'A': 10,
        'C': -5,
        'G': 0,
        'T': -5,
    },
    'C': {
        'A': -5,
        'C': 10,
        'G': -5,
        'T': 0,
    },
    'G': {
        'A': 0,
        'C': -5,
        'G': 10,
        'T': -5,
    },
    'T': {
        'A': -5,
        'C': 0,
        'G': -5,
        'T': 10,
    },
 }

 # A map from position to a list of possible previous steps leading to it.
 backsteps = defaultdict(lambda: [])

 GAP_PENALTY = -4

 for i in range(len(seq2) + 1):
    matrix.append([])
    for j in range(len(seq1) + 1):
        # Top-left corner is initialized with score 0
        if i == 0 and j == 0:
            matrix[i].append(0)
            continue

        # First row initialization uses only the values to the left
        if i == 0:
            matrix[i].append(matrix[i][j-1] + GAP_PENALTY)
            backsteps[(i, j)].append((i, j-1))
            continue

        # First column initialization uses only the values above
        if j == 0:
            matrix[i].append(matrix[i-1][j] + GAP_PENALTY)
            backsteps[(i, j)].append((i-1, j))
            continue
        
        sub_score = matrix[i-1][j-1] + sub_matrix[seq2[i-1]][seq1[j-1]]
        seq1_gap_score = matrix[i-1][j] + GAP_PENALTY
        seq2_gap_score = matrix[i][j-1] + GAP_PENALTY

        max_score = max(sub_score, seq1_gap_score, seq2_gap_score)
        matrix[i].append(max_score)

        if max_score == sub_score:
            backsteps[(i, j)].append((i-1, j-1))
        if max_score == seq1_gap_score:
            backsteps[(i, j)].append((i-1, j))
        if max_score == seq2_gap_score:
            backsteps[(i, j)].append((i, j-1))

 # Backtrack

 # We start from the bottom-right corner of the matrix and look for possible
 # routes that lead to it.
 start = (len(seq2), len(seq1))

 def backtrack(pos):
    """Recursively finds all possible alternative (equivalent) paths
    leading to a specific position."""
    if pos == (0, 0):
        return [[(0, 0)]]

    paths = []
    for possible_prev_step in backsteps[pos]:
        for possible_backtrack in backtrack(possible_prev_step):
            path = possible_backtrack + [pos]
            paths.append(path)

    return paths


 print("Score matrix:")
 print('\n'.join(' '.join('{0: >5}'.format(str(x)) for x in row) for row in matrix))


 print("\nAlternative backsteps:")
 pprint.pprint(backsteps)

 print("\nAlternative paths:")
 alternative_paths = backtrack(start)
 pprint.pprint(alternative_paths)

 print("\nAlternative alignments:")
 for path in alternative_paths:
    print('=======================')
    align1, align2 = "", ""
    seq1_counter = 0
    seq2_counter = 0
    for i in range(len(path) - 1):
        ri, ci = path[i]
        rn, cn = path[i+1]

        seq1_diff = cn - ci
        seq2_diff = rn - ri

        if seq1_diff == 0:
            align1 += '-'
        else:
            align1 += seq1[seq1_counter]
            seq1_counter += 1

        if seq2_diff == 0:
            align2 += '-'
        else:
            align2 += seq2[seq2_counter]
            seq2_counter += 1

    print(align1)
    print(align2)
	import sys
	import pprint
	from collections import defaultdict


	if len(sys.argv) != 3:
	print("Please pass two sequences as separate arguments. E.g. ./program TCAT CGAT")
	exit(1)

	seq1 = sys.argv[1].upper()
	seq2 = sys.argv[2].upper()

	# Matrix to keep the scores in.
	matrix = []

	# Substitution matrix for DNA
	sub_matrix = {
	'A': {
	'A': 10,
	'C': -5,
	'G': 0,
	'T': -5,
	},
	'C': {
	'A': -5,
	'C': 10,
	'G': -5,
	'T': 0,
	},
	'G': {
	'A': 0,
	'C': -5,
	'G': 10,
	'T': -5,
	},
	'T': {
	'A': -5,
	'C': 0,
	'G': -5,
	'T': 10,
	},
	}

	# A map from position to a list of possible previous steps leading to it.
	backsteps = defaultdict(lambda: [])

	GAP_PENALTY = -4

	for i in range(len(seq2) + 1):
	matrix.append([])
	for j in range(len(seq1) + 1):
	# Top-left corner is initialized with score 0
	if i == 0 and j == 0:
	matrix[i].append(0)
	continue

	# First row initialization uses only the values to the left
	if i == 0:
	matrix[i].append(matrix[i][j-1] + GAP_PENALTY)
	backsteps[(i, j)].append((i, j-1))
	continue

	# First column initialization uses only the values above
	if j == 0:
	matrix[i].append(matrix[i-1][j] + GAP_PENALTY)
	backsteps[(i, j)].append((i-1, j))
	continue

	sub_score = matrix[i-1][j-1] + sub_matrix[seq2[i-1]][seq1[j-1]]
	seq1_gap_score = matrix[i-1][j] + GAP_PENALTY
	seq2_gap_score = matrix[i][j-1] + GAP_PENALTY

	max_score = max(sub_score, seq1_gap_score, seq2_gap_score)
	matrix[i].append(max_score)

	if max_score == sub_score:
	backsteps[(i, j)].append((i-1, j-1))
	if max_score == seq1_gap_score:
	backsteps[(i, j)].append((i-1, j))
	if max_score == seq2_gap_score:
	backsteps[(i, j)].append((i, j-1))

	# Backtrack

	# We start from the bottom-right corner of the matrix and look for possible
	# routes that lead to it.
	start = (len(seq2), len(seq1))

	def backtrack(pos):
	"""Recursively finds all possible alternative (equivalent) paths
	leading to a specific position."""
	if pos == (0, 0):
	return [[(0, 0)]]

	paths = []
	for possible_prev_step in backsteps[pos]:
	for possible_backtrack in backtrack(possible_prev_step):
	path = possible_backtrack + [pos]
	paths.append(path)

	return paths


	print("Score matrix:")
	print('\n'.join(' '.join('{0: >5}'.format(str(x)) for x in row) for row in matrix))


	print("\nAlternative backsteps:")
	pprint.pprint(backsteps)

	print("\nAlternative paths:")
	alternative_paths = backtrack(start)
	pprint.pprint(alternative_paths)

	print("\nAlternative alignments:")
	for path in alternative_paths:
	print('=======================')
	align1, align2 = "", ""
	seq1_counter = 0
	seq2_counter = 0
	for i in range(len(path) - 1):
	ri, ci = path[i]
	rn, cn = path[i+1]

	seq1_diff = cn - ci
	seq2_diff = rn - ri

	if seq1_diff == 0:
	align1 += '-'
	else:
	align1 += seq1[seq1_counter]
	seq1_counter += 1

	if seq2_diff == 0:
	align2 += '-'
	else:
	align2 += seq2[seq2_counter]
	seq2_counter += 1

	print(align1)
	print(align2)