Created
March 19, 2014 09:09
-
-
Save suxue/9638109 to your computer and use it in GitHub Desktop.
Calculate Edit Distance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# vim: set fileencoding=utf-8: | |
import sys | |
import numpy | |
import random | |
class Direction: | |
def __str__(self): | |
if self._dir == 0: | |
return '-' | |
elif self._dir == 1: | |
return '|' | |
elif self._dir == 2: | |
return '\\' | |
elif self._dir == 3: | |
return 'm' | |
else: | |
1/0 | |
def __repr__(self): | |
return self.__str__() | |
def data(self): | |
return self._self | |
class Horizontal(Direction): | |
value = 0 | |
def __init__(self): | |
self._dir = Horizontal.value | |
class Vertical(Direction): | |
value = 1 | |
def __init__(self): | |
self._dir = Vertical.value | |
class Diagonal(Direction): | |
value = 2 | |
def __init__(self): | |
self._dir = Diagonal.value | |
class Match(Direction): | |
value = 3 | |
def __init__(self): | |
self._dir = Match.value | |
Directions = [Horizontal(), Vertical(), Diagonal(), Match()] | |
def default_printer(v): | |
return v.__str__() | |
def print_matrix(m, printer=default_printer): | |
for row in m: | |
for col in row: | |
sys.stdout.write("%3s" % printer(col)) | |
sys.stdout.write("\n") | |
def initialize_distance_matrix(src_len, dest_len): | |
matrix = numpy.zeros([src_len + 1, dest_len + 1], dtype=numpy.int16) | |
for no, row in enumerate(matrix): | |
row[0] = no | |
for x, _ in enumerate(matrix[0]): | |
matrix[0][x] = x | |
return matrix | |
def initialize_action_matrix(src_len, dest_len): | |
actions = numpy.zeros([src_len + 1, dest_len + 1], dtype=numpy.int8) | |
for _, row in enumerate(actions): | |
row[0] = Vertical.value | |
for x, _ in enumerate(actions[0]): | |
actions[0][x] = Horizontal.value | |
actions[0][0] = -1 # no predecessor | |
return actions | |
def compute_edit_distance(src, dest): | |
matrix = initialize_distance_matrix(len(src), len(dest)) | |
actions = initialize_action_matrix(len(src), len(dest)) | |
def get_horizontal_predecessor(y, x): | |
return Horizontal.value, matrix[y][x-1] + 1 | |
def get_vertical_predecessor(y, x): | |
return Vertical.value, matrix[y-1][x] + 1 | |
def get_diagonal_predecessor(y, x): | |
src_char = src[y-1] | |
dest_char = dest[x-1] | |
if src_char == dest_char: | |
return Match.value, matrix[y-1][x-1], | |
else: | |
return Diagonal.value, matrix[y-1][x-1] + 2 | |
# fill the tables | |
for y, row in enumerate(matrix[1:]): | |
y += 1 | |
for x, col in enumerate(row[1:]): | |
x += 1 | |
results = [get_horizontal_predecessor(y, x), | |
get_vertical_predecessor(y, x), | |
get_diagonal_predecessor(y, x)] | |
values = map(lambda v: v[1], results) | |
minval = min(values) | |
# deterministic way | |
# index = values.index(minval) | |
# randomly | |
indexes = [] | |
for i, v in enumerate(values): | |
if v == minval: | |
indexes.append(i) | |
index = random.choice(indexes) | |
direction = results[index][0] | |
matrix[y][x] = minval | |
actions[y][x] = direction | |
print "\nedit distance is", matrix[-1][-1], '\n' | |
print 'DISTANCE MATRIX' | |
print_matrix(matrix) | |
print 'MOVEMENT MATRIX' | |
print_matrix(actions, | |
printer = lambda v: Directions[v] if v >= 0 else 'o') | |
print '' | |
# visualization | |
class Number: | |
def __init__(self, value): | |
self.value = value | |
def increase(self): | |
self.value += 1 | |
def decrease(self): | |
self.value -= 1 | |
y = Number(len(src)) | |
x = Number(len(dest)) | |
action_list = [] | |
def move_up(): | |
# delete from src | |
y.decrease() | |
char = src[y.value] | |
action_list.append(('Delete',)) | |
def move_left(): | |
# append to dest | |
x.decrease() | |
char = dest[x.value] | |
action_list.append(('Append', char,)) | |
pass | |
def move_diagonal(): | |
x.decrease() | |
y.decrease() | |
inchar = dest[x.value] | |
outchar = src[y.value] | |
action_list.append(('Change', outchar, inchar,)) | |
def just_match(): | |
x.decrease() | |
y.decrease() | |
action_list.append(('Match',)) | |
movements = [move_left, move_up, move_diagonal, just_match] | |
while not (x.value == 0 and y.value == 0): | |
action = actions[y.value][x.value] | |
movements[action]() | |
action_list.reverse() | |
string = list(src) | |
char = '' | |
index = 0 | |
red = '\033[31;1m' | |
green = '\033[32;1m' | |
yellow = '\033[33;1m' | |
gray = '\033[38;5;241m' | |
reset = '\033[0;m' | |
print "VISUALIZE EDITING SEQUENCE" | |
while len(action_list) > 0: | |
action = action_list.pop(0) | |
if (action[0] == 'Delete'): | |
before = ''.join(string[:index]) | |
after = ''.join(string[index+1:]) | |
char = string.pop(index) | |
print 'Delete ', before + red + char + gray + after + reset | |
elif (action[0] == 'Append'): | |
before = ''.join(string[:index]) | |
after = ''.join(string[index:]) | |
char = action[1] | |
string.insert(index, char) | |
index = index + 1 | |
print 'Append ', before + green + char + gray + after + reset | |
elif (action[0] == 'Change'): | |
before = ''.join(string[:index]) | |
after = ''.join(string[index+1:]) | |
oldchar = string.pop(index) | |
newchar = action[2] | |
string.insert(index, newchar) | |
index = index + 1 | |
print 'Change ', before + red + oldchar + green + newchar + gray + after + reset | |
elif (action[0] == 'Match'): | |
before = ''.join(string[:index]) | |
after = ''.join(string[index+1:]) | |
char = string[index] | |
index = index + 1 | |
print 'Match ', before + yellow + char + gray + after + reset | |
print '' | |
def main(argv): | |
if len(argv) != 2: | |
sys.stderr.write("Usage: ed.py src dest\n") | |
exit(1) | |
else: | |
compute_edit_distance(argv[0], argv[1]) | |
main(sys.argv[1:]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment