Skip to content

Instantly share code, notes, and snippets.

@david-batranu
Created November 19, 2018 16:55
Show Gist options
  • Save david-batranu/315012f6fb578ab3a261b224b30169c3 to your computer and use it in GitHub Desktop.
Save david-batranu/315012f6fb578ab3a261b224b30169c3 to your computer and use it in GitHub Desktop.
DEF MAX_STR = 2096
def levenshtein(char* s1, char* s2, int bail_at=5):
cdef int len_s1 = len(s1)
cdef int len_s2 = len(s2)
cdef int v0[MAX_STR]
cdef int v1[MAX_STR]
cdef char c1, c2
cdef int x
cdef int i
cdef int j
cdef int smallest
for x in range(MAX_STR):
v1[x] = v0[x] = 0
for x in range(len_s2 + 1):
v1[x] = x
for i in range(len_s1):
c1 = s1[i]
for x in range(len_s2 + 1):
v0[x] = v1[x]
v1[0] += 1
for j in range(1, len_s2 + 1):
c2 = s2[j - 1]
if c1 == c2:
v1[j] = v0[j - 1]
else:
v1[j] = 1 + min(v0[j - 1], v0[j], v1[j - 1])
smallest = 1000
for j in range(len_s2 + 1):
if v1[j] < smallest:
smallest = v1[j]
if smallest >= bail_at:
return bail_at + 1
return v1[len_s2]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment