Skip to content

Instantly share code, notes, and snippets.

@TApicella
Created May 9, 2017 18:29
Show Gist options
  • Save TApicella/fd7a5ba8e48a36ffac5f3385695082c2 to your computer and use it in GitHub Desktop.
Save TApicella/fd7a5ba8e48a36ffac5f3385695082c2 to your computer and use it in GitHub Desktop.
RosettaCode JaroDistance created by tapicella - https://repl.it/Hous/36
'''
http://rosettacode.org/wiki/Jaro_distance
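d = 0 if there are no matching characters
otherwise d = (1/3) * (m/len(s1) + m/len(s2) + (m-(transpose/2))/m )
where m is the number of matching characters
two characters match if they are equal and their positions are at most (max(len(s1), len(s2))//2)-1 apart
matching characters that appear in a different order in the two strings count as transposes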
'''
def JaroDistance(s1, s2):
    """Return the Jaro similarity of ``s1`` and ``s2``, rounded to 5 places.

    The score is ``(m/len(s1) + m/len(s2) + (m - t)/m) / 3`` where ``m`` is
    the number of matching characters and ``t`` is half the number of
    matched characters that appear in a different order in the two strings.
    Two characters match when they are equal and their positions differ by
    at most ``max(len(s1), len(s2)) // 2 - 1``; every position can take part
    in at most one match.

    Args:
        s1: First string (any indexable sequence with a length works).
        s2: Second string.

    Returns:
        ``0`` when there are no matching characters (including when either
        input is empty), otherwise a float in ``(0, 1]`` rounded to five
        decimal places; ``1.0`` means the strings are identical.

    Side effects:
        Prints one diagnostic line with both lengths, ``m`` and ``t``.
    """
    l1 = len(s1)
    l2 = len(s2)
    # Clamp at 0: for strings of length <= 1 the raw window is -1, which
    # would prevent identical one-character strings from matching at all.
    window = max(0, max(l1, l2) // 2 - 1)

    # Greedy one-to-one matching. The flags make sure a position is consumed
    # by at most one match, so repeated characters are never counted twice
    # and both strings always end up with the same number of matches.
    used1 = [False] * l1
    used2 = [False] * l2
    m = 0
    for i, ch in enumerate(s1):
        lo = max(0, i - window)
        hi = min(l2, i + window + 1)
        for j in range(lo, hi):
            if not used2[j] and s2[j] == ch:
                used1[i] = True
                used2[j] = True
                m += 1
                break

    # Walk the matched characters of both strings in order; every position
    # where they disagree is half of a transposition.
    matched1 = [c for c, flag in zip(s1, used1) if flag]
    matched2 = [c for c, flag in zip(s2, used2) if flag]
    out_of_order = sum(a != b for a, b in zip(matched1, matched2))
    t = out_of_order / 2

    # Diagnostic trace kept from the original script (%d truncates a
    # fractional t when it is displayed; the computation uses the exact value).
    print("L1: %d, L2: %d, m: %d, t: %d" % (l1, l2, m, t))

    if m == 0:
        return 0
    jdist = (m / l1 + m / l2 + (m - t) / m) / 3
    return round(jdist, 5)
# Demo: report the Jaro distance for each sample word pair, one line per pair.
for _left, _right in (
    ("DWAYNE", "DUANE"),
    ("MARTHA", "MARHTA"),
    ("DIXON", "DICKSONX"),
    ("JELLYFISH", "SMELLYFISH"),
):
    print("Jaro Distance for %s and %s is %s" % (_left, _right, JaroDistance(_left, _right)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment