Skip to content

Instantly share code, notes, and snippets.

@josepdecid
Created June 8, 2020 18:10
Show Gist options
  • Save josepdecid/48974807a83aa07dd541a25074891813 to your computer and use it in GitHub Desktop.
Save josepdecid/48974807a83aa07dd541a25074891813 to your computer and use it in GitHub Desktop.
N-gram distance between two strings
def get_n_grams(s, n):
    """Return the set of distinct length-``n`` substrings of ``s``.

    The n-grams are taken with a sliding window of width ``n`` that
    advances one position at a time, so consecutive n-grams overlap.
    Duplicates collapse because the result is a set.

    Args:
        s: The sequence to split (typically a ``str``). It must support
            ``len()`` and slicing, and its slices must be hashable.
        n: Window width; must be at least 1.

    Returns:
        A ``set`` with every distinct contiguous slice of length ``n``.
        The set is empty when ``s`` is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1. A zero or negative width
            would otherwise silently yield empty or arbitrary slices.
    """
    if n < 1:
        raise ValueError(f'n must be a positive integer, got {n!r}')

    # range() is empty when len(s) < n, which gives the empty set.
    return {s[i:i + n] for i in range(len(s) - n + 1)}
def n_gram_distance(s1, s2, n, case_sensitive=False):
    """Compute and print the n-gram overlap score of two strings.

    The score is ``2 * |A & B| / (|A| + |B|)``, where ``A`` and ``B`` are
    the sets of distinct n-grams of ``s1`` and ``s2`` (the Sorensen-Dice
    coefficient). Despite the function name, a larger value means the
    strings are MORE alike: 1.0 when both n-gram sets are equal, 0.0 when
    they share no n-gram.

    The n-gram size, both n-gram sets and the score are printed to
    stdout, and the score is also returned.

    Args:
        s1: First string.
        s2: Second string.
        n: Size of the n-grams to compare.
        case_sensitive: When False (the default) both strings are
            lower-cased before the n-grams are extracted.

    Returns:
        The score as a float in the range [0.0, 1.0].
    """
    if not case_sensitive:
        s1 = s1.lower()
        s2 = s2.lower()

    n_grams_s1 = get_n_grams(s1, n)
    n_grams_s2 = get_n_grams(s2, n)

    total = len(n_grams_s1) + len(n_grams_s2)
    if total:
        distance = 2 * len(n_grams_s1.intersection(n_grams_s2)) / total
    else:
        # Neither string produced an n-gram (both are shorter than n), so
        # the ratio is 0/0. Fall back to direct equality of the
        # (case-normalised) strings instead of raising ZeroDivisionError.
        distance = 1.0 if s1 == s2 else 0.0

    # Report the n-gram size actually used rather than a hard-coded 2.
    print(f'{n}-grams')
    print(n_grams_s1)
    print(n_grams_s2)
    print(distance)
    return distance
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment