Skip to content

Instantly share code, notes, and snippets.

@stucka
Last active May 6, 2016 13:51
Show Gist options
  • Save stucka/c5e1e924b8bc15df39ba82b309bcbd49 to your computer and use it in GitHub Desktop.
Save stucka/c5e1e924b8bc15df39ba82b309bcbd49 to your computer and use it in GitHub Desktop.
Find similar strings
def similar(a, b):
    """Return how similar two sequences are, as a float from 0.0 to 1.0.

    This is a thin wrapper around
    ``difflib.SequenceMatcher(None, a, b).ratio()``: 1.0 means the two
    sequences are identical and 0.0 means they have nothing in common.

    Args:
        a: First sequence (typically a string).
        b: Second sequence (typically a string).

    Returns:
        The ``SequenceMatcher`` similarity ratio of ``a`` and ``b``.

    Note:
        ``ratio()`` may give a different value when the arguments are
        swapped, so callers should pass them in a consistent order.
    """
    # Function-scope import keeps this snippet self-contained when it is
    # pasted into another script.
    from difflib import SequenceMatcher

    # None as the first argument means "no junk filter": every element of
    # both sequences takes part in the comparison.
    return SequenceMatcher(None, a, b).ratio()
def list_match(key, mylist):
    """Find the entry of ``mylist`` that is most similar to ``key``.

    Each distinct candidate is scored with ``similar(key, candidate)`` and
    the highest-scoring one is returned.

    Args:
        key: The value to look up.
        mylist: Iterable of hashable candidate values; duplicates are
            scored only once.

    Returns:
        A ``(top_name, top_score)`` tuple. When ``mylist`` is empty the
        result is ``("", -1)``.
    """
    top_score = -1
    top_name = ""
    # Skip duplicates to speed processing, but walk the candidates in their
    # original order (rather than through an unordered set) so that ties are
    # always resolved the same way: the first candidate with the best score
    # wins.
    seen = set()
    for this_name in mylist:
        if this_name in seen:
            continue
        seen.add(this_name)
        this_score = similar(key, this_name)
        # Strict ">" keeps the earliest candidate when scores are equal.
        if this_score > top_score:
            top_score = this_score
            top_name = this_name
    return (top_name, top_score)
# .. Where filelist contains a list of keys
# .. and people_without_photos contains the list of possible matches.
# Neither name is defined in this snippet; the surrounding script must
# provide both before this loop runs.
for orphan in filelist:
    best_name, best_score = list_match(orphan, people_without_photos)
    # str.format() converts every value to text itself. The previous
    # unicode(best_name).encode("utf-8") call only existed on Python 2: on
    # Python 3 `unicode` is undefined, and encoded bytes cannot be
    # concatenated to str.
    print(
        "\t\t{0} \t\t Did you mean this?: {1} ({2}% match)".format(
            orphan, best_name, int(round(100 * best_score))
        )
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment