Skip to content

Instantly share code, notes, and snippets.

@rakaar
Created April 19, 2020 11:51
Show Gist options
  • Save rakaar/2c8f7a88d8960815ac4afa0f7793f035 to your computer and use it in GitHub Desktop.
Save rakaar/2c8f7a88d8960815ac4afa0f7793f035 to your computer and use it in GitHub Desktop.
Python code to get results from a fuzzy search through a list of dictionaries
# Sample data to search through: one dictionary per movie.
# Ratings are mostly stored as strings; the last entry stores its rating as an int.
list_of_dicts = [
    {
        "name": "Usual Suspects",
        "imdb": "7.0",
        "category": "Thriller",
    },
    {
        "name": "Hitman",
        "imdb": "6.3",
        "category": "Action",
    },
    {
        "name": "Dark Knight",
        "imdb": "9.0",
        "category": "Adventure",
    },
    {
        "name": "The Help",
        "imdb": "8.0",
        "category": "Drama",
    },
    {
        "name": "The Choice",
        "imdb": "6.2",
        "category": "Romance",
    },
    {
        "name": "Colonia",
        "imdb": 7,
        "category": "Romance",
    },
]
# INSTALL THESE PACKAGES
# pip install fuzzywuzzy
# pip install python-Levenshtein
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# Search settings
# Text to look for among the values of each dictionary
query = "roman"
# Minimum match score (0 to 100) a dictionary needs to be kept; tune to taste
threshold = 30
def return_results(list_of_dicts, query, threshold):
    """Return the dictionaries that fuzzily match ``query``, best match first.

    Each dictionary is scored by the highest ``fuzz.ratio`` between
    ``str(query)`` and the string form of any one of its values.
    Dictionaries scoring below ``threshold`` are dropped; the rest are
    returned in descending score order, with ties kept in input order.

    Args:
        list_of_dicts: Dictionaries to search through.
        query: Search term; converted with ``str`` before comparison.
        threshold: Minimum score (0 to 100) a dictionary needs to be kept.

    Returns:
        A new list containing the matching dictionaries (the same objects,
        not copies), ordered from highest to lowest score.
    """
    query_text = str(query)  # convert once instead of once per value
    scored = []
    for item in list_of_dicts:
        # A dictionary with no values has nothing to compare against; give it
        # a score of 0 instead of letting max() raise ValueError.
        best = max(
            (fuzz.ratio(query_text, str(value)) for value in item.values()),
            default=0,
        )
        if best >= threshold:
            scored.append((best, item))
    # list.sort is stable, also with reverse=True, so equal scores keep input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [item for _, item in scored]
# Example run: search the sample data with the query and threshold set above
search_results = return_results(
    list_of_dicts=list_of_dicts,
    query=query,
    threshold=threshold,
)
print(search_results)
# Output is
#[{'name': 'The Choice', 'imdb': '6.2', 'category': 'Romance'}, {'name': 'Colonia', 'imdb': 7, 'category': 'Romance'}, {'name': 'The Help', 'imdb': '8.0', 'category': 'Drama'}, {'name': 'Hitman', 'imdb': '6.3', 'category': 'Action'}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment