Last active
December 27, 2015 08:29
-
-
Save marcwebbie/7297141 to your computer and use it in GitHub Desktop.
Testing the performance of several ways of filtering a list for the best match.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib
import re
from difflib import SequenceMatcher
from timeit import timeit
# Sample data shared by every benchmarked approach: lower-case search words
# and the candidate song titles they are matched against.
word_list = ["balla", "baby"]
song_list = [
    "Give Em Some Mo",
    "Fall'n (Feat. G.I.B.)",
    "Balla Baby",
    "Jackpot The Pimp Pt. 2 (Skit)",
    "Leave Wit Me (Feat. R. Kelly & Ziggy)",
    "Make That Ass Talk (Feat. Ziggy)",
    "I Do",
    "We Clubbin",
    "We Do (Feat. Bun B)",
    "Wurr Da Git It Gurlz At (Feat. G.I.B.)",
    "Bring Da Beef (Feat. G.I.B.)",
    "Powerballin' (Outro)",
    "Balla Baby (Remix) (Feat. Lil' Flip & Boozie)",
    "What Up Wit It (Feat. G.I.B.)",
    "Don't Really Care",
]
def get_best_match_dict(words=None, songs=None):
    """Return the song title that best matches the search words (dict variant).

    Each title is lower-cased and split on whitespace. A title scores one
    point per search word found among its tokens, minus its total token
    count, so among equally matching titles the shortest one wins. Titles
    containing none of the words are ignored.

    Args:
        words: Search words, compared against lower-cased tokens. Defaults to
            the module-level ``word_list``.
        songs: Candidate titles. Defaults to the module-level ``song_list``.

    Returns:
        The highest-scoring title. On a tie, the title reached first when
        iterating the score dict wins.

    Raises:
        ValueError: If no title contains any of the search words.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    scores = {}
    for song in songs:
        # Tokenise once per song instead of once per (song, word) pair.
        tokens = song.lower().split()
        hits = sum(1 for word in words if word in tokens)
        if hits:
            scores[song] = hits - len(tokens)

    if not scores:
        raise ValueError("no song matches any of the search words")
    return max(scores, key=scores.get)
def get_best_match_list(words=None, songs=None):
    """Return the song title that best matches the search words (list variant).

    Scoring is one point per search word found among the title's lower-cased,
    whitespace-separated tokens, minus the number of tokens. Titles with no
    matching word are skipped.

    Args:
        words: Search words, compared against lower-cased tokens. Defaults to
            the module-level ``word_list``.
        songs: Candidate titles. Defaults to the module-level ``song_list``.

    Returns:
        The highest-scoring title. Candidates are compared as
        ``(score, title)`` tuples, so a tie on score is settled by the
        greater title string.

    Raises:
        ValueError: If no title contains any of the search words.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    scored = []
    for song in songs:
        # Tokenise once per song instead of once per (song, word) pair.
        tokens = song.lower().split()
        hits = sum(1 for word in words if word in tokens)
        if hits:
            scored.append((hits - len(tokens), song))

    if not scored:
        raise ValueError("no song matches any of the search words")
    return max(scored)[1]
def get_best_match_regex(words=None, songs=None):
    """Return the best ``(score, title)`` pair using a regex alternation.

    The score of a title is the number of non-overlapping occurrences of any
    search word in the lower-cased title, minus the title's whitespace token
    count. Unlike the token-based variants, words match as substrings and
    titles without any match still take part in the ranking.

    Args:
        words: Search words, matched literally. Defaults to the module-level
            ``word_list``.
        songs: Candidate titles. Defaults to the module-level ``song_list``.

    Returns:
        The greatest ``(score, title)`` tuple; a tie on score is settled by
        the greater title string.

    Raises:
        ValueError: If ``songs`` is empty.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    # Escape the words so regex metacharacters such as "(" or "." are matched
    # literally, and drop empty words: an empty alternative (or an empty
    # pattern) would match at every position and inflate every score.
    alternatives = [re.escape(word) for word in words if word]
    # Build the pattern once rather than once per song.
    pattern = re.compile('|'.join(alternatives)) if alternatives else None

    def score(song):
        hits = len(pattern.findall(song.lower())) if pattern else 0
        return hits - len(song.split())

    return max((score(song), song) for song in songs)
def get_best_match_difflib(words=None, songs=None):
    """Return the title most similar to the space-joined search words.

    Similarity is ``difflib.SequenceMatcher(...).ratio()`` between the joined
    words and the lower-cased title.

    Args:
        words: Search words; joined with single spaces to form the query.
            Defaults to the module-level ``word_list``.
        songs: Candidate titles. Defaults to the module-level ``song_list``.

    Returns:
        The title with the highest similarity ratio; the first such title
        wins a tie.

    Raises:
        ValueError: If ``songs`` is empty.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    query = ' '.join(words)

    def similarity(title):
        # Argument order is kept as (query, title): ratio() may depend on it.
        return SequenceMatcher(None, query, title.lower()).ratio()

    return max(songs, key=similarity)
def get_best_match_difflib_close_match(words=None, songs=None):
    """Return the closest title according to ``difflib.get_close_matches``.

    The space-joined search words are compared against the lower-cased
    titles, as in the other variants, and the original spelling of the best
    title is returned. ``get_close_matches`` is used with its default
    similarity cutoff.

    Args:
        words: Search words; joined with single spaces to form the query.
            Defaults to the module-level ``word_list``.
        songs: Candidate titles. Defaults to the module-level ``song_list``.

    Returns:
        The best-scoring title in its original casing.

    Raises:
        IndexError: If no title is similar enough to pass the cutoff.
    """
    if words is None:
        words = word_list
    if songs is None:
        songs = song_list

    query = ' '.join(words)

    # Map each lower-cased title back to its original spelling. When two
    # titles differ only by case, the first one is kept.
    originals = {}
    for song in songs:
        originals.setdefault(song.lower(), song)

    # Only the top match is needed, so ask for a single result.
    matches = difflib.get_close_matches(query, list(originals), n=1)
    if not matches:
        raise IndexError("no song is close enough to the search words")
    return originals[matches[0]]
fstr = "Function: {}, Time: {}, result: {}"

# One entry per benchmarked approach:
#   (label, statement to time, names the statement imports from __main__).
# The two inline variants are timed as source strings so that no extra
# function-call overhead is included in their measurement.
_BENCHMARK_CASES = [
    ('get_best_match_dict()',
     'get_best_match_dict()',
     'get_best_match_dict'),
    ('get_best_match_list()',
     'get_best_match_list()',
     'get_best_match_list'),
    ('max + list comprehension (key=)',
     'max([(len([w for w in word_list if w in song.lower().split()]), song) '
     'for song in song_list], key=lambda x: x[0] - len(x[1].split()))',
     'word_list, song_list'),
    ('max + generator expression',
     'max((len([w for w in word_list if w in song.lower().split()]) '
     '- len(song.split()), song) for song in song_list)',
     'word_list, song_list'),
    ('get_best_match_regex()',
     'get_best_match_regex()',
     'get_best_match_regex'),
    ('get_best_match_difflib()',
     'get_best_match_difflib()',
     'get_best_match_difflib'),
    ('get_best_match_difflib_close_match()',
     'get_best_match_difflib_close_match()',
     'get_best_match_difflib_close_match'),
]


def _run_benchmarks(number=1000):
    """Time every benchmark case and print its label, time and own result.

    Args:
        number: How many times each statement is executed by ``timeit``; the
            printed time is the total for all executions, in seconds.
    """
    for label, stmt, names in _BENCHMARK_CASES:
        elapsed = timeit(
            stmt, setup='from __main__ import ' + names, number=number)
        # Evaluate the very statement that was timed so the printed result
        # belongs to that approach. The statements are the trusted literals
        # defined above, never external input.
        outcome = eval(stmt, globals())
        # Some approaches yield a (score, title) pair; report only the title.
        if isinstance(outcome, tuple):
            outcome = outcome[1]
        print(fstr.format(label, elapsed, outcome))


# The timeit setup imports from __main__, so the benchmarks can only run when
# this file is executed as a script.
if __name__ == "__main__":
    _run_benchmarks()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment