Created
October 11, 2012 14:05
-
-
Save fspot/3872523 to your computer and use it in GitHub Desktop.
autre façon de récupérer le best match
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################
# Pick the link whose text shares the most identifiers with the file name.
# extractIdentifiers, file and links are provided by the surrounding script.
file_words = set(extractIdentifiers(file))
bestIndex, bestScore, bestWords = -1, 0, []
for position, link in enumerate(links):
    shared = file_words & set(extractIdentifiers(link.text))
    # bestScore starts at 0 and the comparison is strict, so links with
    # nothing in common are ignored and the earliest link wins a tie.
    if len(shared) > bestScore:
        bestIndex, bestScore, bestWords = position, len(shared), shared
if bestIndex == -1:
    # Nothing matched: report it in red (ANSI escape codes).
    print("\033[31m"+file+" could not be found on OpenSubtitles... \033[00m")
else:
    ...
####################
from operator import itemgetter
from clint.textui import colored

# Alternative way of picking the best match: score every link by the number
# of identifiers it shares with the file name, then keep the top scorer.
# extractIdentifiers, file and links are provided by the surrounding script.

# Build a real set so intersection() is available whatever kind of iterable
# extractIdentifiers returns.
file_words = set(extractIdentifiers(file))

# Identifiers shared between the file name and each link.
# NOTE(review): reads link.text, as the loop-based variant of this snippet
# does -- confirm that every link exposes its label as .text.
links_matches = [
    file_words.intersection(extractIdentifiers(link.text)) for link in links
]

# One (link, shared identifiers, score) triple per link. A list of tuples is
# used rather than a dict so links need not be hashable and duplicates are
# kept.
ranked = [
    (link, matches, len(matches))
    for (link, matches) in zip(links, links_matches)
]

if ranked:
    # Rank on the match count: comparing the sets themselves only tests
    # subset/superset relations, which is not an ordering by size.
    # max() returns the first of several equally good links.
    best_link, best_matches, _ = max(ranked, key=itemgetter(2))
else:
    # No candidate at all: fall through to the "not found" message instead
    # of raising on an empty sequence.
    best_link, best_matches = None, set()

if not best_matches:
    # The best candidate shares no identifier with the file: nothing found.
    print(colored.red("{} could not be found on OpenSubtitles...".format(file)))
else:
    ...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment