Created
June 5, 2013 13:23
-
-
Save Slater-Victoroff/5713821 to your computer and use it in GitHub Desktop.
General case synonym matching using nltk.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.corpus import wordnet | |
from nltk.stem.wordnet import WordNetLemmatizer | |
import itertools | |
def Synonym_Checker(word1, word2):
    """Check whether word1 and word2 are synonyms according to WordNet.

    Both words are lemmatized, then every synset of the first word is
    scored against every synset of the second with Wu-Palmer similarity.
    The lemma names of the highest-scoring pair of synsets are compared.

    Args:
        word1: First word to compare.
        word2: Second word to compare.

    Returns:
        True if the best-matching pair of synsets shares at least one
        lemma name, otherwise False. False is also returned when either
        word has no synsets or no pair has a defined similarity score.
    """
    lemmatizer = WordNetLemmatizer()
    word1_synonyms = wordnet.synsets(lemmatizer.lemmatize(word1))
    word2_synonyms = wordnet.synsets(lemmatizer.lemmatize(word2))

    # Keep the best pair itself instead of decoding a flat index into the
    # product: the previous decoding divided by the wrong list length and
    # produced float indices under true division.
    best_pair = None
    best_score = None
    for synset1, synset2 in itertools.product(word1_synonyms, word2_synonyms):
        score = synset1.wup_similarity(synset2)
        # wup_similarity may return None when no connecting path exists;
        # such pairs cannot be the best match.
        if score is None:
            continue
        # Strict '>' keeps the first pair on ties, like list.index(max(...)).
        if best_score is None or score > best_score:
            best_score = score
            best_pair = (synset1, synset2)

    if best_pair is None:
        return False

    word1_set = _lemma_names(best_pair[0])
    word2_set = set(_lemma_names(best_pair[1]))
    # Any shared lemma name counts, not just the first one in word1_set.
    return any(name in word2_set for name in word1_set)


def _lemma_names(synset):
    """Return the lemma names of synset as a list.

    lemma_names is a method in NLTK 3 and a plain attribute in older
    releases; both forms are accepted.
    """
    names = synset.lemma_names
    if callable(names):
        names = names()
    return list(names)
# Demo: "Lycopersicon_esculentum" is the botanical name of the tomato plant.
# The call form of print works on both Python 2 and Python 3.
print(Synonym_Checker("tomato", "Lycopersicon_esculentum"))
best_match should contain integers, but it often contains floats, so the code fails when you index word1_synonyms[best_match[0]] or word2_synonyms[best_match[1]].
When I run this code I get this error:
Traceback (most recent call last):
File "/home/shanika//Lemmatizer.py", line 76, in <module>
print Synonym_Checker("tomato", "Lycopersicon_esculentum")
File "/home/shanika/Lemmatizer.py", line 72, in Synonym_Checker
match = [match or word in word2_set for word in word1_set][0]
TypeError: 'instancemethod' object is not iterable
Change
word1_set = word1_synonyms[best_match[0]].lemma_names
word2_set = word2_synonyms[best_match[1]].lemma_names
To
word1_set = word1_synonyms[int(best_match[0])].lemma_names()
word2_set = word2_synonyms[int(best_match[1])].lemma_names()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
word1_set = word1_synonyms[best_match[0]].lemma_names
should be word1_set = word1_synonyms[best_match[0]].lemma_names()
Notice the parentheses () at the end.