Created
April 5, 2020 14:35
-
-
Save vikeshsingh37/6487665e9e12ff008bd155df79f3e4b1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import nltk | |
from nltk.corpus import wordnet as wn | |
from snorkel.augmentation import transformation_function | |
nltk.download("wordnet", quiet=True) | |
def get_synonyms(word):
    """Return the WordNet synonyms of ``word``, excluding the word itself.

    Every lemma of every synset that WordNet returns for ``word`` is
    collected; each lemma name is lower-cased and its underscores are
    replaced by spaces.

    Args:
        word: The word to look up in WordNet.

    Returns:
        A sorted list of distinct synonym strings. The list is empty when
        WordNet has no synsets for ``word`` or when the only lemma is the
        word itself.
    """
    # Normalize the query exactly like the lemma names below, so that a
    # capitalized or underscore-joined query is not reported as its own
    # synonym.
    normalized_word = word.lower().replace("_", " ")
    names = {
        lemma.name().lower().replace("_", " ")
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
    }
    names.discard(normalized_word)
    # Sort so the result does not depend on set iteration order; callers
    # that pick an element by index then get reproducible output.
    return sorted(names)
@transformation_function()
def tf_replace_word_with_synonym(x):
    """Try to replace one randomly chosen word of ``x.text`` with a synonym.

    The text is lower-cased and split on whitespace, one word position is
    drawn at random, and the word there is replaced by the first synonym
    that ``get_synonyms`` returns for it.

    Args:
        x: Data point exposing a string ``text`` attribute.

    Returns:
        ``x``. When a synonym was found, ``x.text`` has been overwritten
        with the lower-cased, single-space-joined words including the
        replacement; otherwise ``x.text`` is left untouched.
    """
    words = x.text.lower().split()
    # Empty or whitespace-only text has no word to pick; drawing a random
    # position from an empty range would raise IndexError.
    if not words:
        return x
    idx = random.randrange(len(words))
    synonyms = get_synonyms(words[idx])
    if synonyms:
        # `words` is a local list, so replacing the word in place is safe.
        words[idx] = synonyms[0]
        x.text = " ".join(words)
    return x
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment