Skip to content

Instantly share code, notes, and snippets.

@estasney
Last active October 15, 2018 02:16
Show Gist options
  • Save estasney/2a188ce6b59a4032908cef84bd37aa3e to your computer and use it in GitHub Desktop.
Save estasney/2a188ce6b59a4032908cef84bd37aa3e to your computer and use it in GitHub Desktop.
WordMappings
from cytoolz import groupby
class WordPair(object):
    """Map a group of alternative tokens onto one preferred token.

    Calling an instance with a list of tokens returns a new list in which
    every token found in ``others`` is replaced by ``preferred``.  When
    ``preferred`` is falsy (``None`` or ``''``) the alternatives are treated
    as stop words and simply removed.
    """

    # Labels returned by word_preference().
    PREFERRED = 'preferred'
    OTHERS = 'others'

    def __init__(self, preferred, others):
        """Store the preferred token and its alternatives.

        :param preferred: replacement token; a falsy value means "drop the
            alternatives instead of replacing them".
        :param others: the alternative token(s).  May be a set (stored as
            is), any other iterable of tokens (copied into a set), a single
            string (treated as one token) or ``None`` (no alternatives).
        """
        self.preferred = preferred
        if others is None:
            self.others = set()
        elif isinstance(others, set):
            # Keep the caller's set object, as the original code did.
            self.others = others
        elif isinstance(others, str):
            # A bare string is ONE token.  Storing it unchanged would turn
            # ``in`` into a substring test; set(others) would split it into
            # characters.
            self.others = {others}
        else:
            # Lists, tuples, frozensets, generators, ...: always end up with
            # a mutable set so membership is O(1) and .update() is available.
            self.others = set(others)

    def __call__(self, tokens):
        """Return a copy of ``tokens`` with alternatives replaced or removed.

        :param tokens: iterable of tokens.
        :return: a new list; the input is not modified.
        """
        result = []
        for token in tokens:
            if token in self.others:
                # A falsy preferred token means stop-word removal: the
                # matched token is dropped rather than replaced.
                if self.preferred:
                    result.append(self.preferred)
            else:
                result.append(token)
        return result

    def __contains__(self, item):
        """Return True if ``item`` is the preferred token or an alternative."""
        return self.preferred == item or item in self.others

    def word_preference(self, item):
        """Classify ``item`` relative to this pair.

        :return: ``WordPair.PREFERRED`` if ``item`` is the preferred token,
            ``WordPair.OTHERS`` if it is one of the alternatives, otherwise
            ``None``.
        """
        if item == self.preferred:
            return self.PREFERRED
        if item in self.others:
            return self.OTHERS
        return None
class WordMappings(object):
    """An ordered collection of word pairs applied to token lists.

    Pairs are stored in ``self.pairs`` in the order they were added.
    Calling the instance runs a token list through every pair in turn.
    """

    def __init__(self):
        """Start with no pairs registered."""
        self.pairs = []

    @staticmethod
    def _as_set(others):
        """Return ``others`` as a set of tokens.

        A set is returned unchanged, ``None`` becomes an empty set, a single
        string becomes a one-element set (NOT a set of its characters) and
        any other iterable is copied into a new set.
        """
        if others is None:
            return set()
        if isinstance(others, set):
            return others
        if isinstance(others, str):
            return {others}
        return set(others)

    def add_pair(self, preferred, others):
        """Register ``others`` as alternatives for ``preferred``.

        If a pair whose preferred token equals ``preferred`` already exists,
        the first such pair is extended with ``others``.  Otherwise a new
        pair is created and appended, even when ``preferred`` currently
        appears among another pair's alternatives.

        :param preferred: the replacement token.
        :param others: alternative token(s): a set, any other iterable of
            tokens, a single string, or ``None``.
        :return: the pair (new or existing) that now holds the mapping.
        """
        others = self._as_set(others)

        # One pass over the pairs: first pair that already prefers this word.
        target = next(
            (pair for pair in self.pairs if pair.preferred == preferred),
            None,
        )
        if target is None:
            return self._make_new_pair(preferred, others)

        if isinstance(target.others, set):
            target.others.update(others)
        else:
            # The stored alternatives may be a non-set container (tuple,
            # string, ...) with no usable .update(); rebuild them as a set.
            target.others = self._as_set(target.others) | others
        return target

    def _make_new_pair(self, preferred, others):
        """Create a pair, append it to ``self.pairs`` and return it."""
        new_pair = WordPair(preferred, others)
        self.pairs.append(new_pair)
        return new_pair

    def __call__(self, tokens):
        """Run ``tokens`` through every pair in insertion order.

        Each pair receives the output of the previous one.

        :return: the token list produced by the last pair, or ``tokens``
            itself when no pairs are registered.
        """
        for pair in self.pairs:
            tokens = pair(tokens)
        return tokens
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment