Created
January 2, 2018 20:27
-
-
Save mayhewsw/5f38ddeb59e154c277bbef7f8ff3d5f9 to your computer and use it in GitHub Desktop.
Codenames clue giver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
from itertools import combinations

from gensim.models import KeyedVectors
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Word vectors downloaded from fasttext: https://fasttext.cc/docs/en/english-vectors.html
# and converted to word2vec binary format for faster loading (see convert.py).
vec = KeyedVectors.load_word2vec_format("~/data/wiki-news-300d-1M.vec.bin", binary=True)

# Shared NLTK helpers: the stemmer is used by isSame, the lemmatizer by isplural.
stemmer = PorterStemmer()
wnl = WordNetLemmatizer()
def isplural(word):
    """Return True when *word* differs from its WordNet noun lemma.

    The word is lemmatized as a noun ('n'); if lemmatization changes it, the
    word is treated as a plural form.
    """
    lemma = wnl.lemmatize(word, 'n')
    # Compare by value, not identity: the lemmatizer may return a new string
    # object that is equal to the input, which `is not` would report as plural.
    return word != lemma
def isCap(word):
    """Return True when lowercasing *word* would change it.

    In practice this means the word contains at least one character that
    `str.lower` alters (e.g. an uppercase letter).
    """
    lowered = word.lower()
    return not (lowered == word)
def isSame(a,b):
    """Return True when *a* and *b* should be treated as the same word.

    Two words count as the same when their Porter stems match. The function
    also returns True whenever either word changes under lowercasing, so any
    word containing capital letters is reported as "same" as well.
    """
    if stemmer.stem(a) == stemmer.stem(b):
        return True
    # Stems differ: still report True if either word is not all-lowercase.
    a_has_caps = a != a.lower()
    return a_has_caps or b != b.lower()
# Interactive loop: read the team's words and one word to avoid, print the
# candidate clues for every combination of `num` words, then print the clues
# for the combination that produced the highest-scoring candidate.
while True:
    try:
        # words is all the words on your team (red or blue words)
        words = input("words >> ").strip().split()

        # give one negative word (usually the black word)
        negword = input("neg >> ").strip()
    except EOFError:
        # End of input (e.g. Ctrl-D): leave the loop instead of crashing.
        print()
        break

    # An empty answer means "no negative word" rather than the word "".
    neg = [negword] if negword else []

    print(words)
    print(neg)

    bestpair = None
    bestsim = []
    bestscore = -1

    num = 2
    for p in combinations(words, num):
        print(p)
        try:
            sim = vec.most_similar(positive=list(p), negative=neg)
        except KeyError as err:
            # A word missing from the vector vocabulary makes the lookup
            # fail; report it and move on to the next combination.
            print("skipping", p, "-", err)
            continue

        for cand, score in sim[:5]:
            # Drop candidates that isSame matches against any word in the
            # combination.
            if any(isSame(cand, w) for w in p):
                continue
            print(cand, score)
            if score > bestscore:
                bestpair = p
                bestsim = sim
                bestscore = score

    print()
    if bestpair is None:
        # Fewer than `num` words were given, or no combination produced a
        # usable candidate, so there is nothing to report.
        print("no clue found")
        continue

    print(bestpair)
    # Reuse the neighbours already computed for the best combination instead
    # of querying the model a second time with the same arguments.
    for s in bestsim[:5]:
        cand = s[0]
        if any(isSame(w, cand) for w in bestpair):
            continue
        print(s)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models.keyedvectors import KeyedVectors | |
# Load the text-format vectors and write them back out in word2vec binary
# format, next to the original file with a ".bin" suffix appended.
text_path = "~/data/wiki-news-300d-1M.vec"
binary_path = text_path + ".bin"
vectors = KeyedVectors.load_word2vec_format(text_path, binary=False)
vectors.save_word2vec_format(binary_path, binary=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment