Last active
December 29, 2019 05:46
-
-
Save maxwellmckinnon/944e8634b351f3e81d78d1d1fb3dfee9 to your computer and use it in GitHub Desktop.
Obscure analogy generator from word embeddings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def random_analogies():
    """Print and return one randomly generated word analogy.

    Draws three random alphabetic words ``x``, ``y`` and ``z`` from the
    module-level ``embeddings`` mapping, forms the offset vector
    ``vy - vx + vz`` and asks ``get_nearest`` for the closest entry. A draw
    is discarded and retried when the closest word is not alphabetic, when
    its score is above ``1e-4``, or when it is one of the three drawn words.

    Returns:
        The word that completes the printed analogy
        "x is to y as z is to <word>".

    Raises:
        ValueError: If ``embeddings`` contains no purely alphabetic key
            (there would be nothing valid to sample, so the search could
            never finish).
    """
    # Build the pool of usable words once instead of materialising the full
    # key list three times on every iteration. Sampling uniformly from the
    # pre-filtered pool yields the same distribution as drawing from all
    # keys and rejecting non-alphabetic draws.
    candidates = [w for w in embeddings if w.isalpha()]
    if not candidates:
        raise ValueError("embeddings contains no alphabetic words to sample from")

    while True:
        x, y, z = (random.choice(candidates) for _ in range(3))

        # "x is to y as z is to ?" corresponds to the offset y - x + z, the
        # same arithmetic x_is_to_y_as_z_is_to uses for the same sentence.
        vector = embeddings[y] - embeddings[x] + embeddings[z]
        word, cosine_sim = get_nearest(vector, embeddings)[0]

        if not word.isalpha():
            continue
        # NOTE(review): only results scoring at or below 1e-4 are accepted.
        # Whether this score is a similarity or a distance depends on
        # get_nearest, which is not visible here -- confirm the intent.
        if cosine_sim > 1e-4:
            continue
        # An analogy that just echoes one of its own inputs is not useful.
        if word in (x, y, z):
            continue

        print(x, "is to", y, "as", z, "is to", word, 'cosine sim:', cosine_sim)
        return word
def x_is_to_y_as_z_is_to(x, y, z, embeddings):
    """Complete the analogy "x is to y as z is to ?" and print the result.

    Args:
        x: First word of the reference pair; looked up lower-cased.
        y: Second word of the reference pair; looked up lower-cased.
        z: Word whose counterpart is wanted; looked up lower-cased.
        embeddings: Mapping from word to vector. The vectors must support
            ``-`` and ``+``.

    Returns:
        The word of the first entry returned by ``get_nearest`` for the
        offset vector ``vy - vx + vz``.

    Raises:
        KeyError: If any lower-cased input word is missing from
            ``embeddings`` (assuming a dict-like mapping).
    """
    # Look the three words up in x, y, z order.
    vec_x, vec_y, vec_z = (embeddings[term.lower()] for term in (x, y, z))
    target = vec_y - vec_x + vec_z

    # Only the first result from get_nearest is used; it is unpacked as a
    # (word, score) pair.
    best_match = get_nearest(target, embeddings)[0]
    word, cosine_sim = best_match

    print(x, "is to", y, "as", z, "is to", word, 'cosine sim:', cosine_sim)
    return word
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment