Skip to content

Instantly share code, notes, and snippets.

@ludwiktrammer
Last active February 19, 2020 10:01
Show Gist options
  • Save ludwiktrammer/8b9ad680d68b0aac409350576318873f to your computer and use it in GitHub Desktop.
Save ludwiktrammer/8b9ad680d68b0aac409350576318873f to your computer and use it in GitHub Desktop.
# Words whose rank relative to 'woman' and 'man' is compared below.
professions = [
    'director', 'secretary', 'doctor', 'nurse',
    'volunteer', 'politician', 'artist', 'scientist',
    'teacher', 'professor', 'parent', 'expert',
    'veterinarian', 'physician', 'chef', 'cook',
    'physicist', 'babysitter', 'plumber', 'unemployed',
]
def describe_closer(woman_rank, man_rank):
    """Return a short sentence saying which rank is closer and by how much.

    A lower rank is treated as "closer". The reported percentage is the gap
    between the two ranks relative to the smaller (closer) rank, rounded to
    the nearest integer.

    Args:
        woman_rank: Rank obtained for 'woman'. Must be non-zero when it is
            the smaller of the two, because it is used as the divisor.
        man_rank: Rank obtained for 'man'. Must be non-zero when it is the
            smaller of the two, because it is used as the divisor.

    Returns:
        "Both are equal!" when the ranks match, otherwise
        "Woman is N% closer" or "Man is N% closer".

    Raises:
        ZeroDivisionError: If the smaller of two differing ranks is 0.
    """
    if woman_rank == man_rank:
        return "Both are equal!"
    if woman_rank < man_rank:
        # Gap expressed as a percentage of the closer (smaller) rank.
        return "Woman is {}% closer".format(round((man_rank - woman_rank) * 100 / woman_rank))
    return "Man is {}% closer".format(round((woman_rank - man_rank) * 100 / man_rank))
def print_pretty(profession, woman_rank, man_rank, html=False):
    """Print one result row for a profession, as plain text or as an HTML row.

    Args:
        profession: Label shown in the first column.
        woman_rank: Rank shown in the second column.
        man_rank: Rank shown in the third column.
        html: When true, emit a ``<tr>`` with four ``<td>`` cells instead of
            the fixed-width, pipe-separated text row.

    The fourth column is the sentence produced by ``describe_closer`` for the
    two ranks.
    """
    # One <tr> holding four <td> cells, with the cells on their own line.
    html_pattern = '<tr>\n' + '<td>{}</td>' * 4 + '\n</tr>'
    text_pattern = '{:<15} | {:10} | {:10} | {}'
    pattern = html_pattern if html else text_pattern
    print(pattern.format(
        profession,
        woman_rank,
        man_rank,
        describe_closer(woman_rank, man_rank),
    ))
from gensim.models import KeyedVectors
# You can download the model here:
# https://github.com/eyaler/word2vec-slim
# The model was trained on 100 billion words from Google News.
print("Loading the model...")
model = KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300-SLIM.bin', binary=True)
print("Model loaded.")
print()

# Header row; the column widths match the plain-text pattern in print_pretty.
print('{:<15} | {:10} | {:10} | {}'.format(
    "profession",
    "woman rank",
    "man rank",
    "comment",
))
print('-' * 60)

# One row per profession: look up the rank of 'woman' and of 'man' relative
# to the profession word, then print them side by side with a comparison.
for profession in professions:
    woman_rank = model.rank(profession, 'woman')
    man_rank = model.rank(profession, 'man')
    print_pretty(profession, woman_rank, man_rank, html=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment