Skip to content

Instantly share code, notes, and snippets.

@ChrisBeaumont
Created March 13, 2015 16:00
Show Gist options
  • Save ChrisBeaumont/4f2f09665adde0fecbf9 to your computer and use it in GitHub Desktop.
Save ChrisBeaumont/4f2f09665adde0fecbf9 to your computer and use it in GitHub Desktop.
word2vec examples
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from gensim.models import Word2Vec"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"fn = \"freebase-vectors-skipgram1000-en.bin\"\n",
"model = Word2Vec.load_word2vec_format(fn, binary=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Expect 'trip' to be the top hit for 'vacation' (similarity 0.723), per Chris' blog"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('/en/summer_vacation', 0.35146796703338623),\n",
" ('/en/assistant_manager', 0.34348905086517334),\n",
" ('/en/annual_leave', 0.3420445919036865),\n",
" ('/en/sick_leave', 0.3194432854652405),\n",
" ('/en/hospital_beds', 0.31834304332733154),\n",
" ('/en/bonus_track', 0.2938608229160309),\n",
" ('/en/part_time', 0.28707462549209595),\n",
" ('/en/day-tripper', 0.2864302694797516),\n",
" ('/en/spring_break', 0.284801185131073),\n",
" ('/en/staycation', 0.28439220786094666)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.most_similar('/en/vacation')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'/en/trip' in model.vocab"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('/en/t_i_vs_t_i_p', 0.4389854669570923),\n",
" ('/en/shoulder_lean', 0.4161432087421417),\n",
" ('/en/what_you_know', 0.40737658739089966),\n",
" ('/en/the_adventures_of_b_o_b', 0.4072108864784241),\n",
" ('/en/the_inspiration_thug_motivation_102', 0.40640342235565186),\n",
" ('/en/rubberband_man', 0.4014279246330261),\n",
" ('/en/just_like_you', 0.4010814428329468),\n",
" ('/en/the_red_light_district', 0.3974035680294037),\n",
" ('/en/the_sound_of_revenge', 0.39560556411743164),\n",
" ('/en/confessions', 0.38883811235427856)]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.most_similar(['/en/woman', '/en/king'], ['/en/man'])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"[('/en/king', 0.7387572526931763),\n",
" ('/en/t_i_vs_t_i_p', 0.4389854669570923),\n",
" ('/en/shoulder_lean', 0.41614317893981934),\n",
" ('/en/what_you_know', 0.40737655758857727),\n",
" ('/en/the_adventures_of_b_o_b', 0.4072108864784241),\n",
" ('/en/the_inspiration_thug_motivation_102', 0.40640342235565186),\n",
" ('/en/rubberband_man', 0.4014279246330261),\n",
" ('/en/just_like_you', 0.40108147263526917),\n",
" ('/en/the_red_light_district', 0.3974035680294037),\n",
" ('/en/the_sound_of_revenge', 0.39560556411743164)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.most_similar([model['/en/woman'] + model['/en/king'] - model['/en/man']])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment