Created
April 16, 2017 16:17
-
-
Save duyet/26d6436c9157f75d69fa23b7a0d44886 to your computer and use it in GitHub Desktop.
Truyện Kiều W2V - http://blog.duyet.net/2017/04/nlp-truyen-kieu-word2vec.html Raw
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"3258\n" | |
] | |
} | |
], | |
"source": [ | |
"df[\"context\"] = df[\"1gram\"] + df[\"2gram\"]\n", | |
"train_data = df.context.tolist()\n", | |
"print len(train_data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Train a gensim Word2Vec model on train_data (sg=1 selects skip-gram)\n", | |
"from gensim.models import Word2Vec\n", | |
"import logging\n", | |
"\n", | |
"logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) \n", | |
"model = Word2Vec(train_data, size=100, window=10, min_count=3, workers=4, sg=1)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [Root]", | |
"language": "python", | |
"name": "Python [Root]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment