##Training word2vec using gensim
### Train word2vec model
from gensim.models import word2vec

# Tokenise on whitespace: each element is the list of words in one sentence.
# A concrete list is built (instead of `map(...)`) because Word2Vec iterates
# over the corpus more than once; on Python 3 `map` returns a one-shot
# iterator that would already be exhausted on the second pass.
full_text = [sentence.split() for sentence in preprocess_text]

# Hyper-parameters
num_features = 500    # Word vector dimensionality
min_word_count = 40   # Minimum word count
num_workers = 4       # Number of threads to run in parallel
context = 10          # Context window size
downsampling = 1e-3   # Downsample setting for frequent words

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Training model...")
model = word2vec.Word2Vec(
    full_text,
    workers=num_workers,
    size=num_features,
    min_count=min_word_count,
    window=context,
    sample=downsampling
)
print("Finish training model...")

# Only do this when the model will not be trained any further.
model.init_sims(replace=True)

# The file name encodes the hyper-parameters used above; keep it in sync
# with them if they change.
model_name = "pm_500features_40minwords_10context"
model.save(model_name)
### Load trained word2vec model
from gensim.models import word2vec

# Path of the model file written by the training step.
saved_model_path = 'pm_500features_40minwords_10context'
model = word2vec.Word2Vec.load(saved_model_path)

# Quick sanity check: look up the words most similar to 'regression'.
model.most_similar('regression')