Skip to content

Instantly share code, notes, and snippets.

@hsanchez
Forked from dav009/word2vec_to_tsv.py
Created February 25, 2020 21:16
Show Gist options
  • Save hsanchez/349119f160ac75cf6fdb2af23c2d8147 to your computer and use it in GitHub Desktop.
Save hsanchez/349119f160ac75cf6fdb2af23c2d8147 to your computer and use it in GitHub Desktop.
import gensim
import codecs
from gensim.models import Word2Vec
import json
def _load_vectors(path_to_model):
    """Load binary word2vec-format vectors with whichever gensim API exists."""
    try:
        # Newer gensim releases expose the word2vec-format loader on
        # KeyedVectors; the Word2Vec classmethod used originally was
        # deprecated and later removed.
        from gensim.models import KeyedVectors
    except ImportError:
        # Older gensim without KeyedVectors: use the original loader.
        return Word2Vec.load_word2vec_format(path_to_model, binary=True)
    return KeyedVectors.load_word2vec_format(path_to_model, binary=True)


def export_to_file(path_to_model, output_file):
    """Export a binary word2vec model to a tab-separated text file.

    Each output line has the form ``<word>\\t<v1>,<v2>,...,<vn>`` where the
    vector components are rendered with ``str()`` and joined by commas.
    Every exported word is also printed to stdout as progress output.

    Args:
        path_to_model: Path to a word2vec model in binary word2vec format.
        output_file: Path of the UTF-8 text file to create or overwrite.
    """
    # Load first: if the model cannot be read we fail before touching the
    # output path, instead of leaving an empty file and an open handle.
    vectors = _load_vectors(path_to_model)

    # gensim 4 exposes the vocabulary as `key_to_index`; earlier releases
    # only provide `vocab`. Both are mappings keyed by the word.
    vocab = getattr(vectors, "key_to_index", None)
    if vocab is None:
        vocab = vectors.vocab

    # The context manager closes the file even if a write fails midway.
    with codecs.open(output_file, 'w', 'utf-8') as output:
        for mid in vocab:
            print(mid)
            vector_str = ",".join(str(dimension) for dimension in vectors[mid])
            output.write(mid + "\t" + vector_str + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment