Skip to content

Instantly share code, notes, and snippets.

@AlJohri
Created April 7, 2018 06:19
Show Gist options
  • Select an option

  • Save AlJohri/c6b825198f4400847ede847eb23ba34d to your computer and use it in GitHub Desktop.

Select an option

Save AlJohri/c6b825198f4400847ede847eb23ba34d to your computer and use it in GitHub Desktop.
# encoding: utf-8
import io
import torch
import json
import numpy as np
def _to_text(value):
    """Return *value* as text, decoding byte strings as Latin-1.

    Byte strings (``str`` on Python 2, ``bytes`` on Python 3) are decoded
    with the ``latin-1`` codec, which is the decoding the original script
    applied to every key and value. Anything that is not a byte string is
    returned unchanged.
    """
    if isinstance(value, bytes):
        return value.decode('latin-1')
    return value


def _dump_examples(examples, path):
    """Write *examples* to *path* as UTF-8 JSON Lines, one object per line.

    Each item of *examples* must provide ``.items()``; its keys and values
    are normalised with ``_to_text`` before being serialised.
    """
    with io.open(path, 'w', encoding='utf8') as f:
        for row in examples:
            row = {_to_text(k): _to_text(v) for k, v in row.items()}
            line = json.dumps(row, ensure_ascii=False)
            # On Python 2, json.dumps can hand back a byte string (e.g. for
            # an empty dict); a text-mode io.open file only accepts text.
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            f.write(line + u"\n")


# NOTE(security): torch.load unpickles its input, which can execute
# arbitrary code. Only run this script on .pt files from a trusted source.
vocab = torch.load('data/vocab.pt')

# Export the vocabulary: embedding array plus both lookup tables.
# NOTE(review): vocab.embed is handed to NumPy as-is; presumably it is an
# array or something NumPy can convert -- confirm against the Vocab class.
np.savez_compressed(file='data/embedding.npz', embedding=vocab.embed)
with open('data/word2id.json', 'w') as f:
    json.dump(vocab.word2id, f)
with open('data/id2word.json', 'w') as f:
    json.dump(vocab.id2word, f)

# Export each dataset split in the original order (test, train, val).
# Rebinding `dataset` on every pass lets the previous split be released
# instead of keeping all three loaded at once.
for split in ('test', 'train', 'val'):
    dataset = torch.load('data/%s.pt' % split)
    _dump_examples(dataset.examples, 'data/%s.json' % split)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment