@xmfbit
Created September 27, 2017 02:38
Char level RNN generator
# data.py
import os


class CharDataset(object):
    """Load a text file and expose it as a sequence of integer-coded characters."""
    def __init__(self, path):
        if not os.path.exists(path):
            raise RuntimeError('Cannot open the file: {}'.format(path))
        self.raw_data = open(path, 'r').read()
        self.chars = list(set(self.raw_data))
        self.data_size = len(self.raw_data)
        print('There are {} characters in the file'.format(self.data_size))
        self.char_size = len(self.chars)
        print('There are {} different characters in the file'.format(self.char_size))
        self.char_to_idx = {ch: i for i, ch in enumerate(self.chars)}
        self.idx_to_char = {i: ch for i, ch in enumerate(self.chars)}
        self._encode()

    def _encode(self):
        # Python 2: map() returns a plain list of integer codes
        self.coded_data = map(lambda x: self.char_to_idx[x], self.raw_data)
        print('Here is the mapping from char to integer:')
        print('=' * 90)
        for k, v in self.char_to_idx.items():
            print('{}: {:3d}'.format(k, v))
        print('=' * 90)
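A minimal usage sketch for the dataset class on its own (the path is the one the training script below uses; this assumes Python 2, where map() returns a list):

# Hypothetical standalone check of CharDataset
dataset = CharDataset('./data/linux-kernel/input.txt')
first_ten = dataset.coded_data[:10]                       # integer codes of the first 10 chars
decoded = ''.join(dataset.idx_to_char[i] for i in first_ten)
assert decoded == dataset.raw_data[:10]                   # the encoding round-trips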
# train script: fits the model on the coded text and samples from it
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from data import CharDataset
from model import RNNModel
#from tensorboard_logger import configure, log_value
# using dmlc/tensorboard
from tensorboard import SummaryWriter
import numpy as np

#configure('runs/for-linux-kernel')
log_dir = './dmlc-tensorboard-log/04'
summary_writer = SummaryWriter(log_dir)

input_txt = './data/linux-kernel/input.txt'
#input_txt = './data/tinyshakespeare/input.txt'
dataset = CharDataset(input_txt)
data = dataset.coded_data
nchars = dataset.char_size

use_cuda = True
seq_len = 125
def batchify(bsz):
    """Split the coded text into bsz parallel streams; one-hot encode the input."""
    nbat = len(data) // bsz
    # drop the tail so the text divides evenly into bsz streams
    narrow_data = data[:nbat * bsz]
    # for each stream, we have batch_length chars
    batch_length = nbat
    tensor_target = torch.Tensor(narrow_data).view(bsz, batch_length).long()
    # one-hot input of shape (bsz, batch_length, nchars)
    tensor_data = torch.zeros(bsz, batch_length, nchars).float()
    for i in xrange(bsz):
        for j in xrange(batch_length):
            nonzero_idx = narrow_data[i * batch_length + j]
            tensor_data[i][j][nonzero_idx] = 1.
    if use_cuda:
        tensor_data = tensor_data.cuda()
        tensor_target = tensor_target.cuda()
    return tensor_data, tensor_target

bsz = 64
tensor_data, tensor_target = batchify(bsz)

def get_batch(i):
    # clamp the chunk so the shifted target stays in range
    available_length = min(seq_len, tensor_data.size(1) - 1 - i)
    end_idx = i + available_length
    data = tensor_data[:, i:end_idx, :]
    # targets are the same characters shifted one step ahead
    target = tensor_target[:, i + 1:end_idx + 1]
    return data, target
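To make the shifted slicing concrete, here is a tiny standalone sketch of how input chunks pair with target chunks (all numbers made up; nothing here touches the real data):

# inputs [i, i+avail) line up with targets [i+1, i+avail+1)
stream_len, demo_seq_len = 8, 3
for i in range(0, stream_len, demo_seq_len):
    avail = min(demo_seq_len, stream_len - 1 - i)
    print('inputs [{}, {}) -> targets [{}, {})'.format(i, i + avail, i + 1, i + avail + 1))
# inputs [0, 3) -> targets [1, 4)
# inputs [3, 6) -> targets [4, 7)
# inputs [6, 7) -> targets [7, 8)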
ninp, nout = nchars, nchars
nhid = 100
nlayers = 2
model = RNNModel(nlayers=nlayers, ninp=ninp, nout=nout, nhid=nhid, dropout=0.5)
if use_cuda:
    model = model.cuda()

def get_init_hidden(bsz, volatile=False):
    # zero-initialized (h, c) state for the LSTM
    h0 = Variable(torch.zeros(nlayers, bsz, nhid).float(), volatile=volatile)
    c0 = Variable(torch.zeros(nlayers, bsz, nhid).float(), volatile=volatile)
    if use_cuda:
        h0, c0 = h0.cuda(), c0.cuda()
    return h0, c0

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
step = 0
def train():
    global step
    h, c = get_init_hidden(bsz)
    for idx, i in enumerate(range(0, tensor_data.size(1), seq_len)):
        optimizer.zero_grad()
        data, target = get_batch(i)
        data, target = data.contiguous(), target.contiguous()
        # flatten targets to (N*L,) to match the flattened logits below
        target = target.view(target.size(0) * target.size(1))
        data, target = Variable(data), Variable(target)
        # re-wrap the hidden state so BPTT is truncated at the chunk boundary
        h, c = Variable(h.data), Variable(c.data)
        output, (h, c) = model(data, (h, c))
        output = output.view(output.size(0) * output.size(1), -1)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        #log_value('loss', loss.data[0])
        #summary_writer.add_scalar('loss', loss.data[0], global_step=step)
        summary_writer.add_scalar('loss', loss.data[0], step)
        step += 1
        if idx % 100 == 0:
            print('idx: {}, loss = {:.4f}'.format(idx, loss.data[0]))
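The flattening in train() follows the nn.CrossEntropyLoss convention: (N, C) logits scored against (N,) class indices. A standalone sketch with made-up sizes, in the same pre-0.4 PyTorch idioms as the gist:

import torch
import torch.nn as nn
from torch.autograd import Variable

logits = Variable(torch.randn(6, 4))                      # 6 flattened time steps, 4 classes
labels = Variable(torch.LongTensor([0, 1, 2, 3, 0, 1]))   # one class index per step
print(nn.CrossEntropyLoss()(logits, labels))              # scalar loss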
def generate(length=1000):
    # sample `length` characters from the model, seeded with one random char
    content = []
    print('*' * 90)
    idx2char = dataset.idx_to_char
    input = torch.zeros(1, 1, nchars).float()
    idx = np.random.randint(nchars)
    input[0, 0, idx] = 1.
    content.append(idx2char[idx])
    input = Variable(input, volatile=True)
    h, c = get_init_hidden(1, True)
    if use_cuda:
        input = input.cuda()
    for i in xrange(length):
        out, (h, c) = model(input, (h, c))
        out = out.view(-1, nchars)
        out = F.softmax(out)
        # draw the next char from the softmax distribution
        choice = torch.multinomial(out.data.squeeze(), 1)
        idx = choice.cpu()[0]
        content.append(idx2char[idx])
        # feed the sampled char back in as the next one-hot input
        input.data.fill_(0)
        input.data[0, 0, idx] = 1.
    print(''.join(content))
    print('*' * 90)
max_epoch = 50
for epoch in xrange(1, max_epoch + 1):
    print('=' * 20 + ' epoch {:03d} '.format(epoch) + '=' * 20)
    train()
    # sample after every epoch; raise the modulus to sample less often
    if epoch % 1 == 0:
        generate()
summary_writer.close()
# model.py
import torch
import torch.nn as nn


class RNNModel(nn.Module):
    """Multi-layer LSTM with a per-step linear projection to the vocabulary."""
    def __init__(self, nlayers, ninp, nhid, nout, dropout=None):
        super(RNNModel, self).__init__()
        rnn_dropout = dropout if dropout is not None else 0
        self.rnn = nn.LSTM(input_size=ninp, hidden_size=nhid,
                           num_layers=nlayers, batch_first=True,
                           dropout=rnn_dropout)
        self.fc = nn.Linear(nhid, nout)

    def forward(self, input, hidden):
        # input: N x L x C (one-hot characters)
        out, hidden = self.rnn(input, hidden)
        # out: N x L x H
        bsz, seq = out.size(0), out.size(1)
        out = out.contiguous()
        # flatten to (N*L) x H so one Linear scores every time step
        out = out.view(bsz * seq, -1)
        out = self.fc(out)
        # out: (N x L) x C; reshape back to N x L x C
        out = out.view(bsz, seq, -1)
        return out, hidden
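A quick shape check for the model in isolation (a sketch using the same pre-0.4 PyTorch idioms as the gist; all sizes are made up):

import torch
from torch.autograd import Variable
from model import RNNModel

m = RNNModel(nlayers=2, ninp=7, nout=7, nhid=16, dropout=0.5)
x = Variable(torch.zeros(2, 5, 7))     # batch of 2, sequence of 5, vocab of 7
h0 = Variable(torch.zeros(2, 2, 16))   # (nlayers, batch, nhid)
c0 = Variable(torch.zeros(2, 2, 16))
out, (h, c) = m(x, (h0, c0))
print(out.size())                      # torch.Size([2, 5, 7]) -- per-step vocab logits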