chahuja · May 9, 2017 03:17
diff --git a/pytorch-char.py b/pytorch-char.py
 import numpy as np
 import torch
 from torch.autograd import Variable
 from tqdm import tqdm

 # Sequence / batch geometry of the synthetic benchmark.
 num_unrolling = 100
 batch_size = 100
 vocab_size = 65
 # Network size and training length.
 num_layers = 2
 rnn_size = 418
 NUM_EPOCHS = 100

 # Random stand-in inputs: ten batches of (num_unrolling x vocab_size) feature rows.
 train = torch.Tensor(
     np.random.rand(batch_size * 10, num_unrolling, vocab_size)
 )
 # Random integer class labels in [0, vocab_size), one per time step.
 y_t = torch.LongTensor(
     np.random.randint(0, vocab_size, (batch_size * 10, num_unrolling))
 )

 class lang_model(torch.nn.Module):
     """Stacked LSTM followed by a linear read-out over the vocabulary.

     The LSTM is built with ``batch_first=True``, so inputs are laid out as
     (batch, time, vocab_size). ``batch_size`` is only stored on the instance;
     nothing in this class reads it.
     """

     def __init__(self, vocab_size, batch_size, hidden_state, num_layers):
         super(lang_model, self).__init__()
         # Keep the hyper-parameters on the instance; forward() reads hidden_state.
         self.vocab_size = vocab_size
         self.batch_size = batch_size
         self.hidden_state = hidden_state
         self.num_layers = num_layers
         self.model = torch.nn.LSTM(
             input_size=vocab_size,
             hidden_size=hidden_state,
             num_layers=num_layers,
             batch_first=True,
         )
         self.predict = torch.nn.Linear(hidden_state, vocab_size)

     def forward(self, x):
         """Return per-step scores flattened to (batch * time, vocab_size)."""
         # No initial state is supplied, so the LSTM uses its default one.
         hidden_seq, _final_state = self.model(x, None)
         # Collapse batch and time so every step becomes one row for the linear layer.
         flat_hidden = hidden_seq.contiguous().view(-1, self.hidden_state)
         return self.predict(flat_hidden)

 model = lang_model(vocab_size=vocab_size, batch_size=batch_size, hidden_state=rnn_size, num_layers=num_layers)
 # Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.to(device)

 criterion = torch.nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

 # Batch boundaries are consecutive multiples of batch_size; a trailing
 # partial batch (if any) is never visited.
 indices = range(0, train.size(0) + 1, batch_size)
 batches = list(zip(indices[:-1], indices[1:]))
 for epoch in tqdm(range(NUM_EPOCHS)):
     running_loss = 0.0
     for start, end in batches:
         # Tensors are used directly; the deprecated Variable wrapper is a no-op.
         inputs = train[start:end, :, :].to(device)
         labels = y_t[start:end, :].to(device)

         optimizer.zero_grad()
         outputs = model(inputs)
         # The model emits one row per (sample, time step), so flatten the labels to match.
         loss = criterion(outputs, labels.view(-1))
         loss.backward()
         # .item() extracts the Python float; indexing a 0-dim loss with [0] fails on current PyTorch.
         running_loss += loss.item()
         optimizer.step()
     # Mean loss over the epoch's batches (guarded so an empty batch list prints 0 instead of dividing by zero).
     tqdm.write('Loss:%f' % (running_loss / max(len(batches), 1)))
	import numpy as np
	import torch
	from torch.autograd import Variable
	from tqdm import tqdm

	# Sequence / batch geometry of the synthetic benchmark.
	num_unrolling = 100
	batch_size = 100
	vocab_size = 65
	# Network size and training length.
	num_layers = 2
	rnn_size = 418
	NUM_EPOCHS = 100

	# Random stand-in inputs: ten batches of (num_unrolling x vocab_size) feature rows.
	train = torch.Tensor(
	    np.random.rand(batch_size * 10, num_unrolling, vocab_size)
	)
	# Random integer class labels in [0, vocab_size), one per time step.
	y_t = torch.LongTensor(
	    np.random.randint(0, vocab_size, (batch_size * 10, num_unrolling))
	)

	class lang_model(torch.nn.Module):
	    """Stacked LSTM followed by a linear read-out over the vocabulary.

	    The LSTM is built with ``batch_first=True``, so inputs are laid out as
	    (batch, time, vocab_size). ``batch_size`` is only stored on the instance;
	    nothing in this class reads it.
	    """

	    def __init__(self, vocab_size, batch_size, hidden_state, num_layers):
	        super(lang_model, self).__init__()
	        # Keep the hyper-parameters on the instance; forward() reads hidden_state.
	        self.vocab_size, self.batch_size, self.hidden_state = vocab_size, batch_size, hidden_state
	        self.num_layers = num_layers
	        self.model = torch.nn.LSTM(input_size=vocab_size, hidden_size=hidden_state, num_layers=num_layers, batch_first=True)
	        self.predict = torch.nn.Linear(hidden_state, vocab_size)

	    def forward(self, x):
	        """Return per-step scores flattened to (batch * time, vocab_size)."""
	        # No initial state is supplied, so the LSTM uses its default one.
	        h, _ = self.model(x, None)
	        # Collapse batch and time so every step becomes one row for the linear layer.
	        y = self.predict(h.contiguous().view(-1, self.hidden_state))
	        return y

	model = lang_model(vocab_size=vocab_size, batch_size=batch_size, hidden_state=rnn_size, num_layers=num_layers)
	# Prefer the GPU, but fall back to the CPU so the script still runs without CUDA.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	model = model.to(device)

	criterion = torch.nn.CrossEntropyLoss()
	optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

	# Batch boundaries are consecutive multiples of batch_size; a trailing
	# partial batch (if any) is never visited.
	indices = range(0, train.size(0) + 1, batch_size)
	batches = list(zip(indices[:-1], indices[1:]))
	for epoch in tqdm(range(NUM_EPOCHS)):
	    running_loss = 0.0
	    for start, end in batches:
	        # Tensors are used directly; the deprecated Variable wrapper is a no-op.
	        inputs = train[start:end, :, :].to(device)
	        labels = y_t[start:end, :].to(device)

	        optimizer.zero_grad()
	        outputs = model(inputs)
	        # The model emits one row per (sample, time step), so flatten the labels to match.
	        loss = criterion(outputs, labels.view(-1))
	        loss.backward()
	        # .item() extracts the Python float; indexing a 0-dim loss with [0] fails on current PyTorch.
	        running_loss += loss.item()
	        optimizer.step()
	    # Mean loss over the epoch's batches (guarded so an empty batch list prints 0 instead of dividing by zero).
	    tqdm.write('Loss:%f' % (running_loss / max(len(batches), 1)))