This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if __name__=="__main__": | |
#make the iterators and next element op | |
next_element, training_init_op, validation_init_op = prepare_dataset_iterators(batch_size=32) | |
... | |
for epoch in range(1000): | |
#Initialize the iterator to consume training data | |
sess.run(training_init_op) | |
while True: | |
#As long as the iterator is not empty |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
from preppy import BibPreppy | |
def expand(x): | |
''' | |
Hack: padded_batch doesn't play nice with scalars, so we expand the scalar to a vector of length 1 | |
:param x: | |
:return: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class BibPreppy(Preppy): | |
''' | |
We'll slightly extend the way we write TFRecords to store the id of the book it came from | |
''' | |
def __init__(self,tokenizer_fn): | |
super(BibPreppy,self).__init__(tokenizer_fn) | |
self.book_map ={} | |
def sequence_to_tf_example(self, sequence, book_id): | |
id_list = self.sentance_to_id_list(sequence) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Tools to take a directory of txt files and convert them to TF records | |
''' | |
from collections import defaultdict, Counter | |
import numpy as np | |
import tensorflow as tf | |
PAD = "<PAD>" | |
EOS = "<EOS>" | |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# `tf` is only a local alias, not an importable module, so the import must
# name the real package.
from tensorflow.contrib.rnn import GRUCell
# The imported class is spelled `GRUCell`; the mis-cased `GruCell` raised NameError.
# NOTE(review): the TF 1.x GRUCell constructor takes a `num_units` argument —
# confirm the intended size and pass it here.
cell = GRUCell()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
GRU layer implementation originally taken from https://github.com/ottokart/punctuator2 | |
''' | |
class GRULayer(object): | |
def __init__(self, rng, n_in, n_out, minibatch_size): | |
super(GRULayer, self).__init__() | |
# Notation from: An Empirical Exploration of Recurrent Network Architectures | |
self.n_in = n_in |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
def conv1d(input_, output_size, width, stride): | |
''' | |
:param input_: A tensor of embedded tokens with shape [batch_size,max_length,embedding_size] | |
:param output_size: The number of feature maps we'd like to calculate | |
:param width: The filter width | |
:param stride: The stride | |
:return: A tensor of the convolved input with shape [batch_size,max_length,output_size] | |
''' | |
inputSize = input_.get_shape()[-1] # How many channels on the input (The size of our embedding for instance) |