@reuben
Created November 7, 2016 12:47
import tensorflow as tf
import numpy as np
import os
from util.audio import audiofile_to_input_vector
from util.text import *
from glob import glob
from threading import Thread
from Queue import Queue
all_txt_files = glob("./data/ldc93s1/*.txt")
num_mfcc_features = 26
num_context = 5
def text_to_char_array(original):
    # Create list of sentence's words w/spaces replaced by ''
    result = original.replace(" '", "") # TODO: Deal with this properly
    result = result.replace("'", "")    # TODO: Deal with this properly
    result = result.replace(' ', '  ')  # double the spaces so split() yields '' markers between words
    result = result.split(' ')
    # Tokenize words into letters adding in SPACE_TOKEN where required
    result = np.hstack([SPACE_TOKEN if xt == '' else list(xt) for xt in result])
    # Map characters into indices
    result = np.asarray([SPACE_INDEX if xt == SPACE_TOKEN else ord(xt) - FIRST_INDEX for xt in result])
    return result
def data_iterator():
    for txt_file in all_txt_files:
        with open(txt_file, "r") as fin:
            # Drop the first two fields of the transcript line and strip the trailing period
            y = ' '.join(fin.read().strip().lower().split(' ')[2:]).replace('.', '')
        wav_file = os.path.splitext(txt_file)[0] + ".wav"
        x = audiofile_to_input_vector(wav_file, num_mfcc_features, num_context)
        x_length = len(x)
        yield x, x_length, text_to_char_array(y), len(y)
x = tf.placeholder(tf.float32, [None, num_mfcc_features + (2 * num_mfcc_features * num_context)])
x_length = tf.placeholder(tf.int32, [])
y = tf.placeholder(tf.int32, [None,])
y_length = tf.placeholder(tf.int32, [])
s = tf.InteractiveSession()
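# PaddingFIFOQueue pads every None-shaped dimension out to the size of the largest
# element in a dequeue_many() batch, which is what lets variable-length MFCC
# sequences and label arrays be batched together below.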
queue = tf.PaddingFIFOQueue(shapes=[[None, num_mfcc_features + (2 * num_mfcc_features * num_context)], [], [None,], []],
                            dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
                            capacity=4)
enqueue_op = queue.enqueue([x, x_length, y, y_length])
def fill_queue_thread(session):
    while True:
        for data_x, data_x_length, data_y, data_y_length in data_iterator():
            session.run(enqueue_op, feed_dict={x: data_x,
                                               x_length: data_x_length,
                                               y: data_y,
                                               y_length: data_y_length})
thread = Thread(target=fill_queue_thread, args=(s,))
thread.daemon = True
thread.start()
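# The feeder thread is a daemon and loops over the data forever, so the dequeue
# ops below always have something to consume and the process can exit without
# joining the thread.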
batch_size = 2
assert len(all_txt_files) % batch_size == 0
batch_x, batch_x_lengths, batch_y, batch_y_lengths = queue.dequeue_many(batch_size)
# batch_y = tf.SparseTensor(batch_y_indices, batch_y_values, batch_y_shape)
# Note: each session.run() below dequeues (and consumes) a separate batch
print(s.run(batch_x))
print(s.run(batch_x_lengths))
print(s.run(batch_y))
def ctc_label_dense_to_sparse(labels, label_lengths):
    # Convert a batch of zero-padded dense labels into a tf.SparseTensor suitable for CTC
    label_shape = tf.shape(labels)
    num_batches_tns = tf.pack([label_shape[0]])
    max_num_labels_tns = tf.pack([label_shape[1]])

    def range_less_than(previous_state, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init,
                         parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
                             label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [True])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(tf.reshape(tf.concat(0, [batch_ind, label_ind]), [2, -1]))
    vals_sparse = tf.gather_nd(labels, indices)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
sparse_labels = ctc_label_dense_to_sparse(batch_y, batch_y_lengths)
print(sparse_labels)
print(s.run(sparse_labels))
# batch_x, batch_x_lengths, sparse_labels are the inputs to the network (this may require switching to a dynamic RNN)
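# Below is a rough sketch (not part of the original gist) of how batch_x,
# batch_x_lengths and sparse_labels could be wired into a dynamic RNN plus CTC
# loss, using the same TF 0.x-era API as above. num_hidden, num_classes and the
# single BasicLSTMCell layer are hypothetical placeholder choices, not the real
# DeepSpeech architecture.
num_hidden = 100
num_classes = 28  # assumed label inventory: space + 26 letters, plus the CTC blank

cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden)
# dynamic_rnn accepts the zero-padded, variable-length batches from the queue
outputs, _ = tf.nn.dynamic_rnn(cell, batch_x, sequence_length=batch_x_lengths,
                               dtype=tf.float32)  # [batch_size, max_time, num_hidden]
W = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1))
b = tf.Variable(tf.zeros([num_classes]))
logits = tf.reshape(tf.matmul(tf.reshape(outputs, [-1, num_hidden]), W) + b,
                    [batch_size, -1, num_classes])
logits = tf.transpose(logits, [1, 0, 2])  # ctc_loss expects time-major logits
loss = tf.reduce_mean(tf.nn.ctc_loss(logits, sparse_labels, batch_x_lengths))
# (Variables would need initializing, e.g. s.run(tf.initialize_all_variables()),
# before evaluating the loss.)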