Created November 7, 2016 12:47
Save reuben/889462a3cd7bfe6aa422d7c5860c99c8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import tensorflow as tf | |
| import numpy as np | |
| import os | |
| from util.audio import audiofile_to_input_vector | |
| from util.text import * | |
| from glob import glob | |
| from threading import Thread | |
| from Queue import Queue | |
# Paths to every LDC93S1 transcript file; each .txt has a matching .wav beside it.
all_txt_files = glob("./data/ldc93s1/*.txt")
# Number of MFCC features computed per audio frame.
num_mfcc_features = 26
# Frames of left/right context concatenated onto each frame's features.
num_context = 5
def text_to_char_array(original):
    """Convert a transcript string into a 1-D numpy array of character indices.

    Spaces map to SPACE_INDEX; every other character maps to
    ord(char) - FIRST_INDEX (constants come from util.text).

    Args:
        original: lowercase transcript string.

    Returns:
        1-D numpy int array of character indices.
    """
    # Strip apostrophes for now.  TODO: Deal with this properly
    result = original.replace(" '", "")
    result = result.replace("'", "")
    # Double each space so splitting on ' ' yields an empty string between
    # words; those empty strings become explicit SPACE_TOKENs below.
    # (A single-space replace would be a no-op and no SPACE_TOKEN would
    # ever be emitted.)
    result = result.replace(' ', '  ')
    result = result.split(' ')
    # Tokenize words into letters, inserting SPACE_TOKEN where required.
    result = np.hstack([SPACE_TOKEN if xt == '' else list(xt) for xt in result])
    # Map characters to integer indices.
    return np.asarray([SPACE_INDEX if xt == SPACE_TOKEN else ord(xt) - FIRST_INDEX
                       for xt in result])
def data_iterator():
    """Yield one (features, n_frames, label_indices, n_labels) tuple per utterance.

    For each transcript file: drop the two leading header fields of the LDC93S1
    line, lowercase, remove periods, then pair the text with MFCC features from
    the matching .wav file.
    """
    for txt_path in all_txt_files:  # renamed from `file`, which shadows a builtin
        with open(txt_path, "r") as fin:
            transcript = ' '.join(fin.read().strip().lower().split(' ')[2:]).replace('.', '')
        wav_file = os.path.splitext(txt_path)[0] + ".wav"
        features = audiofile_to_input_vector(wav_file, num_mfcc_features, num_context)
        labels = text_to_char_array(transcript)
        # Yield len(labels), not len(transcript): text_to_char_array strips
        # apostrophes, so the raw text length can disagree with the actual
        # label-sequence length fed to the queue.
        yield features, len(features), labels, len(labels)
# Graph inputs for a single utterance, fed by the producer thread below.
# x is [n_frames, features-with-context]: base MFCCs plus num_context frames
# of context on each side.
x = tf.placeholder(tf.float32, [None, num_mfcc_features + (2 * num_mfcc_features * num_context)])
x_length = tf.placeholder(tf.int32, [])  # number of frames in x
y = tf.placeholder(tf.int32, [None,])    # label character indices
y_length = tf.placeholder(tf.int32, [])  # number of labels in y
s = tf.InteractiveSession()
# PaddingFIFOQueue pads the variable-length dims (None) to a common size
# within each dequeued batch.
queue = tf.PaddingFIFOQueue(shapes=[[None, num_mfcc_features + (2 * num_mfcc_features * num_context)], [], [None,], []],
                            dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
                            capacity=4)
enqueue_op = queue.enqueue([x, x_length, y, y_length])
def fill_queue_thread(session):
    """Producer loop: cycle over the dataset forever, enqueueing one example at a time."""
    while True:
        for feats, n_frames, labels, n_labels in data_iterator():
            feed = {x: feats,
                    x_length: n_frames,
                    y: labels,
                    y_length: n_labels}
            session.run(enqueue_op, feed_dict=feed)
# Start the producer as a daemon so it dies with the main thread.
thread = Thread(target=fill_queue_thread, args=(s,))
thread.daemon = True
thread.start()
batch_size = 2
# dequeue_many would block forever on the final partial batch if the dataset
# size were not a multiple of batch_size.
assert len(all_txt_files) % batch_size == 0
batch_x, batch_x_lengths, batch_y, batch_y_lengths = queue.dequeue_many(batch_size)
# batch_y = tf.SparseTensor(batch_y_indices, batch_y_values, batch_y_shape)
# NOTE(review): each s.run below executes the dequeue op again, so the three
# prints show three *different* batches — confirm this is intended.
print(s.run(batch_x))
print(s.run(batch_x_lengths))
print(s.run(batch_y))
def ctc_label_dense_to_sparse(labels, label_lengths):
    """Convert a dense, padded batch of label sequences into a tf.SparseTensor.

    Args:
        labels: int tensor [batch, max_len] of label indices, right-padded.
        label_lengths: int tensor [batch] with the true length of each row.

    Returns:
        tf.SparseTensor containing only the first label_lengths[i] entries of
        each row — the form expected by tf.nn.ctc_loss.

    Note: written against TF 0.x APIs (tf.pack, axis-first tf.concat,
    boolean-dims tf.reverse); these were renamed/changed in TF 1.x.
    """
    label_shape = tf.shape(labels)
    num_batches_tns = tf.pack([label_shape[0]])     # [batch] as a rank-1 tensor
    max_num_labels_tns = tf.pack([label_shape[1]])  # [max_len] as a rank-1 tensor

    def range_less_than(previous_state, current_input):
        # One mask row: True at positions < this sequence's length.
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    # Scan over the lengths to build a [batch, 1, max_len] validity mask.
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init,
                         parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]  # drop the singleton dim -> [batch, max_len]
    # Column-index matrix: label_array[b, t] == t.
    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
                             label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)
    # Row-index matrix: batch_array[b, t] == b (tiled into the reversed shape,
    # then transposed back to [batch, max_len]).
    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [True])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)
    # Pair up (batch, position) coordinates for every kept label.
    indices = tf.transpose(tf.reshape(tf.concat(0, [batch_ind, label_ind]), [2, -1]))
    vals_sparse = tf.gather_nd(labels, indices)
    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
# Sparse labels in the form tf.nn.ctc_loss expects.
sparse_labels = ctc_label_dense_to_sparse(batch_y, batch_y_lengths)
print(sparse_labels)
# NOTE(review): this run() dequeues yet another batch from the queue.
print(s.run(sparse_labels))
# batch_x, batch_x_lengths, sparse_labels are the inputs to the network (this may require switching to a dynamic RNN)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment