@reuben
Created November 7, 2016 12:47
import tensorflow as tf
import numpy as np
import os
from util.audio import audiofile_to_input_vector
from util.text import *
from glob import glob
from threading import Thread
from Queue import Queue
all_txt_files = glob("./data/ldc93s1/*.txt")
num_mfcc_features = 26
num_context = 5
def text_to_char_array(original):
    # Create list of sentence's words w/spaces replaced by ''
    result = original.replace(" '", "") # TODO: Deal with this properly
    result = result.replace("'", "")    # TODO: Deal with this properly
    result = result.replace(' ', '  ')  # double the spaces so split() yields '' markers between words
    result = result.split(' ')
    # Tokenize words into letters adding in SPACE_TOKEN where required
    result = np.hstack([SPACE_TOKEN if xt == '' else list(xt) for xt in result])
    # Map characters into indices
    result = np.asarray([SPACE_INDEX if xt == SPACE_TOKEN else ord(xt) - FIRST_INDEX for xt in result])
    return result
def data_iterator():
    for txt_file in all_txt_files:
        with open(txt_file, "r") as fin:
            # Drop the first two fields of the transcript line and strip the trailing period
            y = ' '.join(fin.read().strip().lower().split(' ')[2:]).replace('.', '')
        wav_file = os.path.splitext(txt_file)[0] + ".wav"
        x = audiofile_to_input_vector(wav_file, num_mfcc_features, num_context)
        x_length = len(x)
        yield x, x_length, text_to_char_array(y), len(y)
x = tf.placeholder(tf.float32, [None, num_mfcc_features + (2 * num_mfcc_features * num_context)])
x_length = tf.placeholder(tf.int32, [])
y = tf.placeholder(tf.int32, [None,])
y_length = tf.placeholder(tf.int32, [])
s = tf.InteractiveSession()
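# PaddingFIFOQueue pads every None-shaped dimension out to the size of the largest
# element in a dequeue_many() batch, which is what lets variable-length MFCC
# sequences and label arrays be batched together below.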
queue = tf.PaddingFIFOQueue(shapes=[[None, num_mfcc_features + (2 * num_mfcc_features * num_context)], [], [None,], []],
                            dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
                            capacity=4)
enqueue_op = queue.enqueue([x, x_length, y, y_length])
def fill_queue_thread(session):
    while True:
        for data_x, data_x_length, data_y, data_y_length in data_iterator():
            session.run(enqueue_op, feed_dict={x: data_x,
                                               x_length: data_x_length,
                                               y: data_y,
                                               y_length: data_y_length})
thread = Thread(target=fill_queue_thread, args=(s,))
thread.daemon = True
thread.start()
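# The feeder thread is a daemon and loops over the data forever, so the dequeue
# ops below always have something to consume and the process can exit without
# joining the thread.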
batch_size = 2
assert len(all_txt_files) % batch_size == 0
batch_x, batch_x_lengths, batch_y, batch_y_lengths = queue.dequeue_many(batch_size)
# batch_y = tf.SparseTensor(batch_y_indices, batch_y_values, batch_y_shape)
# Note: each session.run() below dequeues (and consumes) a separate batch
print(s.run(batch_x))
print(s.run(batch_x_lengths))
print(s.run(batch_y))
def ctc_label_dense_to_sparse(labels, label_lengths):
    # Convert a batch of zero-padded dense labels into a tf.SparseTensor suitable for CTC
    label_shape = tf.shape(labels)
    num_batches_tns = tf.pack([label_shape[0]])
    max_num_labels_tns = tf.pack([label_shape[1]])

    def range_less_than(previous_state, current_input):
        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input

    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init,
                         parallel_iterations=1)
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
                             label_shape)
    label_ind = tf.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
                                          tf.reverse(label_shape, [True])))
    batch_ind = tf.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(tf.reshape(tf.concat(0, [batch_ind, label_ind]), [2, -1]))
    vals_sparse = tf.gather_nd(labels, indices)

    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
sparse_labels = ctc_label_dense_to_sparse(batch_y, batch_y_lengths)
print(sparse_labels)
print(s.run(sparse_labels))
# batch_x, batch_x_lengths, sparse_labels are the inputs to the network (this may require switching to a dynamic RNN)
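# Below is a rough sketch (not part of the original gist) of how batch_x,
# batch_x_lengths and sparse_labels could be wired into a dynamic RNN plus CTC
# loss, using the same TF 0.x-era API as above. num_hidden, num_classes and the
# single BasicLSTMCell layer are hypothetical placeholder choices, not the real
# DeepSpeech architecture.
num_hidden = 100
num_classes = 28  # assumed label inventory: space + 26 letters, plus the CTC blank

cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden)
# dynamic_rnn accepts the zero-padded, variable-length batches from the queue
outputs, _ = tf.nn.dynamic_rnn(cell, batch_x, sequence_length=batch_x_lengths,
                               dtype=tf.float32)  # [batch_size, max_time, num_hidden]
W = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1))
b = tf.Variable(tf.zeros([num_classes]))
logits = tf.reshape(tf.matmul(tf.reshape(outputs, [-1, num_hidden]), W) + b,
                    [batch_size, -1, num_classes])
logits = tf.transpose(logits, [1, 0, 2])  # ctc_loss expects time-major logits
loss = tf.reduce_mean(tf.nn.ctc_loss(logits, sparse_labels, batch_x_lengths))
# (Variables would need initializing, e.g. s.run(tf.initialize_all_variables()),
# before evaluating the loss.)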