import scipy.io
import lasagne
import theano
import theano.tensor as T
import numpy as np
import time
import logging

logger = logging.getLogger('')
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('experiment.log')
fh.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
logger.addHandler(ch)
logger.addHandler(fh)
TRAIN_NC = '../data/train_1_speaker.nc'
VAL_NC = '../data/val_1_speaker.nc'
BATCH_SIZE = 50
def one_hot(labels, n_classes):
    '''
    Converts an array of label integers to a one-hot matrix encoding
    :parameters:
        - labels : np.ndarray, dtype=int
            Array of integer labels, in [0, n_classes - 1]
        - n_classes : int
            Total number of classes
    :returns:
        - one_hot : np.ndarray, dtype=bool, shape=(labels.shape[0], n_classes)
            One-hot matrix of the input
    '''
    one_hot = np.zeros((labels.shape[0], n_classes), dtype=bool)
    one_hot[np.arange(labels.shape[0]), labels] = True
    return one_hot
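
# Illustrative example (toy labels, not from the original data): with
# labels = np.array([0, 2, 1]) and n_classes = 3, one_hot returns
#     [[ True, False, False],
#      [False, False,  True],
#      [False,  True, False]]
# i.e. row n is True exactly at column labels[n].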
def load_netcdf(filename):
    '''
    Loads in data from a netcdf file in rnnlib format
    :parameters:
        - filename : str
            Path to a netcdf file
    :returns:
        - X : list of np.ndarray
            List of time series matrices
        - y : list of np.ndarray
            List of label arrays in one-hot form (see one_hot)
    '''
    # Open in binary mode; the reads stay inside the with block so the
    # underlying file is still valid while the variables are sliced
    with open(filename, 'rb') as f:
        netcdf_data = scipy.io.netcdf_file(f).variables
        X = []
        y = []
        n = 0
        for length in netcdf_data['seqLengths'].data:
            X_n = netcdf_data['inputs'].data[n:n + length]
            X.append(X_n.astype(theano.config.floatX))
            y_n = one_hot(netcdf_data['targetClasses'].data[n:n + length],
                          netcdf_data['numTargetClasses'].data)
            y.append(y_n.astype(theano.config.floatX))
            n += length
    return X, y
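
# Sketch of the assumed rnnlib netCDF layout (inferred from the reads above):
# 'inputs' stacks all sequences into one (total_steps, n_features) matrix,
# 'targetClasses' is a parallel vector of integer labels, and 'seqLengths'
# records each sequence's length so the loop can slice the stack back into
# per-utterance matrices.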
def make_batches(X, length, batch_size=BATCH_SIZE):
    '''
    Convert a list of matrices into batches of uniform length
    :parameters:
        - X : list of np.ndarray
            List of matrices
        - length : int
            Desired sequence length.  Smaller sequences will be padded with
            0s, longer will be truncated.
        - batch_size : int
            Mini-batch size
    :returns:
        - X_batch : np.ndarray
            Tensor of time series matrix batches,
            shape=(n_batches, batch_size, length, n_features)
        - X_mask : np.ndarray
            Boolean mask of the same shape denoting whether to include each
            time step of each time series matrix
    '''
    n_batches = len(X)//batch_size
    X_batch = np.zeros((n_batches, batch_size, length, X[0].shape[1]),
                       dtype=theano.config.floatX)
    X_mask = np.zeros(X_batch.shape, dtype=bool)
    for b in range(n_batches):
        for n in range(batch_size):
            X_m = X[b*batch_size + n]
            X_batch[b, n, :X_m.shape[0]] = X_m[:length]
            X_mask[b, n, :X_m.shape[0]] = 1
    return X_batch, X_mask
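
# Illustrative example (hypothetical sizes): given 100 sequences of
# 39-dimensional features and length=500, make_batches returns X_batch with
# shape (2, 50, 500, 39) plus a boolean X_mask of the same shape that is True
# only on real (non-padded) time steps.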
logger.info('Loading data...')
X_train, y_train = load_netcdf(TRAIN_NC)
X_val, y_val = load_netcdf(VAL_NC)
logger.debug('First training sequence shape: {}'.format(X_train[0].shape))
# Find the longest sequence
length = max(max(X.shape[0] for X in X_train),
             max(X.shape[0] for X in X_val))
# Convert to batches of time series of uniform length
X_train, _ = make_batches(X_train, length)
y_train, train_mask = make_batches(y_train, length)
X_val, _ = make_batches(X_val, length)
y_val, val_mask = make_batches(y_val, length)
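
# At this point each of X_train/y_train (and the val counterparts) is a 4D
# tensor of shape (n_batches, BATCH_SIZE, length, n_features), and the masks
# mark which time steps are real data rather than zero padding.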
n_epochs = 500
learning_rate = 10
momentum = .9
precompute = False

l_in = lasagne.layers.InputLayer(shape=(BATCH_SIZE, length, X_val.shape[-1]))
l_noise = lasagne.layers.GaussianNoiseLayer(l_in, sigma=0.6)
# Three stacked bidirectional LSTM layers: each pair runs forwards and
# backwards over the sequence, and the two directions are summed elementwise
l_forward_1 = lasagne.layers.LSTMLayer(
    l_noise, num_units=156, backwards=False, precompute_input=precompute)
l_backward_1 = lasagne.layers.LSTMLayer(
    l_noise, num_units=156, backwards=True, precompute_input=precompute)
l_recurrent_1 = lasagne.layers.ElemwiseSumLayer(
    [l_forward_1, l_backward_1])
l_forward_2 = lasagne.layers.LSTMLayer(
    l_recurrent_1, num_units=300, backwards=False, precompute_input=precompute)
l_backward_2 = lasagne.layers.LSTMLayer(
    l_recurrent_1, num_units=300, backwards=True, precompute_input=precompute)
l_recurrent_2 = lasagne.layers.ElemwiseSumLayer(
    [l_forward_2, l_backward_2])
l_forward_3 = lasagne.layers.LSTMLayer(
    l_recurrent_2, num_units=102, backwards=False, precompute_input=precompute)
l_backward_3 = lasagne.layers.LSTMLayer(
    l_recurrent_2, num_units=102, backwards=True, precompute_input=precompute)
l_recurrent_3 = lasagne.layers.ElemwiseSumLayer(
    [l_forward_3, l_backward_3])
l_reshape = lasagne.layers.ReshapeLayer(l_recurrent_3, (-1, 102))
nonlinearity = lasagne.nonlinearities.softmax
l_rec_out = lasagne.layers.DenseLayer(l_reshape, num_units=y_val.shape[-1],
                                      nonlinearity=nonlinearity)
l_out = lasagne.layers.ReshapeLayer(l_rec_out,
                                    (BATCH_SIZE, length, y_val.shape[-1]))
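
# Shape bookkeeping (follows from the layers above): l_reshape flattens the
# (BATCH_SIZE, length, 102) recurrent output to (BATCH_SIZE*length, 102) so
# the softmax DenseLayer is applied independently per time step, and l_out
# restores the (BATCH_SIZE, length, n_classes) layout for the masked cost.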
# Cost function is masked categorical cross-entropy
input = T.tensor3('input')
target_output = T.tensor3('target_output')
mask = T.tensor3('mask')


def cost(output):
    return -T.sum(mask*target_output*T.log(output))/T.sum(mask)
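
# Worked micro-example (hypothetical numbers): for one unmasked time step with
# target class 2 and predicted probability p[2] = 0.5, the step contributes
# -log(0.5) ~= 0.693 to the sum; masked steps contribute nothing because the
# mask zeroes them out, and dividing by T.sum(mask) normalises by the number
# of unmasked entries.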
cost_train = cost(lasagne.layers.get_output(l_out, input, deterministic=False))
cost_eval = cost(lasagne.layers.get_output(l_out, input, deterministic=True))

# Use SGD for training
all_params = lasagne.layers.get_all_params(l_out, trainable=True)
logger.info('Computing updates...')
updates = lasagne.updates.momentum(cost_train, all_params,
                                   learning_rate, momentum)
logger.info('Compiling functions...')
# Theano functions for training, getting output, and computing cost
train = theano.function([input, target_output, mask], cost_train,
                        updates=updates)
y_pred = theano.function(
    [input], lasagne.layers.get_output(l_out, input, deterministic=True))
compute_cost = theano.function([input, target_output, mask], cost_eval)
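
# Usage sketch: train(...) performs one SGD-with-momentum step and returns the
# training cost, y_pred(...) returns per-time-step class probabilities, and
# compute_cost(...) evaluates the deterministic cost without updating weights.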
logger.info('Training...')
# Train the net
for epoch in range(n_epochs):
    epoch_start_time = time.time()
    # Visit the batches in a random order each epoch
    batch_shuffle = np.random.choice(X_train.shape[0], X_train.shape[0], False)
    for sequences, labels, sequence_mask in zip(X_train[batch_shuffle],
                                                y_train[batch_shuffle],
                                                train_mask[batch_shuffle]):
        start_time = time.time()
        # Also shuffle the sequences within each batch
        sequence_shuffle = np.random.choice(sequences.shape[0],
                                            sequences.shape[0], False)
        train(sequences[sequence_shuffle], labels[sequence_shuffle],
              sequence_mask[sequence_shuffle])
        end_time = time.time()
        logger.debug('Batch took {}s'.format(end_time - start_time))
    cost_val = sum([compute_cost(X_val_n, y_val_n, mask_n)
                    for X_val_n, y_val_n, mask_n
                    in zip(X_val, y_val, val_mask)])
    y_val_pred = np.array([y_pred(X_val_n) for X_val_n in X_val])
    y_val_labels = np.argmax(y_val*val_mask, axis=-1).flatten()
    y_val_pred_labels = np.argmax(y_val_pred*val_mask, axis=-1).flatten()
    n_time_steps = np.sum(val_mask)/val_mask.shape[-1]
    error = np.sum(y_val_labels != y_val_pred_labels)/float(n_time_steps)
    logger.info("Epoch {} took {}s, cost = {}, error = {}".format(
        epoch, time.time() - epoch_start_time, cost_val, error))