"""
A deep neural network with or w/o dropout in one file.
License: Do What The Fuck You Want to Public License http://www.wtfpl.net/
"""
import numpy, theano, sys, math
from theano import tensor as T
from theano import shared
from theano.tensor.shared_randomstreams import RandomStreams
from collections import OrderedDict

BATCH_SIZE = 100


def relu_f(vec):
    """ Wrapper to quickly change the rectified linear unit function """
    return (vec + abs(vec)) / 2.


def dropout(rng, x, p=0.5):
    """ Zero-out random values in x with probability p using rng """
    if p > 0. and p < 1.:
        seed = rng.randint(2 ** 30)
        srng = theano.tensor.shared_randomstreams.RandomStreams(seed)
        mask = srng.binomial(n=1, p=1.-p, size=x.shape,
                             dtype=theano.config.floatX)
        return x * mask
    return x


def fast_dropout(rng, x):
    """ Multiply activations by N(1,1) """
    seed = rng.randint(2 ** 30)
    srng = RandomStreams(seed)
    mask = srng.normal(size=x.shape, avg=1., dtype=theano.config.floatX)
    return x * mask
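
# Illustrative sketch (hypothetical helper, never called): contrast the two
# dropout variants on the same symbolic input. `rng` is a numpy RandomState
# and `x` a Theano matrix; both names here are assumptions for the demo.
def _dropout_demo(rng, x):
    """ Return (binary-mask dropout, fast/Gaussian dropout) of the same input. """
    dropped = dropout(rng, x, p=0.5)   # ~half the entries zeroed at each evaluation
    scaled = fast_dropout(rng, x)      # entries multiplied by N(1, 1) noise instead
    return dropped, scaled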

def build_shared_zeros(shape, name):
    """ Builds a theano shared variable filled with a zeros numpy array """
    return shared(value=numpy.zeros(shape, dtype=theano.config.floatX),
                  name=name, borrow=True)


class Linear(object):
    """ Basic linear transformation layer (W.X + b) """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, fdrop=False):
        if W is None:
            W_values = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)), dtype=theano.config.floatX)
            W_values *= 4  # This works for sigmoid activated networks!
            W = theano.shared(value=W_values, name='W', borrow=True)
        if b is None:
            b = build_shared_zeros((n_out,), 'b')
        self.input = input
        self.W = W
        self.b = b
        self.params = [self.W, self.b]
        self.output = T.dot(self.input, self.W) + self.b
        if fdrop:
            self.output = fast_dropout(rng, self.output)

    def __repr__(self):
        return "Linear"


class SigmoidLayer(Linear):
    """ Sigmoid activation layer (sigmoid(W.X + b)) """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, fdrop=False):
        super(SigmoidLayer, self).__init__(rng, input, n_in, n_out, W, b)
        self.pre_activation = self.output
        if fdrop:
            self.pre_activation = fast_dropout(rng, self.pre_activation)
        self.output = T.nnet.sigmoid(self.pre_activation)


class ReLU(Linear):
    """ Rectified Linear Unit activation layer (max(0, W.X + b)) """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, fdrop=False):
        if b is None:
            b = build_shared_zeros((n_out,), 'b')
        super(ReLU, self).__init__(rng, input, n_in, n_out, W, b)
        self.pre_activation = self.output
        if fdrop:
            self.pre_activation = fast_dropout(rng, self.pre_activation)
        self.output = relu_f(self.pre_activation)


class DatasetMiniBatchIterator(object):
    """ Basic mini-batch iterator """
    def __init__(self, x, y, batch_size=BATCH_SIZE, randomize=False):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.randomize = randomize
        from sklearn.utils import check_random_state
        self.rng = check_random_state(42)

    def __iter__(self):
        n_samples = self.x.shape[0]
        if self.randomize:
            for _ in xrange(n_samples / BATCH_SIZE):
                if BATCH_SIZE > 1:
                    i = int(self.rng.rand(1) * ((n_samples+BATCH_SIZE-1) / BATCH_SIZE))
                else:
                    i = int(math.floor(self.rng.rand(1) * n_samples))
                yield (i, self.x[i*self.batch_size:(i+1)*self.batch_size],
                       self.y[i*self.batch_size:(i+1)*self.batch_size])
        else:
            for i in xrange((n_samples + self.batch_size - 1)
                            / self.batch_size):
                yield (self.x[i*self.batch_size:(i+1)*self.batch_size],
                       self.y[i*self.batch_size:(i+1)*self.batch_size])
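
# Usage sketch (hypothetical x_train / y_train numpy arrays): with the default
# randomize=False, as used in fit() below, the iterator yields (x_batch, y_batch)
# slices of BATCH_SIZE rows in order:
#     for x_batch, y_batch in DatasetMiniBatchIterator(x_train, y_train, batch_size=100):
#         ...  # one training or scoring step per mini-batch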

class LogisticRegression:
    """Multi-class Logistic Regression
    """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None):
        if W is not None:
            self.W = W
        else:
            self.W = build_shared_zeros((n_in, n_out), 'W')
        if b is not None:
            self.b = b
        else:
            self.b = build_shared_zeros((n_out,), 'b')
        # P(Y|X) = softmax(W.X + b)
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.output = self.y_pred
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def negative_log_likelihood_sum(self, y):
        return -T.sum(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def training_cost(self, y):
        """ Wrapper for standard name """
        return self.negative_log_likelihood_sum(y)

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError("y should have the same shape as self.y_pred",
                            ("y", y.type, "y_pred", self.y_pred.type))
        if y.dtype.startswith('int'):
            return T.mean(T.neq(self.y_pred, y))
        else:
            print("!!! y should be of int type")
            return T.mean(T.neq(self.y_pred, numpy.asarray(y, dtype='int')))
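
    # In equation form, the two costs above are (N = batch size):
    #   negative_log_likelihood(y)     = -(1/N) * sum_i log P(Y = y_i | x_i)
    #   negative_log_likelihood_sum(y) = -       sum_i log P(Y = y_i | x_i)
    # with P(Y | x) = softmax(W.x + b) as computed in __init__.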

class NeuralNet(object):
    """ Neural network (not regularized, without dropout) """
    def __init__(self, numpy_rng, theano_rng=None,
                 n_ins=40*3,
                 layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression],
                 layers_sizes=[1024, 1024, 1024, 1024],
                 n_outs=62 * 3,
                 rho=0.9,
                 eps=1.E-6,
                 max_norm=0.,
                 debugprint=False):
        """
        Basic feedforward neural network.
        """
        self.layers = []
        self.params = []
        self.n_layers = len(layers_types)
        self.layers_types = layers_types
        assert self.n_layers > 0
        self.max_norm = max_norm
        self._rho = rho  # "momentum" for adadelta
        self._eps = eps  # epsilon for adadelta
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.x = T.fmatrix('x')
        self.y = T.ivector('y')
        self.layers_ins = [n_ins] + layers_sizes
        self.layers_outs = layers_sizes + [n_outs]
        layer_input = self.x
        for layer_type, n_in, n_out in zip(layers_types,
                                           self.layers_ins, self.layers_outs):
            this_layer = layer_type(rng=numpy_rng,
                                    input=layer_input, n_in=n_in, n_out=n_out)
            assert hasattr(this_layer, 'output')
            self.params.extend(this_layer.params)
            self._accugrads.extend([build_shared_zeros(t.shape.eval(),
                'accugrad') for t in this_layer.params])
            self._accudeltas.extend([build_shared_zeros(t.shape.eval(),
                'accudelta') for t in this_layer.params])
            self.layers.append(this_layer)
            layer_input = this_layer.output
        assert hasattr(self.layers[-1], 'training_cost')
        assert hasattr(self.layers[-1], 'errors')
        # TODO standardize cost
        self.mean_cost = self.layers[-1].negative_log_likelihood(self.y)
        self.cost = self.layers[-1].training_cost(self.y)
        if debugprint:
            theano.printing.debugprint(self.cost)
        self.errors = self.layers[-1].errors(self.y)

    def __repr__(self):
        dimensions_layers_str = map(lambda x: "x".join(map(str, x)),
                                    zip(self.layers_ins, self.layers_outs))
        return "_".join(map(lambda x: "_".join((x[0].__name__, x[1])),
                            zip(self.layers_types, dimensions_layers_str)))

    def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        # using mean_cost so that the learning rate is not too dependent
        # on the batch size
        gparams = T.grad(self.mean_cost, self.params)
        # compute list of weights updates
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            if self.max_norm:
                W = param - gparam * learning_rate
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param - gparam * learning_rate
        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})
        return train_fn

    def get_adagrad_trainer(self):
        """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.mean_cost, self.params)
        # compute list of weights updates
        updates = OrderedDict()
        for accugrad, param, gparam in zip(self._accugrads, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = accugrad + gparam * gparam
            dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam
            if self.max_norm:
                W = param + dx
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param + dx
            updates[accugrad] = agrad
        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})
        return train_fn

    def get_adadelta_trainer(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and
        self._eps params.
        """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.mean_cost, self.params)
        # compute list of weights updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads,
                self._accudeltas, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012)
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self._eps)
                          / (agrad + self._eps)) * gparam
            updates[accudelta] = (self._rho * accudelta
                                  + (1 - self._rho) * dx * dx)
            if self.max_norm:
                W = param + dx
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param + dx
            updates[accugrad] = agrad
        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})
        return train_fn
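
    # Adadelta recap (Zeiler 2012), matching get_adadelta_trainer() above,
    # per parameter x with gradient g_t:
    #   E[g^2]_t  = rho * E[g^2]_{t-1}  + (1 - rho) * g_t^2            (accugrad)
    #   dx_t      = - sqrt((E[dx^2]_{t-1} + eps) / (E[g^2]_t + eps)) * g_t
    #   E[dx^2]_t = rho * E[dx^2]_{t-1} + (1 - rho) * dx_t^2           (accudelta)
    #   x_{t+1}   = x_t + dx_t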

    def score_classif(self, given_set):
        """ Returns functions to get current classification errors. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        score = theano.function(inputs=[theano.Param(batch_x),
                                        theano.Param(batch_y)],
                                outputs=self.errors,
                                givens={self.x: batch_x, self.y: batch_y})

        def scoref():
            """ returned function that scans the entire set given as input """
            return [score(batch_x, batch_y) for batch_x, batch_y in given_set]

        return scoref


class RegularizedNet(NeuralNet):
    """ Neural net with L1 and L2 regularization """
    def __init__(self, numpy_rng, theano_rng=None,
                 n_ins=100,
                 layers_types=[ReLU, ReLU, ReLU, LogisticRegression],
                 layers_sizes=[1024, 1024, 1024],
                 n_outs=2,
                 rho=0.9,
                 eps=1.E-6,
                 L1_reg=0.,
                 L2_reg=0.,
                 max_norm=0.,
                 debugprint=False):
        """
        Feedforward neural network with added L1 and/or L2 regularization.
        """
        super(RegularizedNet, self).__init__(numpy_rng, theano_rng, n_ins,
            layers_types, layers_sizes, n_outs, rho, eps, max_norm,
            debugprint)
        L1 = shared(0.)
        for param in self.params:
            L1 += T.sum(abs(param))
        if L1_reg > 0.:
            self.cost = self.cost + L1_reg * L1
        L2 = shared(0.)
        for param in self.params:
            L2 += T.sum(param ** 2)
        if L2_reg > 0.:
            self.cost = self.cost + L2_reg * L2


class DropoutNet(NeuralNet):
    """ Neural net with dropout (see Hinton's et al. paper) """
    def __init__(self, numpy_rng, theano_rng=None,
                 n_ins=40*3,
                 layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
                 layers_sizes=[4000, 4000, 4000, 4000],
                 dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
                 n_outs=62 * 3,
                 rho=0.9,
                 eps=1.E-6,
                 max_norm=0.,
                 fast_drop=False,
                 debugprint=False):
        """
        Feedforward neural network with dropout regularization.
        """
        super(DropoutNet, self).__init__(numpy_rng, theano_rng, n_ins,
            layers_types, layers_sizes, n_outs, rho, eps, max_norm,
            debugprint)
        self.dropout_rates = dropout_rates
        if fast_drop:
            if dropout_rates[0]:
                dropout_layer_input = fast_dropout(numpy_rng, self.x)
            else:
                dropout_layer_input = self.x
        else:
            dropout_layer_input = dropout(numpy_rng, self.x, p=dropout_rates[0])
        self.dropout_layers = []
        for layer, layer_type, n_in, n_out, dr in zip(self.layers,
                layers_types, self.layers_ins, self.layers_outs,
                dropout_rates[1:] + [0]):  # !!! we do not dropout anything
                                           # from the last layer !!!
            if dr:
                if fast_drop:
                    this_layer = layer_type(rng=numpy_rng,
                        input=dropout_layer_input, n_in=n_in, n_out=n_out,
                        W=layer.W, b=layer.b, fdrop=True)
                else:
                    this_layer = layer_type(rng=numpy_rng,
                        input=dropout_layer_input, n_in=n_in, n_out=n_out,
                        W=layer.W * 1. / (1. - dr),
                        b=layer.b * 1. / (1. - dr))
                    # N.B. dropout with dr == 1 does not drop anything!!
                    this_layer.output = dropout(numpy_rng, this_layer.output, dr)
            else:
                this_layer = layer_type(rng=numpy_rng,
                    input=dropout_layer_input, n_in=n_in, n_out=n_out,
                    W=layer.W, b=layer.b)
            assert hasattr(this_layer, 'output')
            self.dropout_layers.append(this_layer)
            dropout_layer_input = this_layer.output
        assert hasattr(self.layers[-1], 'training_cost')
        assert hasattr(self.layers[-1], 'errors')
        # these are the dropout costs
        self.mean_cost = self.dropout_layers[-1].negative_log_likelihood(self.y)
        self.cost = self.dropout_layers[-1].training_cost(self.y)
        # these are the non-dropout errors
        self.errors = self.layers[-1].errors(self.y)

    def __repr__(self):
        return super(DropoutNet, self).__repr__() + "\n"\
            + "dropout rates: " + str(self.dropout_rates)


def add_fit_and_score(class_to_chg):
    """ Mutates a class to add the fit() and score() functions to a NeuralNet.
    """
    from types import MethodType

    def fit(self, x_train, y_train, x_dev=None, y_dev=None,
            max_epochs=100, early_stopping=True, split_ratio=0.1,
            method='adadelta', verbose=False, plot=False):
        """
        Fits the neural network to `x_train` and `y_train`.
        If neither `x_dev` nor `y_dev` is given, it will do a `split_ratio`
        cross-validation split on `x_train` and `y_train` (for early stopping).
        """
        import time
        if x_dev is None or y_dev is None:
            from sklearn.cross_validation import train_test_split
            x_train, x_dev, y_train, y_dev = train_test_split(x_train, y_train,
                test_size=split_ratio, random_state=42)
        if method == 'sgd':
            train_fn = self.get_SGD_trainer()
        elif method == 'adagrad':
            train_fn = self.get_adagrad_trainer()
        elif method == 'adadelta':
            train_fn = self.get_adadelta_trainer()
        train_set_iterator = DatasetMiniBatchIterator(x_train, y_train)
        dev_set_iterator = DatasetMiniBatchIterator(x_dev, y_dev)
        train_scoref = self.score_classif(train_set_iterator)
        dev_scoref = self.score_classif(dev_set_iterator)
        best_dev_loss = numpy.inf
        epoch = 0
        # TODO early stopping (not just cross val, also stop training)
        if plot:
            verbose = True
            self._costs = []
            self._train_errors = []
            self._dev_errors = []
            self._updates = []
        while epoch < max_epochs:
            if not verbose:
                sys.stdout.write("\r%0.2f%%" % (epoch * 100. / max_epochs))
                sys.stdout.flush()
            avg_costs = []
            timer = time.time()
            for x, y in train_set_iterator:
                if method == 'sgd' or method == 'adagrad':
                    avg_cost = train_fn(x, y, lr=1.E-2)  # TODO: you have to
                                                         # play with this
                                                         # learning rate
                                                         # (dataset dependent)
                elif method == 'adadelta':
                    avg_cost = train_fn(x, y)
                if isinstance(avg_cost, list):
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)
            if verbose:
                mean_costs = numpy.mean(avg_costs)
                mean_train_errors = numpy.mean(train_scoref())
                print(' epoch %i took %f seconds' %
                      (epoch, time.time() - timer))
                print(' epoch %i, avg costs %f' %
                      (epoch, mean_costs))
                print(' epoch %i, training error %f' %
                      (epoch, mean_train_errors))
                if plot:
                    self._costs.append(mean_costs)
                    self._train_errors.append(mean_train_errors)
            dev_errors = numpy.mean(dev_scoref())
            if plot:
                self._dev_errors.append(dev_errors)
            if dev_errors < best_dev_loss:
                best_dev_loss = dev_errors
                # keep a copy of the current best parameters' values
                best_params = [numpy.copy(p.get_value()) for p in self.params]
                if verbose:
                    print('!!! epoch %i, validation error of best model %f' %
                          (epoch, dev_errors))
            epoch += 1
        if not verbose:
            print("")
        # restore the parameters of the best model seen on the dev set
        for best_value, param in zip(best_params, self.params):
            param.set_value(best_value)

    def score(self, x, y):
        """ error rates """
        iterator = DatasetMiniBatchIterator(x, y)
        scoref = self.score_classif(iterator)
        return numpy.mean(scoref())

    class_to_chg.fit = MethodType(fit, None, class_to_chg)
    class_to_chg.score = MethodType(score, None, class_to_chg)
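
# Usage sketch (hypothetical x_train/y_train/x_test/y_test numpy arrays; this
# mirrors what the __main__ block below does on real datasets):
#     add_fit_and_score(DropoutNet)
#     dnn = DropoutNet(numpy_rng=numpy.random.RandomState(123),
#                      n_ins=x_train.shape[1],
#                      layers_types=[ReLU, ReLU, LogisticRegression],
#                      layers_sizes=[200, 200], dropout_rates=[0.2, 0.5, 0.5],
#                      n_outs=len(set(y_train)))
#     dnn.fit(x_train, y_train, max_epochs=20, method='adadelta', verbose=True)
#     print(1. - dnn.score(x_test, y_test))  # accuracy on the test set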

if __name__ == "__main__":
    add_fit_and_score(DropoutNet)
    add_fit_and_score(RegularizedNet)

    def nudge_dataset(X, Y):
        """
        This produces a dataset 5 times bigger than the original one,
        by moving the 8x8 images in X around by 1px to left, right, down, up
        """
        from scipy.ndimage import convolve
        direction_vectors = [
            [[0, 1, 0],
             [0, 0, 0],
             [0, 0, 0]],
            [[0, 0, 0],
             [1, 0, 0],
             [0, 0, 0]],
            [[0, 0, 0],
             [0, 0, 1],
             [0, 0, 0]],
            [[0, 0, 0],
             [0, 0, 0],
             [0, 1, 0]]]
        shift = lambda x, w: convolve(x.reshape((8, 8)), mode='constant',
                                      weights=w).ravel()
        X = numpy.concatenate([X] +
                              [numpy.apply_along_axis(shift, 1, X, vector)
                               for vector in direction_vectors])
        Y = numpy.concatenate([Y for _ in range(5)], axis=0)
        return X, Y

    from sklearn import datasets, svm, naive_bayes
    from sklearn import cross_validation, preprocessing
    MNIST = True  # MNIST dataset
    DIGITS = False  # digits dataset
    FACES = True  # faces dataset
    TWENTYNEWSGROUPS = False  # 20 newsgroups dataset
    VERBOSE = True  # prints evolution of the loss/accuracy during the fitting
    SCALE = True  # scale the dataset
    PLOT = True  # plot losses and accuracies

    def train_models(x_train, y_train, x_test, y_test, n_features, n_outs,
                     use_dropout=True, n_epochs=100, numpy_rng=None,
                     svms=False, nb=False, deepnn=True, name=''):
        if svms:
            print("Linear SVM")
            classifier = svm.SVC(gamma=0.001)
            print(classifier)
            classifier.fit(x_train, y_train)
            print("score: %f" % classifier.score(x_test, y_test))
            print("RBF-kernel SVM")
            classifier = svm.SVC(kernel='rbf', class_weight='auto')
            print(classifier)
            classifier.fit(x_train, y_train)
            print("score: %f" % classifier.score(x_test, y_test))
        if nb:
            print("Multinomial Naive Bayes")
            classifier = naive_bayes.MultinomialNB()
            print(classifier)
            classifier.fit(x_train, y_train)
            print("score: %f" % classifier.score(x_test, y_test))
        if deepnn:
            import warnings
            warnings.filterwarnings("ignore")  # TODO remove
            if use_dropout:
                #n_epochs *= 4 TODO
                pass

            def new_dnn(dropout=False):
                if dropout:
                    print("Dropout DNN")
                    return DropoutNet(numpy_rng=numpy_rng, n_ins=n_features,
                        layers_types=[ReLU, ReLU, LogisticRegression],
                        layers_sizes=[200, 200],
                        dropout_rates=[0.2, 0.5, 0.5],
                        # TODO if you have a big enough GPU, use these:
                        #layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
                        #layers_sizes=[2000, 2000, 2000, 2000],
                        #dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
                        n_outs=n_outs,
                        max_norm=4.,
                        fast_drop=True,
                        debugprint=0)
                else:
                    print("Simple (regularized) DNN")
                    return RegularizedNet(numpy_rng=numpy_rng, n_ins=n_features,
                        layers_types=[ReLU, ReLU, LogisticRegression],
                        layers_sizes=[200, 200],
                        n_outs=n_outs,
                        #L1_reg=0.001/x_train.shape[0],
                        #L2_reg=0.001/x_train.shape[0],
                        L1_reg=0.,
                        L2_reg=1./x_train.shape[0],
                        debugprint=0)

            import matplotlib.pyplot as plt
            plt.figure()
            ax1 = plt.subplot(221)
            ax2 = plt.subplot(222)
            ax3 = plt.subplot(223)
            ax4 = plt.subplot(224)  # TODO plot the updates of the weights
            methods = ['sgd', 'adagrad', 'adadelta']
            #methods = ['adadelta']  TODO if you want "good" results asap
            for method in methods:
                dnn = new_dnn(use_dropout)
                print dnn, "using", method
                dnn.fit(x_train, y_train, max_epochs=n_epochs, method=method,
                        verbose=VERBOSE, plot=PLOT)
                test_error = dnn.score(x_test, y_test)
                print("score: %f" % (1. - test_error))
                ax1.plot(numpy.log10(dnn._costs), label=method)
                ax2.plot(numpy.log10(dnn._train_errors), label=method)
                ax3.plot(numpy.log10(dnn._dev_errors), label=method)
                #ax2.plot(dnn._train_errors, label=method)
                #ax3.plot(dnn._dev_errors, label=method)
                ax4.plot([test_error for _ in range(10)], label=method)
            ax1.set_xlabel('epoch')
            ax1.set_ylabel('cost (log10)')
            ax2.set_xlabel('epoch')
            ax2.set_ylabel('train error')
            ax3.set_xlabel('epoch')
            ax3.set_ylabel('dev error')
            ax4.set_ylabel('test error')
            plt.legend()
            plt.savefig('training_' + name + '.png')

    if MNIST:
        from sklearn.datasets import fetch_mldata
        mnist = fetch_mldata('MNIST original')
        X = numpy.asarray(mnist.data, dtype='float32')
        if SCALE:
            #X = preprocessing.scale(X)
            X /= 255.
        y = numpy.asarray(mnist.target, dtype='int32')
        print("Total dataset size:")
        print("n samples: %d" % X.shape[0])
        print("n features: %d" % X.shape[1])
        print("n classes: %d" % len(set(y)))
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            X, y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, X.shape[1],
                     len(set(y)), numpy_rng=numpy.random.RandomState(123),
                     name='MNIST')

    if DIGITS:
        digits = datasets.load_digits()
        data = numpy.asarray(digits.data, dtype='float32')
        target = numpy.asarray(digits.target, dtype='int32')
        nudged_x, nudged_y = nudge_dataset(data, target)
        if SCALE:
            nudged_x = preprocessing.scale(nudged_x)
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            nudged_x, nudged_y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, nudged_x.shape[1],
                     len(set(target)), numpy_rng=numpy.random.RandomState(123),
                     name='digits')

    if FACES:
        import logging
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s')
        lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70,
                                               resize=0.4)
        X = numpy.asarray(lfw_people.data, dtype='float32')
        if SCALE:
            X = preprocessing.scale(X)
        y = numpy.asarray(lfw_people.target, dtype='int32')
        target_names = lfw_people.target_names
        print("Total dataset size:")
        print("n samples: %d" % X.shape[0])
        print("n features: %d" % X.shape[1])
        print("n classes: %d" % target_names.shape[0])
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            X, y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, X.shape[1],
                     len(set(y)), numpy_rng=numpy.random.RandomState(123),
                     name='faces')

    if TWENTYNEWSGROUPS:
        from sklearn.feature_extraction.text import TfidfVectorizer
        newsgroups_train = datasets.fetch_20newsgroups(subset='train')
        vectorizer = TfidfVectorizer(encoding='latin-1', max_features=10000)
        #vectorizer = HashingVectorizer(encoding='latin-1')
        x_train = vectorizer.fit_transform(newsgroups_train.data)
        x_train = numpy.asarray(x_train.todense(), dtype='float32')
        y_train = numpy.asarray(newsgroups_train.target, dtype='int32')
        newsgroups_test = datasets.fetch_20newsgroups(subset='test')
        x_test = vectorizer.transform(newsgroups_test.data)
        x_test = numpy.asarray(x_test.todense(), dtype='float32')
        y_test = numpy.asarray(newsgroups_test.target, dtype='int32')
        train_models(x_train, y_train, x_test, y_test, x_train.shape[1],
                     len(set(y_train)),
                     numpy_rng=numpy.random.RandomState(123),
                     svms=False, nb=True, deepnn=True,
                     name='20newsgroups')
On Windows 7, 64 bit, Python 3.4:

Traceback (most recent call last):
  File "E:\Anaconda3\lib\site-packages\theano\gof\lazylinker_c.py", line 59, in <module>
    raise ImportError()
ImportError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "E:\Anaconda3\lib\site-packages\theano\gof\lazylinker_c.py", line 76, in <module>
    raise ImportError()
ImportError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "E:\Dropbox\Dropbox\BioInformatics Lab\AdaHERF_ML-master\ModdedScripts\dnn.py", line 7, in <module>
    import numpy, theano, sys, math
  File "E:\Anaconda3\lib\site-packages\theano\__init__.py", line 55, in <module>
    from theano.compile import
  File "E:\Anaconda3\lib\site-packages\theano\compile\__init__.py", line 6, in <module>
    from theano.compile.function_module import *
  File "E:\Anaconda3\lib\site-packages\theano\compile\function_module.py", line 18, in <module>
    import theano.compile.mode
  File "E:\Anaconda3\lib\site-packages\theano\compile\mode.py", line 11, in <module>
    import theano.gof.vm
  File "E:\Anaconda3\lib\site-packages\theano\gof\vm.py", line 516, in <module>
    from . import lazylinker_c
  File "E:\Anaconda3\lib\site-packages\theano\gof\lazylinker_c.py", line 85, in <module>
    args = cmodule.GCC_compiler.compile_args()
  File "E:\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line 1603, in compile_args
    native_lines = get_lines("g++ -march=native -E -v -")
  File "E:\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line 1577, in get_lines
    (stdout, stderr) = p.communicate(input='')
  File "E:\Anaconda3\lib\subprocess.py", line 959, in communicate
    stdout, stderr = self._communicate(input, endtime, timeout)
  File "E:\Anaconda3\lib\subprocess.py", line 1195, in _communicate
    self.stdin.write(input)
TypeError: 'str' does not support the buffer interface
[Finished in 6.6s with exit code 1]
[shell_cmd: python -u "E:\Dropbox\Dropbox\BioInformatics Lab\AdaHERF_ML-master\ModdedScripts\dnn.py"]
In order to make the code work on OS X, I substituted
    lr=1.E-2
with
    lr=numpy.asarray(1.E-2, dtype='float32')
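For context, that substitution lands on the train_fn call inside fit() (in add_fit_and_score); a minimal sketch of the changed line, assuming the gist's float32 Theano configuration:
    avg_cost = train_fn(x, y, lr=numpy.asarray(1.E-2, dtype='float32'))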
On OS X, running ./dnn.py gives:
Total dataset size:
n samples: 70000
n features: 784
n classes: 10
Dropout DNN
ReLU_784x200_ReLU_200x200_LogisticRegression_200x10
dropout rates: [0.0, 0.5, 0.5] using sgd
Traceback (most recent call last):
  File "./dnn.py", line 684, in <module>
    name='MNIST')
  File "./dnn.py", line 647, in train_models
    dnn.fit(x_train, y_train, max_epochs=n_epochs, method=method, verbose=VERBOSE, plot=PLOT)
  File "./dnn.py", line 490, in fit
    avg_cost = train_fn(x, y, lr=1.E-2)  # TODO: you have to
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/compile/function_module.py", line 516, in __call__
    self[k] = arg
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/compile/function_module.py", line 452, in __setitem__
    self.value[item] = value
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/compile/function_module.py", line 415, in __setitem__
    s.value = value
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/gof/link.py", line 278, in __set__
    self.storage[0] = self.type.filter(value, **kwargs)
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/tensor/type.py", line 152, in filter
    raise TypeError(err_msg, data)
TypeError: ('TensorType(float32, scalar) cannot store accurately value 0.01, it would be represented as 0.00999999977648. If you do not mind this precision loss, you can: 1) explicitly convert your data to a numpy array of dtype float32, or 2) set "allow_input_downcast=True" when calling "function".', 0.01, 'Container name "lr"')
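As the error message itself suggests, an alternative to casting the learning rate is to compile the trainer with allow_input_downcast=True, so the Python float 0.01 is silently downcast to float32. A minimal sketch against get_SGD_trainer() (the same keyword would apply to the adagrad trainer):
    train_fn = theano.function(inputs=[theano.Param(batch_x),
                                       theano.Param(batch_y),
                                       theano.Param(learning_rate)],
                               outputs=self.mean_cost,
                               updates=updates,
                               allow_input_downcast=True,
                               givens={self.x: batch_x, self.y: batch_y})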