Last active
February 4, 2016 13:45
-
-
Save mmmikael/b202b344b1c89dbbce64 to your computer and use it in GitHub Desktop.
mnist pseudo siamese
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import | |
from __future__ import print_function | |
import numpy as np | |
np.random.seed(1337) # for reproducibility | |
import theano.tensor as T | |
from keras.datasets import mnist | |
from keras.models import Sequential | |
from keras.layers.core import Dense, Dropout, Activation | |
from keras.optimizers import SGD, Adam, RMSprop | |
from keras.utils import np_utils | |
def siamese_euclidean(y_true, y_pred):
    """Loss for a batch whose rows are interleaved as (first, second) pairs.

    Rows ``0, 2, 4, ...`` of ``y_pred`` are treated as the first member of
    each pair and rows ``1, 3, 5, ...`` as the second.  For every pair the
    squared differences of the two outputs are summed over axis 1, and the
    loss is the mean squared error between that value and the target read
    from the even rows of ``y_true``.
    """
    first, second = y_pred[0::2], y_pred[1::2]
    # Sum of squared differences per pair (no square root is taken).
    pair_dist = T.sqr(first - second).sum(axis=1, keepdims=True)
    # Only the target stored at each pair's even position is used.
    pair_target = y_true[0::2]
    return T.sqr(pair_dist - pair_target).mean()
def compute_accuracy(pred):
    """Return the fraction of pairs classified correctly at a 0.5 threshold.

    ``pred`` holds one output row per sample, laid out as consecutive pairs:
    rows ``2*i`` and ``2*i + 1`` form pair ``i``.  Even-numbered pairs are
    counted as correct when the squared distance between their two rows is
    below 0.5, odd-numbered pairs when it is above 0.5.  A squared distance
    of exactly 0.5 is counted as wrong for either kind of pair.

    Returns a Python float computed as ``2 * correct / len(pred)``, or 0.0
    when ``pred`` contains fewer than two rows.
    """
    pred = np.asarray(pred)
    # Floor division: ``len(pred) / 2`` is a float on Python 3 and cannot be
    # used as a count.
    n_pairs = len(pred) // 2
    if n_pairs == 0:
        # Nothing to evaluate; avoids dividing by zero below.
        return 0.0

    rows = pred[:2 * n_pairs]  # drop a trailing unpaired row, if any
    sq_dist = np.square(rows[0::2] - rows[1::2])
    if sq_dist.ndim > 1:
        sq_dist = sq_dist.sum(axis=tuple(range(1, sq_dist.ndim)))

    expect_close = np.arange(n_pairs) % 2 == 0
    correct = np.where(expect_close, sq_dist < 0.5, sq_dist > 0.5)
    return int(correct.sum()) * 2.0 / len(pred)
def create_set(x, digit_indices, n_pairs=500, same_digit=0, other_digit=2):
    """Build interleaved positive/negative sample pairs and their labels.

    For each ``i`` in ``range(n_pairs)`` four rows are emitted, in order:

    * ``x[a]``, ``x[b]`` with label 0, where ``a`` and ``b`` are entries
      ``i`` and ``i + 1`` of ``digit_indices[same_digit]``;
    * ``x[a]``, ``x[c]`` with label 1, where ``c`` is entry ``i`` of
      ``digit_indices[other_digit]``.

    Every label is written once per row, so each pair's label appears twice.

    Args:
        x: indexable collection of samples (e.g. an array of images).
        digit_indices: per-class sequences of indices into ``x``.
        n_pairs: number of positive pairs (and of negative pairs) to build.
        same_digit: class used for both members of a positive pair and for
            the first member of a negative pair.
        other_digit: class used for the second member of a negative pair.

    Returns:
        ``(pairs, y)``: ``pairs`` is a float32 array with ``4 * n_pairs``
        rows, each sample flattened and divided by 255; ``y`` is a float32
        array of ``4 * n_pairs`` labels.

    Raises:
        ValueError: if ``n_pairs`` is smaller than 1.
        IndexError: if a class does not have enough indices
            (``n_pairs + 1`` for ``same_digit``, ``n_pairs`` for
            ``other_digit``).
    """
    if n_pairs < 1:
        raise ValueError("n_pairs must be at least 1, got %r" % (n_pairs,))

    same_pool = digit_indices[same_digit]
    other_pool = digit_indices[other_digit]

    samples = []
    labels = []
    for i in range(n_pairs):
        anchor, positive = same_pool[i], same_pool[i + 1]
        negative = other_pool[i]
        samples += [x[anchor], x[positive], x[anchor], x[negative]]
        labels += [0, 0, 1, 1]

    samples = np.array(samples, 'float32')
    labels = np.array(labels, 'float32')
    # Flatten each sample; -1 infers the feature count (784 for 28x28 MNIST).
    samples = samples.reshape(samples.shape[0], -1) / 255
    return samples, labels
# Load MNIST, already shuffled and split between train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# For each split, group sample indices by digit and build the paired inputs.
# y_train / y_test are rebound here from digit labels to pair labels.
digit_indices = [np.where(y_train == digit)[0] for digit in range(10)]
pairs_train, y_train = create_set(X_train, digit_indices)

digit_indices = [np.where(y_test == digit)[0] for digit in range(10)]
pairs_test, y_test = create_set(X_test, digit_indices)

# A single network embeds every sample: two hidden ReLU layers of 128 units
# with light dropout, followed by a 10-dimensional linear output.
model = Sequential()
model.add(Dense(128, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(10))

sgd = SGD(lr=1e-2, decay=1e-5, momentum=0.9, nesterov=True)
model.compile(loss=siamese_euclidean, optimizer=sgd)

# Each iteration trains on the whole training set as one batch, then reports
# the loss together with the pair accuracy on both splits.
for step in range(300):
    loss = model.train_on_batch(pairs_train, y_train)[0]
    pred_train = model.predict(pairs_train)
    pred_test = model.predict(pairs_test)
    train_acc = 100*compute_accuracy(pred_train)
    test_acc = 100*compute_accuracy(pred_test)
    print('loss=%0.6f accuracy (train)=%0.2f%% accuracy (test)=%0.2f%%' %
          (loss, train_acc, test_acc))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have just one (potentially stupid) question. Are "y_true" and "y_pred" always of the same size? It seems to me that they are (from the models.py code). If this is the case, I would like to have a contrastive loss that takes as input the (potentially high-dimensional) output y_pred from the deep network and the original labels y_true (1-d). That way I can compute a contrastive loss of the form (1 - y_true) * ||y_pred(i) - y_pred(i+1)|| + y_true * max(0, m - ||y_pred(i) - y_pred(i+1)||), where m is the margin. Is this possible?