-
-
Save arivero/efa3b81ed45f17b31259ce426a998de3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.datasets import make_moons | |
from sklearn.cross_validation import train_test_split | |
# Network dimensions shared by make_network() and get_minibatch_grad().
n_feature = 2  # number of input features per sample
n_class = 2  # number of output classes
def make_network(n_hidden=100):
    """Create a randomly initialised two-layer network.

    Args:
        n_hidden: Number of units in the hidden layer.

    Returns:
        A dict with two weight matrices drawn from a standard normal
        distribution: ``W1`` of shape ``(n_feature, n_hidden)`` and ``W2``
        of shape ``(n_hidden, n_class)``. The sizes ``n_feature`` and
        ``n_class`` are read from module level.
    """
    # W1 is drawn before W2 so the random stream is consumed in a fixed order.
    input_to_hidden = np.random.randn(n_feature, n_hidden)
    hidden_to_output = np.random.randn(n_hidden, n_class)
    return {'W1': input_to_hidden, 'W2': hidden_to_output}
def softmax(x, axis=-1):
    """Return the softmax of *x* along *axis*.

    The maximum along *axis* is subtracted before exponentiating so that
    large scores do not overflow ``np.exp``; this does not change the result.

    Args:
        x: Scores. A 1-D vector is normalised as a whole; for higher ranks
            each slice along *axis* is normalised independently, so a batch
            of score rows yields one probability distribution per row.
        axis: Axis to normalise over. Defaults to the last axis.

    Returns:
        Array of the same shape as *x* whose entries along *axis* sum to 1.
    """
    x = np.asarray(x)
    if x.ndim == 0:
        # A scalar has no axes; reduce over everything, as the 1-D case does.
        axis = None

    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)
def forward(x, model):
    """Run one forward pass through the two-layer network.

    Args:
        x: Input that is matrix-multiplied with ``model['W1']``.
        model: Dict holding the weight matrices ``W1`` and ``W2``.

    Returns:
        A ``(h, prob)`` tuple: the hidden activations after ReLU and the
        softmax of the output scores.
    """
    # Input to hidden, followed by ReLU: negative pre-activations become 0.
    pre_activation = x @ model['W1']
    h = np.where(pre_activation < 0, 0, pre_activation)

    # Hidden to output.
    scores = h @ model['W2']
    return h, softmax(scores)
def backward(model, xs, hs, errs):
    """Back-propagate the output errors of a mini-batch to both weight matrices.

    Args:
        model: Dict of weights; only ``W2`` is read here.
        xs: Network inputs, one row per sample.
        hs: Hidden activations after ReLU, one row per sample.
        errs: Output-layer error signal, one row per sample.

    Returns:
        Dict with keys ``W1`` and ``W2`` holding the gradient for the
        corresponding weight matrix.
    """
    dW2 = hs.T @ errs

    dh = errs @ model['W2'].T
    # ReLU passes no gradient through units it clamped. ``hs`` is the
    # post-ReLU activation, so clamped units hold exactly 0 and are never
    # negative; the mask therefore has to test ``<= 0`` rather than ``< 0``.
    dh[hs <= 0] = 0

    dW1 = xs.T @ dh
    return dict(W1=dW1, W2=dW2)
def get_minibatch_grad(model, X_train, y_train):
    """Compute the weight gradients for one mini-batch.

    Each sample is pushed through ``forward`` individually; the inputs,
    hidden activations and output errors are then stacked and handed to
    ``backward`` in a single call.

    Args:
        model: Dict holding the weight matrices ``W1`` and ``W2``.
        X_train: Iterable of input samples.
        y_train: Iterable of class labels; each is cast with ``int`` and used
            as the index of the one-hot target.

    Returns:
        The gradient dict produced by ``backward``.
    """
    inputs, hiddens, deltas = [], [], []

    for sample, label in zip(X_train, y_train):
        hidden, predicted = forward(sample, model)

        # One-hot encode the true class.
        target = np.zeros(n_class)
        target[int(label)] = 1.

        inputs.append(sample)
        hiddens.append(hidden)
        # Error is (target - prediction), so the optimisers add the gradient.
        deltas.append(target - predicted)

    return backward(model, np.array(inputs), np.array(hiddens), np.array(deltas))
def get_minibatch(X, y, minibatch_size):
    """Shuffle the data and cut it into consecutive mini-batches.

    Args:
        X: Sample matrix, one row per sample.
        y: Labels aligned with the rows of *X*.
        minibatch_size: Maximum number of samples per batch; the last batch
            is shorter when the sample count is not a multiple of it.

    Returns:
        List of ``(X_mini, y_mini)`` tuples covering the shuffled data.
    """
    X, y = shuffle(X, y)
    starts = range(0, X.shape[0], minibatch_size)
    return [
        (X[start:start + minibatch_size], y[start:start + minibatch_size])
        for start in starts
    ]
def sgd(model, X_train, y_train, minibatch_size):
    """Train *model* in place with plain stochastic gradient steps.

    The iteration count ``n_iter`` and step size ``alpha`` are read from
    module level. Each iteration picks one pre-built mini-batch at random.

    Args:
        model: Dict of weight matrices, updated in place.
        X_train: Training samples.
        y_train: Training labels.
        minibatch_size: Number of samples per mini-batch.

    Returns:
        The same *model* dict after training.
    """
    batches = get_minibatch(X_train, y_train, minibatch_size)

    for _ in range(n_iter):
        X_mini, y_mini = batches[np.random.randint(0, len(batches))]
        grad = get_minibatch_grad(model, X_mini, y_mini)

        # The gradient is added because get_minibatch_grad builds it from
        # (y_true - y_pred).
        for name, g in grad.items():
            model[name] += alpha * g

    return model
def momentum(model, X_train, y_train, minibatch_size):
    """Train *model* in place with momentum updates.

    A per-weight velocity is kept: each step decays it by ``gamma = 0.9``,
    adds ``alpha`` times the new gradient, and moves the weights by the
    result. ``n_iter`` and ``alpha`` are read from module level.

    Args:
        model: Dict of weight matrices, updated in place.
        X_train: Training samples.
        y_train: Training labels.
        minibatch_size: Number of samples per mini-batch.

    Returns:
        The same *model* dict after training.
    """
    gamma = .9
    velocity = {name: np.zeros_like(weights) for name, weights in model.items()}
    batches = get_minibatch(X_train, y_train, minibatch_size)

    for _ in range(n_iter):
        X_mini, y_mini = batches[np.random.randint(0, len(batches))]
        grad = get_minibatch_grad(model, X_mini, y_mini)

        for name in grad:
            velocity[name] = gamma * velocity[name] + alpha * grad[name]
            model[name] += velocity[name]

    return model
def nesterov(model, X_train, y_train, minibatch_size):
    """Train *model* in place with Nesterov accelerated momentum.

    Unlike plain momentum, the gradient is evaluated at the look-ahead point
    ``model + gamma * velocity`` rather than at the current weights.
    ``n_iter`` and ``alpha`` are read from module level.

    Args:
        model: Dict of weight matrices, updated in place.
        X_train: Training samples.
        y_train: Training labels.
        minibatch_size: Number of samples per mini-batch.

    Returns:
        The same *model* dict after training.
    """
    velocity = {k: np.zeros_like(v) for k, v in model.items()}
    gamma = .9

    minibatches = get_minibatch(X_train, y_train, minibatch_size)

    for _ in range(n_iter):
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]

        # Peek where the accumulated velocity is about to carry the weights
        # and take the gradient there; evaluating it at ``model`` instead
        # would reduce this to ordinary momentum.
        model_ahead = {k: v + gamma * velocity[k] for k, v in model.items()}
        grad = get_minibatch_grad(model_ahead, X_mini, y_mini)

        for layer in grad:
            velocity[layer] = gamma * velocity[layer] + alpha * grad[layer]
            model[layer] += velocity[layer]

    return model
def adagrad(model, X_train, y_train, minibatch_size):
    """Train *model* in place with Adagrad updates.

    A running sum of squared gradients is kept per weight, and every step is
    divided element-wise by its square root (plus ``eps``). ``n_iter``,
    ``alpha`` and ``eps`` are read from module level.

    Args:
        model: Dict of weight matrices, updated in place.
        X_train: Training samples.
        y_train: Training labels.
        minibatch_size: Number of samples per mini-batch.

    Returns:
        The same *model* dict after training.
    """
    squared_sum = {name: np.zeros_like(weights) for name, weights in model.items()}
    batches = get_minibatch(X_train, y_train, minibatch_size)

    for _ in range(n_iter):
        X_mini, y_mini = batches[np.random.randint(0, len(batches))]
        grad = get_minibatch_grad(model, X_mini, y_mini)

        for name, g in grad.items():
            squared_sum[name] += g**2
            model[name] += alpha * g / (np.sqrt(squared_sum[name]) + eps)

    return model
def rmsprop(model, X_train, y_train, minibatch_size):
    """Train *model* in place with RMSprop updates.

    Works like Adagrad, except that the squared-gradient cache is an
    exponential moving average with decay ``gamma = 0.9`` rather than an
    ever-growing sum. ``n_iter``, ``alpha`` and ``eps`` are read from
    module level.

    Args:
        model: Dict of weight matrices, updated in place.
        X_train: Training samples.
        y_train: Training labels.
        minibatch_size: Number of samples per mini-batch.

    Returns:
        The same *model* dict after training.
    """
    gamma = .9
    mean_square = {name: np.zeros_like(weights) for name, weights in model.items()}
    batches = get_minibatch(X_train, y_train, minibatch_size)

    for _ in range(n_iter):
        X_mini, y_mini = batches[np.random.randint(0, len(batches))]
        grad = get_minibatch_grad(model, X_mini, y_mini)

        for name, g in grad.items():
            mean_square[name] = gamma * mean_square[name] + (1 - gamma) * (g**2)
            model[name] += alpha * g / (np.sqrt(mean_square[name]) + eps)

    return model
def adam(model, X_train, y_train, minibatch_size):
    """Train *model* in place with Adam updates.

    Keeps exponential moving averages of the gradient (``beta1 = 0.9``) and
    of the squared gradient (``beta2 = 0.999``). Both are bias-corrected by
    ``1 - beta**t``, with ``t`` the 1-based iteration number, before the
    step is taken. ``n_iter``, ``alpha`` and ``eps`` are read from module
    level.

    Args:
        model: Dict of weight matrices, updated in place.
        X_train: Training samples.
        y_train: Training labels.
        minibatch_size: Number of samples per mini-batch.

    Returns:
        The same *model* dict after training.
    """
    beta1 = .9
    beta2 = .999
    first_moment = {name: np.zeros_like(weights) for name, weights in model.items()}
    second_moment = {name: np.zeros_like(weights) for name, weights in model.items()}

    batches = get_minibatch(X_train, y_train, minibatch_size)

    # t starts at 1 so the bias-correction denominators are never zero.
    for t in range(1, n_iter + 1):
        X_mini, y_mini = batches[np.random.randint(0, len(batches))]
        grad = get_minibatch_grad(model, X_mini, y_mini)

        for name, g in grad.items():
            first_moment[name] = beta1 * first_moment[name] + (1. - beta1) * g
            second_moment[name] = beta2 * second_moment[name] + (1. - beta2) * g**2

            m_hat = first_moment[name] / (1. - beta1**t)
            r_hat = second_moment[name] / (1. - beta2**t)

            model[name] += alpha * m_hat / (np.sqrt(r_hat) + eps)

    return model
def shuffle(X, y):
    """Shuffle samples and labels together so that rows stay aligned.

    The labels are appended to *X* as an extra last column, the rows of the
    combined array are permuted in place, and the two parts are split again.
    Because both travel through one array, the returned labels carry that
    array's dtype rather than the dtype of *y*.

    Args:
        X: 2-D sample matrix, one row per sample.
        y: 1-D labels, one per row of *X*.

    Returns:
        ``(X_shuffled, y_shuffled)`` in the same random row order.
    """
    stacked = np.column_stack((X, y))
    np.random.shuffle(stacked)  # permutes along the first axis only

    features = stacked[:, :-1]
    labels = stacked[:, -1]
    return features, labels
if __name__ == '__main__':
    # Benchmark every optimiser on the two-moons toy dataset.
    # NOTE(review): train_test_split is imported at the top of the file from
    # sklearn.cross_validation, which newer scikit-learn releases no longer
    # ship; sklearn.model_selection is the current location.
    X, y = make_moons(n_samples=5000, random_state=42, noise=0.1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Hyper-parameters. n_iter, eps and alpha are read as module-level names
    # by the optimiser functions, so they must keep these exact names.
    n_iter = 100
    eps = 1e-8  # Smoothing to avoid division by zero
    alpha = 1e-2
    minibatch_size = 100
    n_experiment = 3

    algos = {
        'sgd': sgd,
        'momentum': momentum,
        'nesterov': nesterov,
        'adagrad': adagrad,
        'rmsprop': rmsprop,
        'adam': adam,
    }

    # One accuracy slot per repetition for every optimiser.
    algo_accs = {name: np.zeros(n_experiment) for name in algos}

    for algo_name, algo in algos.items():
        print('Experimenting on {}'.format(algo_name))

        for run in range(n_experiment):
            # Start every repetition from freshly initialised weights.
            model = algo(make_network(), X_train, y_train, minibatch_size)

            # Classify the held-out samples one at a time.
            y_pred = np.zeros_like(y_test)
            for i, x in enumerate(X_test):
                _, prob = forward(x, model)
                y_pred[i] = np.argmax(prob)

            algo_accs[algo_name][run] = np.mean(y_pred == y_test)

    # NOTE(review): indentation was lost in the source; this blank line is
    # assumed to be printed once, after all experiments - confirm.
    print()

    for name, accs in algo_accs.items():
        print('{} => mean accuracy: {}, std: {}'.format(name, accs.mean(), accs.std()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment