Feed Forward Neural Net Classifier in Numpy
## external requirements
import numpy as np

# - - - - - - - - - - - - - - - - - -
# -- Model --
# - - - - - - - - - - - - - - - - - -

## produces model outputs
def forward(params: dict, inputs: np.ndarray, hps: dict) -> list:
    hidden_act_raw = np.add(
        np.matmul(
            inputs,
            params['input']['hidden']['weights']
        ),
        params['input']['hidden']['bias']
    )
    hidden_act = hps['hidden_activation'](hidden_act_raw)
    output_act_raw = np.add(
        np.matmul(
            hidden_act,
            params['hidden']['output']['weights']
        ),
        params['hidden']['output']['bias'],
    )
    output_act = hps['output_activation'](output_act_raw)
    return [hidden_act_raw, hidden_act, output_act_raw, output_act]
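
## quick shape sanity check (hypothetical numbers, not in the original gist):
## with inputs of shape (n, num_features), the hidden activations have shape
## (n, num_hidden_nodes) and the final outputs have shape (n, num_categories),
## e.g. forward(build_params(2, 3, 2), np.ones([4, 2]), hps)[-1].shape == (4, 2)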

## mean squared error loss function (squared error summed over outputs, averaged over rows)
def loss(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> float:
    return np.sum(
        np.square(
            np.subtract(
                forward(params, inputs, hps)[-1],
                targets
            )
        )
    ) / inputs.shape[0]
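
## i.e., for n input rows: loss = (1/n) * sum over rows i and output nodes k of
## (output_ik - target_ik)^2; its derivative w.r.t. each output, 2 * (output - target) / n,
## is the term that seeds the chain rule in loss_grad below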

## backprop (for the mean squared error cost function above)
def loss_grad(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> dict:
    hidden_act_raw, hidden_act, output_act_raw, output_act = forward(params, inputs, hps)

    ## gradients for decode layer ( chain rule on cost function )
    decode_grad = np.multiply(
        hps['output_activation_deriv'](output_act_raw),
        (2 * (output_act - targets)) / inputs.shape[0] # <-- deriv of cost function
    )

    ## gradients for decode weights
    decode_grad_w = np.matmul(
        hidden_act.T,
        decode_grad
    )

    ## gradients for decode bias
    decode_grad_b = decode_grad.sum(axis = 0, keepdims = True)

    # - - - - - - - - - -

    ## gradients for encode layer ( chain rule on hidden layer )
    encode_grad = np.multiply(
        hps['hidden_activation_deriv'](hidden_act_raw),
        np.matmul(
            decode_grad,
            params['hidden']['output']['weights'].T
        )
    )

    ## gradients for encode weights
    encode_grad_w = np.matmul(
        inputs.T,
        encode_grad
    )

    ## gradients for encode bias
    encode_grad_b = encode_grad.sum(axis = 0, keepdims = True)

    return {
        'input': {
            'hidden': {
                'weights': encode_grad_w,
                'bias': encode_grad_b,
            }
        },
        'hidden': {
            'output': {
                'weights': decode_grad_w,
                'bias': decode_grad_b,
            }
        }
    }
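
## optional sanity check (a minimal sketch, not part of the original gist):
## compares the analytic gradient of one encode weight against a centered
## finite difference; eps = 1e-5 is an assumed, conventional step size
def grad_check(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict, eps: float = 1e-5) -> float:
    analytic = loss_grad(params, inputs, targets, hps)['input']['hidden']['weights'][0, 0]
    params['input']['hidden']['weights'][0, 0] += eps
    loss_plus = loss(params, inputs, targets, hps)
    params['input']['hidden']['weights'][0, 0] -= 2 * eps
    loss_minus = loss(params, inputs, targets, hps)
    params['input']['hidden']['weights'][0, 0] += eps # <-- restore the perturbed weight
    return abs(analytic - ((loss_plus - loss_minus) / (2 * eps)))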

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# - - - - - - - - - - - - - - - - - -
# -- Convenience Functions --
# - - - - - - - - - - - - - - - - - -

def build_params(num_features: int, num_hidden_nodes: int, num_categories: int, weight_range: tuple = (-.1, .1)) -> dict:
    '''
    num_features <-- (numeric) number of features in the dataset
    num_hidden_nodes <-- (numeric) number of nodes in the hidden layer
    num_categories <-- (numeric) number of output categories (one output node per category)
    weight_range = (-.1, .1) <-- (tuple of numeric) lower and upper bounds for the uniform weight initialization
    '''
    return {
        'input': {
            'hidden': {
                'weights': np.random.uniform(*weight_range, [num_features, num_hidden_nodes]),
                'bias': np.random.uniform(*weight_range, [1, num_hidden_nodes]),
            },
        },
        'hidden': {
            'output': {
                'weights': np.random.uniform(*weight_range, [num_hidden_nodes, num_categories]),
                'bias': np.random.uniform(*weight_range, [1, num_categories]),
            }
        },
    }
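
## e.g., build_params(2, 10, 2) yields encode weights of shape (2, 10) and decode
## weights of shape (10, 2), with biases of shape (1, 10) and (1, 2) respectively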

## applies one step of vanilla gradient descent, updating params in place
def update_params(params: dict, gradients: dict, lr: float) -> dict:
    for layer in params:
        for connection in params[layer]:
            params[layer][connection]['weights'] -= lr * gradients[layer][connection]['weights']
            params[layer][connection]['bias'] -= lr * gradients[layer][connection]['bias']
    return params

## converts model outputs to discrete category choices (argmax over output nodes)
def response(params: dict, inputs: np.ndarray, hps: dict) -> np.ndarray:
    return np.argmax(
        forward(params, inputs = inputs, hps = hps)[-1],
        axis = 1
    )
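
## convenience accuracy metric (a minimal sketch, not part of the original gist):
## proportion of rows whose argmax response matches the one-hot target's category
def accuracy(params: dict, inputs: np.ndarray, targets: np.ndarray, hps: dict) -> float:
    return np.mean(response(params, inputs, hps) == np.argmax(targets, axis = 1))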

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

## Run Model
if __name__ == '__main__':

    ## make up a small toy dataset (two linearly separable categories)
    inputs = np.array([
        [.2, .3],
        [.3, .4],
        [.4, .5],
        [.5, .6],
        [.6, .7],
        [.7, .8],
        [.8, .9],
        [.2, .1],
        [.3, .2],
        [.4, .3],
        [.5, .4],
        [.6, .5],
        [.7, .6],
        [.8, .7],
    ])
    one_hot_labels = np.array([
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [0,1],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
        [1,0],
    ])

    sigmoid = lambda x: 1 / (1 + np.exp(-x))

    hps = {
        'lr': .5, # <-- learning rate
        'wr': [-.1, .1], # <-- weight range
        'num_hidden_nodes': 10,
        'hidden_activation': sigmoid, # <-- sigmoid activation function
        'hidden_activation_deriv': lambda x: sigmoid(x) * (1 - sigmoid(x)), # <-- sigmoid activation function derivative
        'output_activation': sigmoid, # <-- sigmoid activation function
        'output_activation_deriv': lambda x: sigmoid(x) * (1 - sigmoid(x)), # <-- sigmoid activation function derivative
    }

    params = build_params(
        inputs.shape[1], # <-- num features
        hps['num_hidden_nodes'],
        one_hot_labels.shape[1], # <-- num categories
        weight_range = hps['wr'] # <-- use the weight range defined above
    )

    num_epochs = 1000

    print('loss initially: ', loss(params, inputs, one_hot_labels, hps))
    for epoch in range(num_epochs):
        gradients = loss_grad(params, inputs, one_hot_labels, hps)
        params = update_params(params, gradients, hps['lr'])
    print('loss after training: ', loss(params, inputs, one_hot_labels, hps))
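
    ## example use of the accuracy helper sketched above (not in the original gist)
    print('accuracy after training: ', accuracy(params, inputs, one_hot_labels, hps))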