vectorised autodiff example
# minimal example, using code from: https://sidsite.com/posts/autodiff/
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np


class Variable:
    def __init__(self, value, local_gradients=()):
        self.value = value
        self.local_gradients = local_gradients

    def __add__(self, other):
        return add(self, other)

    def __mul__(self, other):
        return mul(self, other)

    def __sub__(self, other):
        return add(self, neg(other))


def get_gradients(variable):
    """Compute the first derivatives of `variable`
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)

    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)

    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients


def add(a, b):
    value = a.value + b.value
    local_gradients = (
        (a, 1),
        (b, 1),
    )
    return Variable(value, local_gradients)


def mul(a, b):
    value = a.value * b.value
    local_gradients = (
        (a, b.value),
        (b, a.value),
    )
    return Variable(value, local_gradients)


def neg(a):
    value = -1 * a.value
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)
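
# Quick sanity check (an addition, not part of the original gist): differentiate
# a small scalar expression and compare against the analytic derivatives.
# The `_a`, `_b`, `_f` names are purely illustrative.
# For f = a*b + a, we expect df/da = b + 1 and df/db = a.
_a = Variable(3.0)
_b = Variable(4.0)
_f = _a * _b + _a
_grads = get_gradients(_f)
assert _grads[_a] == _b.value + 1  # 5.0
assert _grads[_b] == _a.value      # 3.0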

# convert NumPy array into array of Variable objects:
to_var = np.vectorize(lambda x: Variable(x))

# get values from array of Variable objects:
to_vals = np.vectorize(lambda variable: variable.value)


# Create linear layer
np.random.seed(0)


def update_weights(weights, gradients, lrate):
    # gradient-descent step: update each weight Variable in place
    for _, weight in np.ndenumerate(weights):
        weight.value -= lrate * gradients[weight]


input_size = 50
output_size = 10
lrate = 0.001

x = to_var(np.random.random(input_size))
y_true = to_var(np.random.random(output_size))
weights = to_var(np.random.random((input_size, output_size)))

# train with full-batch gradient descent on a squared-error loss
loss_vals = []
for i in range(100):
    y_pred = np.dot(x, weights)
    loss = np.sum((y_true - y_pred) * (y_true - y_pred))
    loss_vals.append(loss.value)
    gradients = get_gradients(loss)
    update_weights(weights, gradients, lrate)

plt.plot(loss_vals)
plt.xlabel("Time step")
plt.ylabel("Loss")
plt.title("Single linear layer learning")
plt.show()
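
# Optional check (an addition, not in the original gist): compare the autodiff
# gradient of one weight against a finite-difference estimate, reusing the
# `weights`, `x` and `y_true` arrays from the training loop above.
def forward_loss():
    y_pred = np.dot(x, weights)
    return np.sum((y_true - y_pred) * (y_true - y_pred))

check_loss = forward_loss()
check_grads = get_gradients(check_loss)
w = weights[0, 0]   # an arbitrary weight Variable
eps = 1e-6
w.value += eps      # perturb it...
numeric = (forward_loss().value - check_loss.value) / eps
w.value -= eps      # ...and restore it
print("autodiff:", check_grads[w], "finite difference:", numeric)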