Reverse Mode Automatic Differentiation
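Reverse-mode automatic differentiation runs the computation graph forward once, recording every node's value on a tape, then walks the graph backward from the output: each node applies the chain rule locally and accumulates its gradient contribution into its inputs. The file below is a small NumPy sketch of that idea: a handful of node types (add, subtract, element-wise multiply, matrix multiply, ReLU, constant) plus a Graph that runs the forward and backward passes, used at the bottom to train a tiny 2-5-1 network on XOR by plain gradient descent.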
import numpy
import random
class Graph:
    def __init__(self):
        self.nodes = list()
        self.last_tape = None

    def forward(self, variables, output_node):
        # Nodes are evaluated in the order they were added, so inputs must be
        # added before the nodes that consume them.
        for n in self.nodes:
            n.forward(variables)
        self.last_tape = variables
        return variables[output_node]

    def backward(self, output_node, starting_grad=1.0, grad=None):
        if grad is None:
            grad = dict()
        grad[output_node] = starting_grad
        # Walk the nodes in reverse insertion (topological) order so each node's
        # output gradient is fully accumulated before it is pushed to its inputs,
        # and so no node is visited more than once when it feeds several consumers.
        for n in reversed(self.nodes):
            if n in grad:
                n.backward(grad, self.last_tape)
        return grad

    def add_node(self, node):
        self.nodes.append(node)
        return node
class Node:
    def __init__(self, inputs=None):
        self.inputs = inputs or list()

    def forward(self, variables: dict):
        pass

    def backward(self, grad: dict, tape: dict):
        # Make sure every input has a zero gradient entry to accumulate into.
        for n in self.inputs:
            if n not in grad:
                grad[n] = numpy.atleast_2d(numpy.zeros_like(tape[n], dtype=float))
class InputNode(Node):
    def __init__(self):
        super().__init__()

    def forward(self, variables):
        assert self in variables, f"Input not specified in feed dictionary: {self}"

    def backward(self, grad, tape):
        super().backward(grad, tape)
class AddNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])

    def forward(self, variables):
        accumulator = 0
        for n in self.inputs:
            accumulator += variables[n]
        variables[self] = accumulator

    def backward(self, grad, tape):
        super().backward(grad, tape)
        # d(a + b)/da = d(a + b)/db = 1, so the upstream gradient passes through unchanged.
        for idx in self.inputs:
            grad[idx] += grad[self]
class SubtractNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])

    def forward(self, variables):
        variables[self] = variables[self.inputs[0]] - variables[self.inputs[1]]

    def backward(self, grad, tape):
        super().backward(grad, tape)
        grad[self.inputs[0]] += grad[self]
        grad[self.inputs[1]] -= grad[self]
class ElementMultiplyNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])

    def forward(self, variables):
        variables[self] = variables[self.inputs[0]] * variables[self.inputs[1]]

    def backward(self, grad, tape):
        super().backward(grad, tape)
        # Product rule: d(a*b)/da = b and d(a*b)/db = a.
        grad[self.inputs[0]] += grad[self] * tape[self.inputs[1]]
        grad[self.inputs[1]] += grad[self] * tape[self.inputs[0]]
class MatrixMultiplyNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])

    def forward(self, variables):
        variables[self] = numpy.dot(variables[self.inputs[0]], variables[self.inputs[1]])

    def backward(self, grad, tape):
        super().backward(grad, tape)
        # For C = A @ B: dL/dA = dL/dC @ B.T and dL/dB = A.T @ dL/dC.
        grad[self.inputs[0]] += numpy.dot(numpy.atleast_2d(grad[self]), numpy.atleast_2d(tape[self.inputs[1]]).T)
        grad[self.inputs[1]] += numpy.dot(numpy.atleast_2d(tape[self.inputs[0]]).T, numpy.atleast_2d(grad[self]))
class ReluNode(Node):
    def __init__(self, a):
        super().__init__(inputs=[a])

    def forward(self, variables):
        variables[self] = variables[self.inputs[0]] * (variables[self.inputs[0]] > 0)

    def backward(self, grad, tape):
        super().backward(grad, tape)
        # The ReLU derivative is 1 where the input was positive; keep a small slope
        # on the negative side (leaky) so units can recover if they die.
        x = tape[self.inputs[0]]
        grad[self.inputs[0]] += (x > 0) * grad[self] + (x <= 0) * (0.0001 * grad[self])
###

class ConstantNode(Node):
    def __init__(self, value):
        super().__init__()
        self.value = value

    def forward(self, variables):
        variables[self] = self.value

    def backward(self, grad, tape):
        grad[self] = numpy.zeros_like(self.value)
# One hidden layer, one input (of shape 1x2), and one output (of shape 1x1).
g = Graph()
visible = g.add_node(InputNode())
input_to_hidden = g.add_node(InputNode())
hidden_to_output = g.add_node(InputNode())
target = g.add_node(InputNode())
hidden = g.add_node(MatrixMultiplyNode(visible, input_to_hidden))
hidden_act = g.add_node(ReluNode(hidden))
output = g.add_node(MatrixMultiplyNode(hidden_act, hidden_to_output))
diff = g.add_node(SubtractNode(target, output))
loss = g.add_node(ElementMultiplyNode(diff, diff))
# Init weights.
input_to_hidden_weights = numpy.random.uniform(low=-1, high=1, size=(2, 5))
hidden_to_output_weights = numpy.random.uniform(low=-1, high=1, size=(5, 1))

loss_accum = 0
for step in range(5000):
    x, y = random.choice([
        (numpy.asarray([0, 0]), numpy.asarray([0])),
        (numpy.asarray([0, 1]), numpy.asarray([1])),
        (numpy.asarray([1, 0]), numpy.asarray([1])),
        (numpy.asarray([1, 1]), numpy.asarray([0])),
        #(numpy.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]), numpy.asarray([[0], [1], [1], [0]]))
    ])
    feed = {visible: x, input_to_hidden: input_to_hidden_weights, hidden_to_output: hidden_to_output_weights, target: y}
    step_loss = g.forward(feed, output_node=loss)
    loss_accum += step_loss
    if step % 1000 == 999:
        # Average loss over the last 1000 steps.
        print(loss_accum / 1000)
        loss_accum = 0.0
    grad = g.backward(output_node=loss, starting_grad=numpy.asarray([1.0]))
    # Gradient descent: move the weights in the opposite direction of the gradient.
    input_to_hidden_weights -= grad[input_to_hidden] * 0.001
    hidden_to_output_weights -= grad[hidden_to_output] * 0.001
#zero_and_zero = g.forward({visible: numpy.asarray([0, 0]), input_to_hidden: input_to_hidden_weights, hidden_to_output: hidden_to_output_weights, target: 0}, output_node=output)
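# --- Optional sanity check (not part of the original gist). ---
# A minimal sketch, assuming only the graph and weights defined above: compare the
# analytic gradient from g.backward against a central finite difference for one entry
# of input_to_hidden_weights. The function name and eps value are introduced here
# purely for illustration.
def finite_difference_check(eps=1e-5):
    x = numpy.asarray([0.0, 1.0])
    y = numpy.asarray([1.0])
    feed = {visible: x, input_to_hidden: input_to_hidden_weights, hidden_to_output: hidden_to_output_weights, target: y}
    g.forward(feed, output_node=loss)
    analytic = g.backward(output_node=loss, starting_grad=numpy.asarray([1.0]))[input_to_hidden][0, 0]
    # Perturb the same weight up and down and measure the change in the loss.
    w_plus = input_to_hidden_weights.copy()
    w_plus[0, 0] += eps
    w_minus = input_to_hidden_weights.copy()
    w_minus[0, 0] -= eps
    loss_plus = g.forward({**feed, input_to_hidden: w_plus}, output_node=loss)
    loss_minus = g.forward({**feed, input_to_hidden: w_minus}, output_node=loss)
    numeric = (loss_plus - loss_minus)[0] / (2.0 * eps)
    print(f"analytic={analytic:.6f}  numeric={numeric:.6f}")

#finite_difference_check()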