@JosephCatrambone
Created February 23, 2021 18:34
Reverse Mode Automatic Differentiation
import numpy
import random
class Graph:
    def __init__(self):
        self.nodes = list()
        self.last_tape = None
    def forward(self, variables, output_node):
        # Run every node in registration order; 'variables' accumulates each
        # node's output and doubles as the tape for the backward pass.
        for n in self.nodes:
            n.forward(variables)
        self.last_tape = variables
        return variables[output_node]
    def backward(self, output_node, starting_grad=1.0, grad=None):
        if grad is None:
            grad = dict()
        grad[output_node] = starting_grad
        # Nodes are registered in dependency order by add_node, so walking
        # them in reverse visits each node only after everything that
        # consumes it; revisiting a node would double-count its gradient.
        for n in reversed(self.nodes):
            if n in grad:
                n.backward(grad, self.last_tape)
        return grad
    def add_node(self, node):
        self.nodes.append(node)
        return node
class Node:
    def __init__(self, inputs=None):
        self.inputs = inputs or list()
    def forward(self, variables: dict):
        pass
    def backward(self, grad: dict, tape: dict):
        # Ensure every input has a gradient buffer to accumulate into.
        for n in self.inputs:
            if n not in grad:
                grad[n] = numpy.atleast_2d(numpy.zeros_like(tape[n], dtype=float))
class InputNode(Node):
    def __init__(self):
        super().__init__()
    def forward(self, variables):
        assert self in variables, f"Input not specified in feed dictionary: {self}"
    def backward(self, grad, tape):
        super().backward(grad, tape)
class AddNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])
    def forward(self, variables):
        accumulator = 0
        for n in self.inputs:
            accumulator += variables[n]
        variables[self] = accumulator
    def backward(self, grad, tape):
        super().backward(grad, tape)
        # d(a+b)/da = d(a+b)/db = 1: pass the upstream gradient through.
        for idx in self.inputs:
            grad[idx] += grad[self]
class SubtractNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])
    def forward(self, variables):
        variables[self] = variables[self.inputs[0]] - variables[self.inputs[1]]
    def backward(self, grad, tape):
        super().backward(grad, tape)
        grad[self.inputs[0]] += grad[self]
        grad[self.inputs[1]] -= grad[self]
class ElementMultiplyNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])
    def forward(self, variables):
        variables[self] = variables[self.inputs[0]] * variables[self.inputs[1]]
    def backward(self, grad, tape):
        super().backward(grad, tape)
        # d(a*b)/da = b and d(a*b)/db = a, read from the forward tape.
        grad[self.inputs[0]] += grad[self]*tape[self.inputs[1]]
        grad[self.inputs[1]] += grad[self]*tape[self.inputs[0]]
class MatrixMultiplyNode(Node):
    def __init__(self, a, b):
        super().__init__(inputs=[a, b])
    def forward(self, variables):
        variables[self] = numpy.dot(variables[self.inputs[0]], variables[self.inputs[1]])
    def backward(self, grad, tape):
        super().backward(grad, tape)
        # For C = A @ B: dL/dA = dL/dC @ B.T and dL/dB = A.T @ dL/dC.
        grad[self.inputs[0]] += numpy.dot(numpy.atleast_2d(grad[self]), numpy.atleast_2d(tape[self.inputs[1]]).T)
        grad[self.inputs[1]] += numpy.dot(numpy.atleast_2d(tape[self.inputs[0]]).T, numpy.atleast_2d(grad[self]))
class ReluNode(Node):
    def __init__(self, a):
        super().__init__(inputs=[a])
    def forward(self, variables):
        variables[self] = variables[self.inputs[0]] * (variables[self.inputs[0]] > 0)
    def backward(self, grad, tape):
        super().backward(grad, tape)
        # The forward pass is a plain ReLU, so its derivative is 1 where the
        # input was positive and 0 elsewhere; gate the upstream gradient.
        grad[self.inputs[0]] += (tape[self.inputs[0]] > 0) * grad[self]
###
class ConstantNode(Node):
    def __init__(self, value):
        super().__init__()
        self.value = value
    def forward(self, variables):
        variables[self] = self.value
    def backward(self, grad, tape):
        grad[self] = numpy.zeros_like(self.value)
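# Illustrative sanity check (a sketch using only the classes above): for
# y = x*x, reverse mode should give dy/dx = 2x, so x = 3 should yield 6.
_check_graph = Graph()
_x = _check_graph.add_node(InputNode())
_y = _check_graph.add_node(ElementMultiplyNode(_x, _x))
_check_graph.forward({_x: numpy.asarray([3.0])}, output_node=_y)
_check_grad = _check_graph.backward(output_node=_y, starting_grad=numpy.asarray([1.0]))
assert numpy.allclose(_check_grad[_x], 6.0), "Expected dy/dx = 2*x = 6 at x = 3."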
# One hidden layer, one input (of shape 1x2), and one output (of shape 1x1).
g = Graph()
visible = g.add_node(InputNode())
input_to_hidden = g.add_node(InputNode())
hidden_to_output = g.add_node(InputNode())
target = g.add_node(InputNode())
hidden = g.add_node(MatrixMultiplyNode(visible, input_to_hidden))
hidden_act = g.add_node(ReluNode(hidden))
output = g.add_node(MatrixMultiplyNode(hidden_act, hidden_to_output))
diff = g.add_node(SubtractNode(target, output))
loss = g.add_node(ElementMultiplyNode(diff, diff))
# Init weights
input_to_hidden_weights = numpy.random.uniform(low=-1, high=1, size=(2, 5))
hidden_to_output_weights = numpy.random.uniform(low=-1, high=1, size=(5, 1))
loss_accum = 0
for step in range(5000):
    x, y = random.choice([
        (numpy.asarray([0, 0]), numpy.asarray([0])),
        (numpy.asarray([0, 1]), numpy.asarray([1])),
        (numpy.asarray([1, 0]), numpy.asarray([1])),
        (numpy.asarray([1, 1]), numpy.asarray([0])),
        #(numpy.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]), (numpy.asarray([[0], [1], [1], [0]])))
    ])
    feed = {visible: x, input_to_hidden: input_to_hidden_weights, hidden_to_output: hidden_to_output_weights, target: y}
    step_loss = g.forward(feed, output_node=loss)
    loss_accum += step_loss
    if step % 1000 == 0:
        print(loss_accum / 1000)
        loss_accum = 0.0
    grad = g.backward(output_node=loss, starting_grad=numpy.asarray([1.0]))  # Want to move in the opposite direction.
    input_to_hidden_weights -= grad[input_to_hidden]*0.001
    hidden_to_output_weights -= grad[hidden_to_output]*0.001
#zero_and_zero = g.forward({visible: numpy.asarray([0, 0]), input_to_hidden:input_to_hidden_weights, hidden_to_output:hidden_to_output_weights, target:0}, output_node=output)
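# A sketch of one way to check the fit, in the spirit of the commented-out
# line above: run the trained weights forward on all four XOR inputs and
# print the network's outputs.
for check_x, check_y in [([0, 0], 0), ([0, 1], 1), ([1, 0], 1), ([1, 1], 0)]:
    prediction = g.forward({
        visible: numpy.asarray(check_x),
        input_to_hidden: input_to_hidden_weights,
        hidden_to_output: hidden_to_output_weights,
        target: numpy.asarray([check_y]),
    }, output_node=output)
    print(f"{check_x} -> {prediction} (target {check_y})")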