class Tensor:
    def __init__(self, value, requires_grad=False):
        self.value = value                 # Store the value of the tensor
        self.grad = 0                      # Gradient initialized to zero
        self.requires_grad = requires_grad
        self._backward = lambda: None      # Function to compute gradient
        self._prev = set()                 # Track previous nodes for backpropagation

    def backward(self):
        """Computes gradients using reverse-mode automatic differentiation."""
        # Initialize gradient to 1 if this is the final output node
        if self.grad == 0:
            self.grad = 1

        # Topological ordering of nodes using depth-first search
        topo_order = []
        visited = set()

        def build_topo(node):
            if node not in visited:
                visited.add(node)
                for parent in node._prev:
                    build_topo(parent)
                topo_order.append(node)

        build_topo(self)

        # Reverse iterate for backpropagation
        for node in reversed(topo_order):
            node._backward()

    def __repr__(self):
        return f"Tensor(value={self.value}, grad={self.grad})"


# Define operations
def add(a, b):
    """Addition operation."""
    out = Tensor(
        a.value + b.value,
        requires_grad=(a.requires_grad or b.requires_grad)
    )

    def _backward():
        if a.requires_grad:
            a.grad += out.grad
        if b.requires_grad:
            b.grad += out.grad

    out._backward = _backward
    out._prev = {a, b}
    return out


def multiply(a, b):
    """Multiplication operation."""
    out = Tensor(
        a.value * b.value,
        requires_grad=(a.requires_grad or b.requires_grad)
    )

    def _backward():
        if a.requires_grad:
            a.grad += b.value * out.grad
        if b.requires_grad:
            b.grad += a.value * out.grad

    out._backward = _backward
    out._prev = {a, b}
    return out


def relu(x):
    """ReLU activation function."""
    out = Tensor(max(0, x.value), requires_grad=x.requires_grad)

    def _backward():
        if x.requires_grad:
            x.grad += (out.value > 0) * out.grad  # Gradient is 1 if x > 0, else 0

    out._backward = _backward
    out._prev = {x}
    return out


# Example computation graph
x = Tensor(3.0, requires_grad=True)
y = Tensor(2.0, requires_grad=True)

# Forward pass
z = multiply(x, y)  # z = x * y
w = add(z, y)       # w = z + y
o = relu(w)         # o = ReLU(w)

# Backpropagation
o.backward()

# Print gradients
print(f"x.grad: {x.grad}") # Should be y.value if ReLU is active | |
print(f"y.grad: {y.grad}") # Should be x.value + 1 (from addition) |