class Tensor:
    def __init__(self, value, requires_grad=False):
        self.value = value                 # Store the value of the tensor
        self.grad = 0                      # Gradient initialized to zero
        self.requires_grad = requires_grad
        self._backward = lambda: None      # Function to compute gradient
        self._prev = set()                 # Track previous nodes for backpropagation

    def backward(self):
        """Computes gradients using reverse-mode automatic differentiation."""
        # Initialize gradient to 1 if this is the final output node
        if self.grad == 0:
            self.grad = 1

        # Topological ordering of nodes using depth-first search
        topo_order = []
        visited = set()

        def build_topo(node):
            if node not in visited:
                visited.add(node)
                for parent in node._prev:
                    build_topo(parent)
                topo_order.append(node)

        build_topo(self)

        # Reverse iterate for backpropagation
        for node in reversed(topo_order):
            node._backward()

    def __repr__(self):
        return f"Tensor(value={self.value}, grad={self.grad})"


# Define operations
def add(a, b):
    """Addition operation."""
    out = Tensor(
        a.value + b.value,
        requires_grad=(a.requires_grad or b.requires_grad)
    )

    def _backward():
        if a.requires_grad:
            a.grad += out.grad
        if b.requires_grad:
            b.grad += out.grad

    out._backward = _backward
    out._prev = {a, b}
    return out


def multiply(a, b):
    """Multiplication operation."""
    out = Tensor(
        a.value * b.value,
        requires_grad=(a.requires_grad or b.requires_grad)
    )

    def _backward():
        if a.requires_grad:
            a.grad += b.value * out.grad
        if b.requires_grad:
            b.grad += a.value * out.grad

    out._backward = _backward
    out._prev = {a, b}
    return out


def relu(x):
    """ReLU activation function."""
    out = Tensor(max(0, x.value), requires_grad=x.requires_grad)

    def _backward():
        if x.requires_grad:
            x.grad += (out.value > 0) * out.grad  # Gradient is 1 if x > 0, else 0

    out._backward = _backward
    out._prev = {x}
    return out


# Example computation graph
x = Tensor(3.0, requires_grad=True)
y = Tensor(2.0, requires_grad=True)

# Forward pass
z = multiply(x, y)  # z = x * y
w = add(z, y)       # w = z + y
o = relu(w)         # o = ReLU(w)

# Backpropagation
o.backward()

# Print gradients
print(f"x.grad: {x.grad}") # Should be y.value if ReLU is active | |
print(f"y.grad: {y.grad}") # Should be x.value + 1 (from addition) |