# inspired by micrograd and tinygrad
import numpy as np
class Tensor:
    def __init__(self, data, name=''):
        self.data = np.array(data)
        self.grad = np.zeros_like(self.data)
        self.parents = []   # tensors this one was computed from
        self.op = ''        # op that produced this tensor ('add', 'mul', ...)
        self.name = name

    def __str__(self):
        return str(self.data)

    def __repr__(self):
        return str(self.data)

    def backward(self, grad=None):
        # accumulate the incoming gradient, then push it to the parents
        # according to the op that produced this tensor
        if grad is None:
            grad = np.ones_like(self.data)
        self.grad += grad
        if self.op == 'sum':
            assert len(self.parents) == 1
            self.parents[0].backward(np.ones_like(self.parents[0].data) * grad)
        elif self.op == 'add':
            assert len(self.parents) == 2
            self.parents[0].backward(grad)
            self.parents[1].backward(grad)
        elif self.op == 'mul':
            assert len(self.parents) == 2
            self.parents[0].backward(grad * self.parents[1].data)
            self.parents[1].backward(grad * self.parents[0].data)
        elif self.op == 'relu':
            assert len(self.parents) == 1
            self.parents[0].backward(grad * (self.data > 0))
        elif self.op == 'matmul':
            assert len(self.parents) == 2
            self.parents[0].backward(np.dot(grad, self.parents[1].data.T))
            self.parents[1].backward(np.dot(self.parents[0].data.T, grad))
        elif self.op == 'mean':
            assert len(self.parents) == 1
            self.parents[0].backward(grad / self.data.size)
        elif self.op == 'logsoftmax':
            assert len(self.parents) == 1
            # exp(logsoftmax) is the softmax, so the backward pass is
            # grad - softmax * row_sum(grad)
            softmax_output = np.exp(self.data)
            grad_out = grad - softmax_output * np.sum(grad, axis=1).reshape((-1, 1))
            self.parents[0].backward(grad_out)
def add(a, b):
    c = Tensor(a.data + b.data)
    c.parents = [a, b]
    c.op = 'add'
    return c

def sum(a):
    c = Tensor(np.sum(a.data))
    c.parents = [a]
    c.op = 'sum'
    return c

def mul(a, b):
    c = Tensor(a.data * b.data)
    c.parents = [a, b]
    c.op = 'mul'
    return c

def matmul(a, b):
    c = Tensor(np.dot(a.data, b.data))
    c.parents = [a, b]
    c.op = 'matmul'
    return c

def relu(a):
    c = Tensor(np.maximum(a.data, 0))
    c.parents = [a]
    c.op = 'relu'
    return c

def mean(a):
    c = Tensor(np.mean(a.data))
    c.parents = [a]
    c.op = 'mean'
    return c
def logsoftmax(a):
    # subtract the row max before exponentiating (log-sum-exp trick) for
    # numerical stability: logsoftmax(x) = x - max - log(sum(exp(x - max)))
    max_vals = np.max(a.data, axis=1, keepdims=True)
    exp_a = np.exp(a.data - max_vals)
    sum_exp_a = np.sum(exp_a, axis=1, keepdims=True)
    c = Tensor(a.data - max_vals - np.log(sum_exp_a))
    c.parents = [a]
    c.op = 'logsoftmax'
    return c
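# Optional sanity check (not part of the original gist): a minimal sketch that
# exercises matmul/relu/sum and compares the analytic gradient from backward()
# against a finite-difference estimate. The helper name is just illustrative.
def _gradient_check():
    a = Tensor([[1.0, -2.0], [3.0, 0.5]])
    b = Tensor([[0.5], [-1.0]])
    loss = sum(relu(matmul(a, b)))
    loss.backward()
    eps = 1e-5
    a_plus = a.data.copy()
    a_plus[0, 0] += eps
    loss_plus = np.sum(np.maximum(np.dot(a_plus, b.data), 0))
    numeric = (loss_plus - loss.data) / eps
    print('analytic d(loss)/d(a[0,0]):', a.grad[0, 0], 'numeric:', numeric)
# _gradient_check()  # uncomment to run the check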
# Test the implementation by training on MNIST
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = (x_train / 255).astype(np.float32)
x_test = (x_test / 255).astype(np.float32)
def uniform_init_matrix(n, m):
    # scaled uniform init; parenthesized so the weights themselves end up float32
    return (np.random.uniform(-1., 1., size=(n, m)) / np.sqrt(n * m)).astype(np.float32)
class TinyNet:
    # two-layer MLP: 784 -> 128 -> 10, with log-softmax outputs
    def __init__(self):
        self.w1 = Tensor(uniform_init_matrix(784, 128), name='w1')
        self.w2 = Tensor(uniform_init_matrix(128, 10), name='w2')

    def forward(self, x):
        x1 = matmul(x, self.w1)
        x2 = relu(x1)
        x3 = matmul(x2, self.w2)
        o = logsoftmax(x3)
        return o
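# Optional shape check (not part of the original gist): run a dummy batch through
# a fresh TinyNet and confirm the output is a (batch, 10) matrix of log-probabilities,
# i.e. each row of exp(out) sums to ~1. The helper name is just illustrative.
def _shape_check():
    net = TinyNet()
    dummy = Tensor(np.random.rand(4, 784).astype(np.float32))
    out = net.forward(dummy)
    print(out.data.shape, np.exp(out.data).sum(axis=1))
# _shape_check()  # uncomment to run the check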
model = TinyNet()
lr = 0.005
BS = 128
losses = []
for i in range(1000):
    # sample a random mini-batch
    samp = np.random.randint(0, x_train.shape[0], size=(BS))
    x = Tensor(x_train[samp].reshape(-1, 28*28), name='x')
    y_samp = y_train[samp]
    # NLL loss: put -1 at the target class so mean(out * y) picks out
    # the negative log-probability of the correct label
    y = np.zeros((len(samp), 10), np.float32)
    y[range(y.shape[0]), y_samp] = -1.0
    y = Tensor(y, name='y')
    out = model.forward(x)
    out_mul_y = mul(out, y)
    loss = mean(out_mul_y)
    loss.backward()
    losses.append(loss.data)
    if i % 100 == 0:
        print('batch', i, 'loss', loss.data)
    # SGD step, then reset the gradients for the next batch
    model.w1.data = model.w1.data - lr * model.w1.grad
    model.w2.data = model.w2.data - lr * model.w2.grad
    model.w1.grad = np.zeros_like(model.w1.grad)
    model.w2.grad = np.zeros_like(model.w2.grad)
# test
preds_out = model.forward(Tensor(x_test.reshape(-1,28*28)))
preds = np.argmax(preds_out.data, axis=1)
print('test accuracy', np.mean(preds == y_test))
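# Example usage (not in the original gist): inspect a single prediction
# from the trained model against its true label.
# sample = Tensor(x_test[0].reshape(1, 28*28))
# print('predicted:', np.argmax(model.forward(sample).data), 'actual:', y_test[0])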