Last active
July 11, 2017 10:40
-
-
Save shouya/87fd62cd9bc8ef9a3176a1b634b2acf6 to your computer and use it in GitHub Desktop.
Implementations of a simple neural network in plain NumPy, torch.autograd, and torch.nn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# N is batch size; D_in is input dimension;\n", | |
"# H is hidden dimension; D_out is output dimension.\n", | |
"N, D_in, H, D_out = 6, 3, 10, 1\n", | |
"\n", | |
"w_1 = np.random.randn(D_in, H)\n", | |
"h_1 = np.random.randn(1, H)\n", | |
"w_2 = np.random.randn(H, D_out)\n", | |
"h_2 = np.random.randn(1, D_out)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learning_rate = 1e-6\n", | |
"# X: N x D_in\n", | |
"# y: N x D_out\n", | |
"def train(X, y):\n", | |
" global w_1, h_1, w_2, h_2, learning_rate\n", | |
" X, y = np.array(X), np.array(y)\n", | |
" \n", | |
" # forward\n", | |
" m = X.dot(w_1) + h_1 # m: N x H\n", | |
" m_relu = np.maximum(m, 0) # m_relu: N x H\n", | |
" p = m_relu.dot(w_2) + h_2 # p: N x D_out\n", | |
" \n", | |
" # loss = Sum((y - p)^2)\n", | |
" loss = np.sum(np.square(y - p))\n", | |
" \n", | |
" grad_p = 2.0 * (p - y) # N x D_out\n", | |
" grad_w_2 = m_relu.T.dot(grad_p) # H x D_out\n", | |
" grad_h_2 = np.ones([1,N]).dot(grad_p) # 1 x D_out\n", | |
" grad_m_relu = grad_p.dot(w_2.T) # N x H\n", | |
" grad_m = grad_m_relu.copy()\n", | |
" grad_m[m < 0] = 0 # N x H\n", | |
" grad_w_1 = X.T.dot(grad_m) # D_in x H\n", | |
" grad_h_1 = np.ones([1,N]).dot(grad_m) # 1 x H\n", | |
" \n", | |
" w_1 -= grad_w_1 * learning_rate\n", | |
" h_1 -= grad_h_1 * learning_rate\n", | |
" w_2 -= grad_w_2 * learning_rate\n", | |
" h_2 -= grad_h_2 * learning_rate\n", | |
" \n", | |
" return (p, loss)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 170.107147598\n", | |
"1000 40.8882320157\n", | |
"2000 15.4625862613\n", | |
"3000 7.59400657593\n", | |
"4000 4.80639618412\n", | |
"5000 3.25362955576\n", | |
"6000 2.53836435786\n", | |
"7000 2.16317548503\n", | |
"8000 1.92283008973\n", | |
"9000 1.75861723377\n", | |
"10000 1.63850458144\n", | |
"11000 1.54485930858\n", | |
"12000 1.46783602095\n", | |
"13000 1.40181951668\n", | |
"14000 1.3435117832\n", | |
"15000 1.28310906035\n", | |
"16000 1.21733002681\n", | |
"17000 1.15698675267\n", | |
"18000 1.10122366141\n", | |
"19000 1.04940541193\n", | |
"20000 1.00104182762\n", | |
"21000 0.95574279473\n", | |
"22000 0.91319013619\n", | |
"23000 0.873119322178\n", | |
"24000 0.835307053419\n", | |
"25000 0.799562480248\n", | |
"26000 0.765720766615\n", | |
"27000 0.73363823201\n", | |
"28000 0.703188598995\n", | |
"29000 0.674260043442\n", | |
"30000 0.646752844883\n", | |
"31000 0.620577495877\n", | |
"32000 0.595653168544\n", | |
"33000 0.571906462557\n", | |
"34000 0.549270377018\n", | |
"35000 0.527683461653\n", | |
"36000 0.507089112432\n", | |
"37000 0.487434984025\n", | |
"38000 0.468672497126\n", | |
"39000 0.450756423128\n", | |
"40000 0.433644532035\n", | |
"41000 0.417297292311\n", | |
"42000 0.4016776135\n", | |
"43000 0.386750624238\n", | |
"44000 0.372483479649\n", | |
"45000 0.358845193253\n", | |
"46000 0.346616725173\n", | |
"47000 0.335556267132\n", | |
"48000 0.324956038611\n", | |
"49000 0.314792557367\n" | |
] | |
} | |
], | |
"source": [ | |
"for i in range(50000):\n", | |
" data = [\n", | |
" (1, [1,2,3]),\n", | |
" (1, [1,2,4]),\n", | |
" (1, [2,3,4]),\n", | |
" (0, [1,4,2]),\n", | |
" (0, [2,4,3]),\n", | |
" (0, [5,3,1])\n", | |
" ]\n", | |
" res, loss = train([x[1] for x in data], [[x[0]] for x in data])\n", | |
" if i % 1000 == 0:\n", | |
" print(i, loss)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See https://pytorch.org/tutorials/beginner/pytorch_with_examples.html.