Created
April 4, 2018 03:22
-
-
Save JiaweiZhuang/bfc15f5c00c529bc12072c0895d762ab to your computer and use it in GitHub Desktop.
Benchmark NN forward pass: NumPy vs PyTorch vs Fortran
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
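{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Forward-pass benchmark of a bias-free two-layer ReLU network in NumPy, PyTorch and Fortran.\n", | |
"\n", | |
"The network setup follows the PyTorch tutorial: http://pytorch.org/tutorials/beginner/pytorch_with_examples.html" | |
] | |
}, | |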
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import matplotlib.pyplot as plt\n", | |
"import numpy as np\n", | |
"\n", | |
"import torch\n", | |
"from torch.autograd import Variable\n", | |
"from torch import nn" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Numpy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# N is batch size; D_in is input dimension;\n", | |
"# H is hidden dimension; D_out is output dimension.\n", | |
"N, D_in, H, D_out = 5000, 200, 200, 200\n", | |
"\n", | |
"# Create random input and output data\n", | |
"x = np.random.randn(N, D_in)\n", | |
"y = np.random.randn(N, D_out)\n", | |
"\n", | |
"# Randomly initialize weights\n", | |
"w1 = np.random.randn(D_in, H)\n", | |
"w2 = np.random.randn(H, D_out)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"17.1 ms ± 326 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"# Forward pass: compute predicted y\n", | |
"h = x.dot(w1)\n", | |
"h_relu = np.maximum(h, 0)\n", | |
"y_pred = h_relu.dot(w2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Pytorch hand coded" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dtype = torch.DoubleTensor # to be consistent with numpy and Fortran\n", | |
"#dtype = torch.FloatTensor # faster\n", | |
"\n", | |
"# Create random input and output data\n", | |
"x = torch.randn(N, D_in).type(dtype)\n", | |
"y = torch.randn(N, D_out).type(dtype)\n", | |
"\n", | |
"# Randomly initialize weights\n", | |
"w1 = torch.randn(D_in, H).type(dtype)\n", | |
"w2 = torch.randn(H, D_out).type(dtype)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"12.6 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"# Forward pass: compute predicted y\n", | |
"h = x.mm(w1)\n", | |
"h_relu = h.clamp(min=0)\n", | |
"y_pred = h_relu.mm(w2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# use Variable instead of Tensor\n", | |
"x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)\n", | |
"y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)\n", | |
"\n", | |
"w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)\n", | |
"w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"17.2 ms ± 675 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"# Forward pass: compute predicted y\n", | |
"h = x.mm(w1)\n", | |
"h_relu = h.clamp(min=0)\n", | |
"y_pred = h_relu.mm(w2)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Pytorch NN module" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Sequential(\n", | |
" (0): Linear(in_features=200, out_features=200)\n", | |
" (1): ReLU()\n", | |
" (2): Linear(in_features=200, out_features=200)\n", | |
")" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = torch.nn.Sequential(\n", | |
" torch.nn.Linear(D_in, H, bias=False),\n", | |
" torch.nn.ReLU(),\n", | |
" torch.nn.Linear(H, D_out, bias=False),\n", | |
")\n", | |
"\n", | |
"model.double() # cast parameters to double" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"15.9 ms ± 387 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"y_pred = model(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[torch.Size([200, 200]), torch.Size([200, 200])]" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# make sure no bias term\n", | |
"[p.shape for p in model.parameters()]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Fortran" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Overwriting neural_net.F90\n" | |
] | |
} | |
], | |
"source": [ | |
"%%file neural_net.F90\n", | |
"\n", | |
"PROGRAM MxM\n", | |
"    ! Time the forward pass y = max(x*w1, 0) * w2 of a bias-free two-layer\n", | |
"    ! network and report the mean CPU time per pass over N_loop repetitions.\n", | |
"    implicit none\n", | |
"\n", | |
"    ! Double precision, so the arithmetic matches NumPy's float64 and\n", | |
"    ! torch.DoubleTensor; default REAL would be single precision.\n", | |
"    integer, parameter :: dp = kind(1.0d0)\n", | |
"\n", | |
"    integer, parameter :: N_loop = 50 ! number of timed repetitions\n", | |
"    integer, parameter :: N = 5000, D_in = 200, H = 200, D_out = 200\n", | |
"    integer :: i_loop\n", | |
"    real(dp) :: timeStart, timeEnd, timeTotal = 0.0_dp\n", | |
"\n", | |
"    ! All-ones inputs and weights give a known result to check against.\n", | |
"    real(dp), dimension(N, D_in) :: x = 1.0_dp\n", | |
"    real(dp), dimension(N, D_out) :: y = 0.0_dp\n", | |
"    real(dp), dimension(D_in, H) :: w1 = 1.0_dp\n", | |
"    real(dp), dimension(H, D_out) :: w2 = 1.0_dp\n", | |
"    real(dp), dimension(N, H) :: a1 = 0.0_dp ! intermediate result\n", | |
"\n", | |
"    do i_loop = 1, N_loop\n", | |
"        ! cpu_time reports processor time, not wall-clock time.\n", | |
"        call cpu_time(time=timeStart)\n", | |
"\n", | |
"        a1 = MATMUL(x, w1)\n", | |
"        a1 = max(a1, 0.0_dp) ! RELU\n", | |
"        y = MATMUL(a1, w2)\n", | |
"\n", | |
"        call cpu_time(time=timeEnd)\n", | |
"        timeTotal = timeTotal + (timeEnd - timeStart)\n", | |
"    enddo\n", | |
"\n", | |
"    ! With all-ones data every element of y equals D_in*H (40000 here).\n", | |
"    print*, 'First element: ', y(1,1)\n", | |
"    print*, 'Time use: ', timeTotal/N_loop*1000.0_dp, 'ms'\n", | |
"\n", | |
"END PROGRAM" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!gfortran -O3 -march=native -ffast-math neural_net.F90 -o neural_net.exe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" First element: 40000.0000 \n", | |
" Time use: 21.9086151 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"!./neural_net.exe" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment