JiaweiZhuang · April 4, 2018 03:22
diff --git a/nn_performance_py_fortran.ipynb b/nn_performance_py_fortran.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ref: http://pytorch.org/tutorials/beginner/pytorch_with_examples.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "import torch\n",
    "from torch.autograd import Variable\n",
    "from torch import nn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# N is batch size; D_in is input dimension;\n",
    "# H is hidden dimension; D_out is output dimension.\n",
    "N, D_in, H, D_out = 5000, 200, 200, 200\n",
    "\n",
    "# Create random input and output data\n",
    "x = np.random.randn(N, D_in)\n",
    "y = np.random.randn(N, D_out)\n",
    "\n",
    "# Randomly initialize weights\n",
    "w1 = np.random.randn(D_in, H)\n",
    "w2 = np.random.randn(H, D_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17.1 ms ± 326 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Forward pass: compute predicted y\n",
    "h = x.dot(w1)\n",
    "h_relu = np.maximum(h, 0)\n",
    "y_pred = h_relu.dot(w2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pytorch hand coded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "dtype = torch.DoubleTensor # float64, same as numpy's default; NOTE: the Fortran code below uses default (single-precision) real\n",
    "#dtype = torch.FloatTensor # faster\n",
    "\n",
    "# Create random input and output data\n",
    "x = torch.randn(N, D_in).type(dtype)\n",
    "y = torch.randn(N, D_out).type(dtype)\n",
    "\n",
    "# Randomly initialize weights\n",
    "w1 = torch.randn(D_in, H).type(dtype)\n",
    "w2 = torch.randn(H, D_out).type(dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12.6 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Forward pass: compute predicted y\n",
    "h = x.mm(w1)\n",
    "h_relu = h.clamp(min=0)\n",
    "y_pred = h_relu.mm(w2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# use Variable instead of Tensor\n",
    "x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)\n",
    "y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)\n",
    "\n",
    "w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)\n",
    "w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17.2 ms ± 675 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "# Forward pass: compute predicted y\n",
    "h = x.mm(w1)\n",
    "h_relu = h.clamp(min=0)\n",
    "y_pred = h_relu.mm(w2)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pytorch NN module"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Sequential(\n",
       "  (0): Linear(in_features=200, out_features=200)\n",
       "  (1): ReLU()\n",
       "  (2): Linear(in_features=200, out_features=200)\n",
       ")"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = torch.nn.Sequential(\n",
    "    torch.nn.Linear(D_in, H, bias=False),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(H, D_out, bias=False),\n",
    ")\n",
    "\n",
    "model.double() # cast parameters to double"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15.9 ms ± 387 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "y_pred = model(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[torch.Size([200, 200]), torch.Size([200, 200])]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# make sure no bias term\n",
    "[p.shape for p in model.parameters()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fortran"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting neural_net.F90\n"
     ]
    }
   ],
   "source": [
    "%%file neural_net.F90\n",
    "\n",
    "PROGRAM MxM\n",
    "  implicit none\n",
    "  integer :: N_loop = 50, i_loop=0 ! for timing\n",
    "  real :: timeStart, timeEnd, timeTotal=0.0 \n",
    "  integer, parameter :: N = 5000, D_in = 200, H = 200, D_out = 200\n",
    "  real, dimension(N, D_in) :: x=1.0\n",
    "  real, dimension(N, D_out) :: y=0.0\n",
    "  real, dimension(D_in, H) :: w1=1.0\n",
    "  real, dimension(H, D_out) :: w2=1.0\n",
    "  real, dimension(N, H) :: a1=0.0 ! intermediate result\n",
    "\n",
    "  do i_loop=1,N_loop\n",
    "      call cpu_time(time=timeStart)\n",
    "\n",
    "      a1 = MATMUL(x, w1)\n",
    "      a1 = max(a1, 0.0) ! RELU\n",
    "      y = MATMUL(a1, w2)\n",
    "\n",
    "      call cpu_time(time=timeEnd)\n",
    "      timeTotal = timeTotal + (timeEnd-timeStart)\n",
    "  enddo\n",
    "\n",
    "  print*, 'First element: ', y(1,1)\n",
    "  print*, \"Time use: \", timeTotal/N_loop*1000.0, \"ms\"\n",
    "\n",
    "END PROGRAM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "!gfortran -O3 -march=native -ffast-math neural_net.F90 -o neural_net.exe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " First element:    40000.0000    \n",
      " Time use:    21.9086151     ms\n"
     ]
    }
   ],
   "source": [
    "!./neural_net.exe"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Ref: http://pytorch.org/tutorials/beginner/pytorch_with_examples.html"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"import matplotlib.pyplot as plt\n",
	"import numpy as np\n",
	"\n",
	"import torch\n",
	"from torch.autograd import Variable\n",
	"from torch import nn"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Numpy"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# N is batch size; D_in is input dimension;\n",
	"# H is hidden dimension; D_out is output dimension.\n",
	"N, D_in, H, D_out = 5000, 200, 200, 200\n",
	"\n",
	"# Create random input and output data\n",
	"x = np.random.randn(N, D_in)\n",
	"y = np.random.randn(N, D_out)\n",
	"\n",
	"# Randomly initialize weights\n",
	"w1 = np.random.randn(D_in, H)\n",
	"w2 = np.random.randn(H, D_out)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"17.1 ms ± 326 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# Forward pass: compute predicted y\n",
	"h = x.dot(w1)\n",
	"h_relu = np.maximum(h, 0)\n",
	"y_pred = h_relu.dot(w2)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Pytorch hand coded"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"dtype = torch.DoubleTensor # float64, same as numpy's default; NOTE: the Fortran code below uses default (single-precision) real\n",
	"#dtype = torch.FloatTensor # faster\n",
	"\n",
	"# Create random input and output data\n",
	"x = torch.randn(N, D_in).type(dtype)\n",
	"y = torch.randn(N, D_out).type(dtype)\n",
	"\n",
	"# Randomly initialize weights\n",
	"w1 = torch.randn(D_in, H).type(dtype)\n",
	"w2 = torch.randn(H, D_out).type(dtype)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"12.6 ms ± 972 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# Forward pass: compute predicted y\n",
	"h = x.mm(w1)\n",
	"h_relu = h.clamp(min=0)\n",
	"y_pred = h_relu.mm(w2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"# use Variable instead of Tensor\n",
	"x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)\n",
	"y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)\n",
	"\n",
	"w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)\n",
	"w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"17.2 ms ± 675 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"# Forward pass: compute predicted y\n",
	"h = x.mm(w1)\n",
	"h_relu = h.clamp(min=0)\n",
	"y_pred = h_relu.mm(w2)\n"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Pytorch NN module"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Sequential(\n",
	" (0): Linear(in_features=200, out_features=200)\n",
	" (1): ReLU()\n",
	" (2): Linear(in_features=200, out_features=200)\n",
	")"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"model = torch.nn.Sequential(\n",
	" torch.nn.Linear(D_in, H, bias=False),\n",
	" torch.nn.ReLU(),\n",
	" torch.nn.Linear(H, D_out, bias=False),\n",
	")\n",
	"\n",
	"model.double() # cast parameters to double"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"15.9 ms ± 387 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"y_pred = model(x)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[torch.Size([200, 200]), torch.Size([200, 200])]"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# make sure no bias term\n",
	"[p.shape for p in model.parameters()]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Fortran"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Overwriting neural_net.F90\n"
	]
	}
	],
	"source": [
	"%%file neural_net.F90\n",
	"\n",
	"PROGRAM MxM\n",
	" implicit none\n",
	" integer :: N_loop = 50, i_loop=0 ! for timing\n",
	" real :: timeStart, timeEnd, timeTotal=0.0 \n",
	" integer, parameter :: N = 5000, D_in = 200, H = 200, D_out = 200\n",
	" real, dimension(N, D_in) :: x=1.0\n",
	" real, dimension(N, D_out) :: y=0.0\n",
	" real, dimension(D_in, H) :: w1=1.0\n",
	" real, dimension(H, D_out) :: w2=1.0\n",
	" real, dimension(N, H) :: a1=0.0 ! intermediate result\n",
	"\n",
	" do i_loop=1,N_loop\n",
	" call cpu_time(time=timeStart)\n",
	"\n",
	" a1 = MATMUL(x, w1)\n",
	" a1 = max(a1, 0.0) ! RELU\n",
	" y = MATMUL(a1, w2)\n",
	"\n",
	" call cpu_time(time=timeEnd)\n",
	" timeTotal = timeTotal + (timeEnd-timeStart)\n",
	" enddo\n",
	"\n",
	" print*, 'First element: ', y(1,1)\n",
	" print*, \"Time use: \", timeTotal/N_loop*1000.0, \"ms\"\n",
	"\n",
	"END PROGRAM"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"!gfortran -O3 -march=native -ffast-math neural_net.F90 -o neural_net.exe"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" First element: 40000.0000 \n",
	" Time use: 21.9086151 ms\n"
	]
	}
	],
	"source": [
	"!./neural_net.exe"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}