Last active
November 5, 2021 14:01
-
-
Save pinzhenx/590495038d2d7fe1f95fe1d6f5f53cae to your computer and use it in GitHub Desktop.
conv_denorm.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "conv_denorm.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyOsmjhSqjY2tIyPQbnth8Wx", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/pinzhenx/590495038d2d7fe1f95fe1d6f5f53cae/conv_denorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OL45Z0Y1Fl-m" | |
}, | |
"source": [ | |
"import torch\n", | |
"import torch.backends.mkldnn" | |
], | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "G6JfcXjeFrHk", | |
"outputId": "d8a53f47-f208-4ebd-9747-11675467f6e8" | |
}, | |
"source": [ | |
"print(torch.__version__)\n", | |
"print(torch.__config__.show())" | |
], | |
"execution_count": 76, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"1.9.0+cu111\n", | |
"PyTorch built with:\n", | |
" - GCC 7.3\n", | |
" - C++ Version: 201402\n", | |
" - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications\n", | |
" - Intel(R) MKL-DNN v2.1.2 (Git Hash 98be7e8afa711dc9b66c8ff3504129cb82013cdb)\n", | |
" - OpenMP 201511 (a.k.a. OpenMP 4.5)\n", | |
" - NNPACK is enabled\n", | |
" - CPU capability usage: AVX2\n", | |
" - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ax9oA67UFtUG" | |
}, | |
"source": [ | |
"input = torch.ones(1, 3, 224, 224)\n", | |
"weight = torch.tensor([1e-42] * 27).view(1, 3, 3, 3)" | |
], | |
"execution_count": 45, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "l8Sk6oH2L9Tb" | |
}, | |
"source": [ | |
"# MKLDNN with default flush denormal flags\n", | |
"\n", | |
"Check the `CPU time avg` column: each call takes about *79ms*, and the denormal result is not flushed to zero." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Z-1bcvR-GXrI", | |
"outputId": "700d4897-3762-44fc-9fc7-1b3a1f882f12" | |
}, | |
"source": [ | |
"with torch.profiler.profile() as p:\n", | |
" with torch.backends.mkldnn.flags(enabled=True):\n", | |
" for _ in range(50):\n", | |
" res = torch.conv2d(input, weight)\n", | |
"print(p.key_averages().table(row_limit=5, sort_by=\"cpu_time\"))\n", | |
"print(res)" | |
], | |
"execution_count": 73, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"---------------------------- ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls \n", | |
"---------------------------- ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" aten::conv2d 0.01% 397.000us 100.00% 3.989s 79.771ms 50 \n", | |
" aten::convolution 0.01% 497.000us 99.99% 3.988s 79.763ms 50 \n", | |
" aten::_convolution 0.03% 1.294ms 99.98% 3.988s 79.753ms 50 \n", | |
" aten::mkldnn_convolution 99.91% 3.985s 99.95% 3.986s 79.728ms 50 \n", | |
" aten::empty 0.02% 958.000us 0.02% 958.000us 19.160us 50 \n", | |
"---------------------------- ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
"Self CPU time total: 3.989s\n", | |
"\n", | |
"tensor([[[[2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" ...,\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41]]]])\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ylYCmkH3Mjgy" | |
}, | |
"source": [ | |
"# THNN with default denormal flags\n", | |
"\n", | |
"THNN takes about *12ms* per call, and by default its result is *not* flushed to zero either." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "47ekfL35IAUS", | |
"outputId": "c52dfebf-413e-4536-8b93-398e18d175a0" | |
}, | |
"source": [ | |
"with torch.profiler.profile() as p:\n", | |
" with torch.backends.mkldnn.flags(enabled=False):\n", | |
" for _ in range(50):\n", | |
" res = torch.conv2d(input, weight)\n", | |
"print(p.key_averages().table(row_limit=5, sort_by=\"cpu_time\"))\n", | |
"print(res)" | |
], | |
"execution_count": 74, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"------------------------------ ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls \n", | |
"------------------------------ ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" aten::conv2d 0.05% 310.000us 100.00% 644.729ms 12.895ms 50 \n", | |
" aten::convolution 0.06% 418.000us 99.95% 644.419ms 12.888ms 50 \n", | |
" aten::_convolution 0.14% 922.000us 99.89% 644.001ms 12.880ms 50 \n", | |
" aten::_convolution_nogroup 0.12% 794.000us 99.74% 643.079ms 12.862ms 50 \n", | |
" aten::thnn_conv2d 0.88% 5.691ms 99.60% 642.173ms 12.843ms 50 \n", | |
"------------------------------ ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
"Self CPU time total: 644.729ms\n", | |
"\n", | |
"tensor([[[[2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" ...,\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41],\n", | |
" [2.7014e-41, 2.7014e-41, 2.7014e-41, ..., 2.7014e-41,\n", | |
" 2.7014e-41, 2.7014e-41]]]])\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "LxZgmbJ5M1DL" | |
}, | |
"source": [ | |
"**Then we set the flush-denormal flag to `True`**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "zDpcNU9AKcvI", | |
"outputId": "91b92b6b-dfbf-413c-df38-3f41e9db6592" | |
}, | |
"source": [ | |
"torch.set_flush_denormal(True)\n" | |
], | |
"execution_count": 75, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 75 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "efuaC1KmNEMT" | |
}, | |
"source": [ | |
"# MKLDNN with flush denormal\n", | |
"\n", | |
"The time per call is now down to *1.157ms*, and the result is flushed to zero." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "k9GZ28UvM5u9", | |
"outputId": "31a43694-8c03-40bd-dc7b-a07e959ede2a" | |
}, | |
"source": [ | |
"with torch.profiler.profile() as p:\n", | |
" with torch.backends.mkldnn.flags(enabled=True):\n", | |
" for _ in range(50):\n", | |
" res = torch.conv2d(input, weight)\n", | |
"print(p.key_averages().table(row_limit=5, sort_by=\"cpu_time\"))\n", | |
"print(res)" | |
], | |
"execution_count": 80, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"---------------------------- ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls \n", | |
"---------------------------- ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" aten::conv2d 0.28% 163.000us 100.00% 57.853ms 1.157ms 50 \n", | |
" aten::convolution 0.28% 164.000us 99.72% 57.690ms 1.154ms 50 \n", | |
" aten::_convolution 0.72% 415.000us 99.43% 57.526ms 1.151ms 50 \n", | |
" aten::mkldnn_convolution 98.14% 56.778ms 98.72% 57.111ms 1.142ms 50 \n", | |
" aten::empty 0.38% 222.000us 0.38% 222.000us 4.440us 50 \n", | |
"---------------------------- ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
"Self CPU time total: 57.853ms\n", | |
"\n", | |
"tensor([[[[0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" ...,\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.]]]])\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "aA1LQXLfNbbG" | |
}, | |
"source": [ | |
"# THNN with flush denormal\n", | |
"\n", | |
"The time per call is also down to *1.314ms*." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "hzNRgzyaNJvm", | |
"outputId": "eaf38dd4-91dc-4de9-f0c4-34a794bba26e" | |
}, | |
"source": [ | |
"with torch.profiler.profile() as p:\n", | |
" with torch.backends.mkldnn.flags(enabled=False):\n", | |
" for _ in range(50):\n", | |
" res = torch.conv2d(input, weight)\n", | |
"print(p.key_averages().table(row_limit=5, sort_by=\"cpu_time\"))\n", | |
"print(res)" | |
], | |
"execution_count": 81, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"------------------------------ ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" Name Self CPU % Self CPU CPU total % CPU total CPU time avg # of Calls \n", | |
"------------------------------ ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
" aten::conv2d 0.21% 137.000us 100.00% 65.706ms 1.314ms 50 \n", | |
" aten::convolution 0.23% 149.000us 99.79% 65.569ms 1.311ms 50 \n", | |
" aten::_convolution 0.47% 310.000us 99.56% 65.420ms 1.308ms 50 \n", | |
" aten::_convolution_nogroup 0.51% 338.000us 99.09% 65.110ms 1.302ms 50 \n", | |
" aten::thnn_conv2d 4.95% 3.254ms 98.52% 64.736ms 1.295ms 50 \n", | |
"------------------------------ ------------ ------------ ------------ ------------ ------------ ------------ \n", | |
"Self CPU time total: 65.706ms\n", | |
"\n", | |
"tensor([[[[0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" ...,\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.],\n", | |
" [0., 0., 0., ..., 0., 0., 0.]]]])\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "tzSuhx2DNbA0" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment