Last active
March 30, 2023 09:29
-
-
Save arunmallya/0e340cbc79c4f9545f97bf10d040cb65 to your computer and use it in GitHub Desktop.
A Jupyter notebook to get the receptive field and effective stride of layers in a CNN. Supports dilated convolutions as well.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import math\n", | |
"import copy\n", | |
"from __future__ import division\n", | |
"\n", | |
"def layer_params(layer):\n", | |
" s = layer['s'] if 's' in layer else 1\n", | |
" d = layer['d'] if 'd' in layer else 1\n", | |
" k = layer['k']\n", | |
" return k, s, d\n", | |
"\n", | |
"def output_size(i, k, s=1, p=0, d=1):\n", | |
" o = math.floor((i + 2 * p - d * (k - 1) - 1) / s) + 1\n", | |
" return int(o)\n", | |
" \n", | |
"def find_output_size(network, i=224):\n", | |
" net = copy.deepcopy(network)\n", | |
" for layer in net:\n", | |
" layer['i'] = i\n", | |
" o = output_size(**layer)\n", | |
" i = o\n", | |
" return o\n", | |
"\n", | |
"def find_receptive_field(net):\n", | |
" output = [] # (rf, effective stride)\n", | |
" for i in range(len(net)):\n", | |
" k, s, d = layer_params(net[i])\n", | |
" if i == 0:\n", | |
" rf_p = 1\n", | |
" s_p = 1\n", | |
" else:\n", | |
" rf_p, s_p = output[i-1]\n", | |
" es = s * s_p\n", | |
" rf = rf_p + d * s_p * (k-1)\n", | |
" output.append((rf, es))\n", | |
" return output\n", | |
"\n", | |
"def find_full_info(network_with_names, input_size=224):\n", | |
" structure = network_with_names[0::2]\n", | |
" layer_names = network_with_names[1::2]\n", | |
" print '%-10s| %-16s| %-16s| %-16s' % ('Layer Name', 'Receptive Field', 'Effective Stride', 'Output Size')\n", | |
" print '-' * 59\n", | |
" print '%-10s| %-16s| %-16s| %-16s' % ('Input', '--', '--', str(input_size))\n", | |
" rf_s = find_receptive_field(structure)\n", | |
" for i in range(0, len(structure)):\n", | |
" print '%-10s| %-16d| %-16d| %-16d' % (\n", | |
" layer_names[i], rf_s[i][0], rf_s[i][1], find_output_size(structure[:i+1], input_size))\n", | |
" i = len(structure) - 1\n", | |
" return rf_s[i][0], rf_s[i][1], find_output_size(structure[:i+1], input_size)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 Conv\n", | |
"Layer Name| Receptive Field | Effective Stride| Output Size \n", | |
"-----------------------------------------------------------\n", | |
"Input | -- | -- | 224 \n", | |
"conv1 | 3 | 1 | 222 \n" | |
] | |
} | |
], | |
"source": [ | |
"# Format: {\n", | |
"# 'k': kernel_size, \n", | |
"# 's': stride (default 1), \n", | |
"# 'p': padding (default 0), \n", | |
"# 'd': dilation (default 1)\n", | |
"# }\n", | |
"\n", | |
"print '1 Conv'\n", | |
"network = [\n", | |
" {'k': 3}, 'conv1',\n", | |
"]\n", | |
"find_full_info(network);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 Conv + 1 Pool\n", | |
"Layer Name| Receptive Field | Effective Stride| Output Size \n", | |
"-----------------------------------------------------------\n", | |
"Input | -- | -- | 224 \n", | |
"conv1 | 3 | 1 | 222 \n", | |
"conv2 | 5 | 1 | 220 \n", | |
"pool1 | 6 | 2 | 110 \n" | |
] | |
} | |
], | |
"source": [ | |
"print '2 Conv + 1 Pool'\n", | |
"network = [\n", | |
" {'k': 3}, 'conv1',\n", | |
" {'k': 3}, 'conv2',\n", | |
" {'k': 2, 's': 2}, 'pool1',\n", | |
"]\n", | |
"find_full_info(network);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"AlexNet\n", | |
"Layer Name| Receptive Field | Effective Stride| Output Size \n", | |
"-----------------------------------------------------------\n", | |
"Input | -- | -- | 227 \n", | |
"conv1 | 11 | 4 | 55 \n", | |
"pool1 | 19 | 8 | 27 \n", | |
"conv2 | 51 | 8 | 27 \n", | |
"pool2 | 67 | 16 | 13 \n", | |
"conv3 | 99 | 16 | 13 \n", | |
"conv4 | 131 | 16 | 13 \n", | |
"conv5 | 163 | 16 | 13 \n", | |
"pool5 | 195 | 32 | 6 \n", | |
"fc6 | 355 | 32 | 1 \n", | |
"fc7 | 355 | 32 | 1 \n" | |
] | |
} | |
], | |
"source": [ | |
"print 'AlexNet'\n", | |
"network = [\n", | |
" {'k': 11, 's': 4, 'p': 0}, 'conv1',\n", | |
" {'k': 3, 's': 2, 'p': 0}, 'pool1',\n", | |
" {'k': 5, 's': 1, 'p': 2}, 'conv2',\n", | |
" {'k': 3, 's': 2, 'p': 0}, 'pool2',\n", | |
" {'k': 3, 's': 1, 'p': 1}, 'conv3',\n", | |
" {'k': 3, 's': 1, 'p': 1}, 'conv4',\n", | |
" {'k': 3, 's': 1, 'p': 1}, 'conv5',\n", | |
" {'k': 3, 's': 2, 'p': 0}, 'pool5',\n", | |
" {'k': 6}, 'fc6',\n", | |
" {'k': 1}, 'fc7',\n", | |
"]\n", | |
"find_full_info(network, input_size=227);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"VGG-16\n", | |
"Layer Name| Receptive Field | Effective Stride| Output Size \n", | |
"-----------------------------------------------------------\n", | |
"Input | -- | -- | 224 \n", | |
"conv1_1 | 3 | 1 | 224 \n", | |
"conv1_2 | 5 | 1 | 224 \n", | |
"pool1 | 6 | 2 | 112 \n", | |
"conv2_1 | 10 | 2 | 112 \n", | |
"conv2_2 | 14 | 2 | 112 \n", | |
"pool2 | 16 | 4 | 56 \n", | |
"conv3_1 | 24 | 4 | 56 \n", | |
"conv3_2 | 32 | 4 | 56 \n", | |
"conv3_3 | 40 | 4 | 56 \n", | |
"pool3 | 44 | 8 | 28 \n", | |
"conv4_1 | 60 | 8 | 28 \n", | |
"conv4_2 | 76 | 8 | 28 \n", | |
"conv4_3 | 92 | 8 | 28 \n", | |
"pool4 | 100 | 16 | 14 \n", | |
"conv5_1 | 132 | 16 | 14 \n", | |
"conv5_2 | 164 | 16 | 14 \n", | |
"conv5_3 | 196 | 16 | 14 \n", | |
"pool5 | 212 | 32 | 7 \n", | |
"fc6 | 404 | 32 | 1 \n", | |
"fc7 | 404 | 32 | 1 \n", | |
"fc8 | 404 | 32 | 1 \n" | |
] | |
} | |
], | |
"source": [ | |
"print 'VGG-16'\n", | |
"network = [\n", | |
" {'k': 3, 'p': 1}, 'conv1_1',\n", | |
" {'k': 3, 'p': 1}, 'conv1_2',\n", | |
" {'k': 2, 's': 2}, 'pool1',\n", | |
" {'k': 3, 'p': 1}, 'conv2_1',\n", | |
" {'k': 3, 'p': 1}, 'conv2_2',\n", | |
" {'k': 2, 's': 2}, 'pool2',\n", | |
" {'k': 3, 'p': 1}, 'conv3_1',\n", | |
" {'k': 3, 'p': 1}, 'conv3_2',\n", | |
" {'k': 3, 'p': 1}, 'conv3_3',\n", | |
" {'k': 2, 's': 2}, 'pool3',\n", | |
" {'k': 3, 'p': 1}, 'conv4_1',\n", | |
" {'k': 3, 'p': 1}, 'conv4_2',\n", | |
" {'k': 3, 'p': 1}, 'conv4_3',\n", | |
" {'k': 2, 's': 2}, 'pool4',\n", | |
" {'k': 3, 'p': 1}, 'conv5_1',\n", | |
" {'k': 3, 'p': 1}, 'conv5_2',\n", | |
" {'k': 3, 'p': 1}, 'conv5_3',\n", | |
" {'k': 2, 's': 2}, 'pool5',\n", | |
" {'k': 7}, 'fc6',\n", | |
" {'k': 1}, 'fc7',\n", | |
" {'k': 1}, 'fc8',\n", | |
"]\n", | |
"find_full_info(network);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"VGG-16 with Dilated Convs for Dense Prediction\n", | |
"\n", | |
"Pascal VOC front end\n", | |
"Layer Name| Receptive Field | Effective Stride| Output Size \n", | |
"-----------------------------------------------------------\n", | |
"Input | -- | -- | 900 \n", | |
"conv1_1 | 3 | 1 | 898 \n", | |
"conv1_2 | 5 | 1 | 896 \n", | |
"pool1 | 6 | 2 | 448 \n", | |
"conv2_1 | 10 | 2 | 446 \n", | |
"conv2_2 | 14 | 2 | 444 \n", | |
"pool2 | 16 | 4 | 222 \n", | |
"conv3_1 | 24 | 4 | 220 \n", | |
"conv3_2 | 32 | 4 | 218 \n", | |
"conv3_3 | 40 | 4 | 216 \n", | |
"pool3 | 44 | 8 | 108 \n", | |
"conv4_1 | 60 | 8 | 106 \n", | |
"conv4_2 | 76 | 8 | 104 \n", | |
"conv4_3 | 92 | 8 | 102 \n", | |
"conv5_1 | 124 | 8 | 98 \n", | |
"conv5_2 | 156 | 8 | 94 \n", | |
"conv5_3 | 188 | 8 | 90 \n", | |
"fc6 | 380 | 8 | 66 \n", | |
"fc7 | 380 | 8 | 66 \n", | |
"fc-final | 380 | 8 | 66 \n", | |
"\n", | |
"Context Aggregation Module\n", | |
"Layer Name| Receptive Field | Effective Stride| Output Size \n", | |
"-----------------------------------------------------------\n", | |
"Input | -- | -- | 66 \n", | |
"ct_conv1_1| 3 | 1 | 130 \n", | |
"ct_conv1_2| 5 | 1 | 128 \n", | |
"ct_conv2_1| 9 | 1 | 124 \n", | |
"ct_conv3_1| 17 | 1 | 116 \n", | |
"ct_conv4_1| 33 | 1 | 100 \n", | |
"ct_conv5_1| 65 | 1 | 68 \n", | |
"ct_fc1 | 67 | 1 | 66 \n", | |
"ct_final | 67 | 1 | 66 \n" | |
] | |
} | |
], | |
"source": [ | |
"# https://arxiv.org/pdf/1511.07122v3.pdf\n", | |
"# https://github.com/fyu/dilation/blob/master/models/dilation8_pascal_voc_deploy.prototxt\n", | |
"# No intermediate padding.\n", | |
"print 'VGG-16 with Dilated Convs for Dense Prediction\\n'\n", | |
"print 'Pascal VOC front end'\n", | |
"network = [\n", | |
" {'k': 3}, 'conv1_1',\n", | |
" {'k': 3}, 'conv1_2',\n", | |
" {'k': 2, 's': 2}, 'pool1',\n", | |
" {'k': 3}, 'conv2_1',\n", | |
" {'k': 3}, 'conv2_2',\n", | |
" {'k': 2, 's': 2}, 'pool2',\n", | |
" {'k': 3}, 'conv3_1',\n", | |
" {'k': 3}, 'conv3_2',\n", | |
" {'k': 3}, 'conv3_3',\n", | |
" {'k': 2, 's': 2}, 'pool3',\n", | |
" {'k': 3}, 'conv4_1',\n", | |
" {'k': 3}, 'conv4_2',\n", | |
" {'k': 3}, 'conv4_3',\n", | |
" {'k': 3, 'd': 2}, 'conv5_1',\n", | |
" {'k': 3, 'd': 2}, 'conv5_2',\n", | |
" {'k': 3, 'd': 2}, 'conv5_3',\n", | |
" {'k': 7, 'd': 4}, 'fc6',\n", | |
" {'k': 1}, 'fc7',\n", | |
" {'k': 1}, 'fc-final',\n", | |
"]\n", | |
"find_full_info(network, input_size=900);\n", | |
"print ''\n", | |
"\n", | |
"print 'Context Aggregation Module'\n", | |
"network = [\n", | |
" {'k': 3, 'p': 33}, 'ct_conv1_1',\n", | |
" {'k': 3}, 'ct_conv1_2',\n", | |
" {'k': 3, 'd': 2}, 'ct_conv2_1',\n", | |
" {'k': 3, 'd': 4}, 'ct_conv3_1',\n", | |
" {'k': 3, 'd': 8}, 'ct_conv4_1',\n", | |
" {'k': 3, 'd': 16}, 'ct_conv5_1',\n", | |
" {'k': 3}, 'ct_fc1',\n", | |
" {'k': 1}, 'ct_final',\n", | |
"]\n", | |
"find_full_info(network, input_size=66);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment