Skip to content

Instantly share code, notes, and snippets.

@davidberard98
Last active May 3, 2022 18:37
Show Gist options
  • Save davidberard98/b55d9ec36f47c4ad04fc63b1d18c9fae to your computer and use it in GitHub Desktop.
Save davidberard98/b55d9ec36f47c4ad04fc63b1d18c9fae to your computer and use it in GitHub Desktop.
convolution measurements - A100, channels_last
N tdim wdim ch is_channels_last ms
16 100 3 3 1 18.43367154942825
16 100 3 3 0 18.246989184990525
16 100 3 7 1 18.320465798024088
16 100 3 7 0 18.22661985643208
16 100 3 8 1 20.038793003186584
16 100 3 8 0 18.317282549105585
16 100 5 3 1 18.30171929905191
16 100 5 3 0 18.09040275402367
16 100 5 7 1 29.462874494493008
16 100 5 7 0 25.831757998093963
16 100 5 8 1 51.52064310386777
16 100 5 8 0 31.876941304653883
16 100 7 3 1 18.537260207813233
16 100 7 3 0 18.110481044277552
16 100 7 7 1 53.05891130119562
16 100 7 7 0 47.530143009498715
16 100 7 8 1 99.12664885632694
16 100 7 8 0 59.04654811602086
16 100 9 3 1 18.69109240360558
16 100 9 3 0 18.230828805826604
16 100 9 7 1 81.12724008969963
16 100 9 7 0 67.67505290918052
16 100 9 8 1 160.0374965928495
16 100 9 8 0 83.96956697106361
16 100 11 3 1 24.356922297738492
16 100 11 3 0 20.960539998486638
16 100 11 7 1 118.52847051341087
16 100 11 7 0 101.30427253898233
16 100 11 8 1 236.2886720802635
16 100 11 8 0 126.48218043614179
16 244 3 3 1 21.351740299724042
16 244 3 3 0 18.690762598998845
16 244 3 7 1 45.8222656045109
16 244 3 7 0 78.0768459662795
16 244 3 8 1 53.245655214414
16 244 3 8 0 99.66220404021442
16 244 5 3 1 45.01768241170794
16 244 5 3 0 27.741778106428683
16 244 5 7 1 101.61644197069108
16 244 5 7 0 144.5265645161271
16 244 5 8 1 120.28961698524654
16 244 5 8 0 185.34037203062326
16 244 7 3 1 80.84342093206942
16 244 7 3 0 49.77555158548057
16 244 7 7 1 185.42232853360474
16 244 7 7 0 264.8957180790603
16 244 7 8 1 220.69369396194816
16 244 7 8 0 341.45430591888726
16 244 9 3 1 128.54479893576354
16 244 9 3 0 70.11849293485284
16 244 9 7 1 296.58175096847117
16 244 9 7 0 374.50749101117253
16 244 9 8 1 353.35517884232104
16 244 9 8 0 483.67772600613534
16 244 11 3 1 188.00333852414042
16 244 11 3 0 104.70633255317807
16 244 11 7 1 435.82991999574006
16 244 11 7 0 563.7340219691396
16 244 11 8 1 520.4557671677321
16 244 11 8 0 728.8549048826098
16 512 3 3 1 102.50197246205062
16 512 3 3 0 78.18600907921791
16 512 3 7 1 201.3016720302403
16 512 3 7 0 342.28865802288055
16 512 3 8 1 231.97088111191988
16 512 3 8 0 437.2869550716132
16 512 5 3 1 206.11139107495546
16 512 5 3 0 130.80083951354027
16 512 5 7 1 445.2961441129446
16 512 5 7 0 630.8648940175772
16 512 5 8 1 525.1300630625337
16 512 5 8 0 814.3553459085524
16 512 7 3 1 362.5257189851254
16 512 7 3 0 227.82374895177782
16 512 7 7 1 811.7622800637037
16 512 7 7 0 1157.9886570107192
16 512 7 8 1 967.0884439256042
16 512 7 8 0 1505.2870861254632
16 512 9 3 1 571.1262919940054
16 512 9 3 0 315.63887116499245
16 512 9 7 1 1300.1056250650436
16 512 9 7 0 1636.7696318775415
16 512 9 8 1 1544.389443937689
16 512 9 8 0 2127.304224995896
16 512 11 3 1 832.3811821173877
16 512 11 3 0 467.4088421743363
16 512 11 7 1 1910.3905488736928
16 512 11 7 0 2466.9426139444113
16 512 11 8 1 2278.6464418750256
16 512 11 8 0 3209.5337680075318
N tdim wdim ch is_channels_last ms
16 100 3 3 1 18.617491715122018
16 100 3 3 0 18.78675235202536
16 100 3 7 1 19.152427499648184
16 100 3 7 0 18.516116950195283
16 100 3 8 1 20.04905662033707
16 100 3 8 0 18.60648614820093
16 100 5 3 1 19.169622543267906
16 100 5 3 0 18.820264446549118
16 100 5 7 1 29.462046385742724
16 100 5 7 0 25.832909997552633
16 100 5 8 1 51.55018719378859
16 100 5 8 0 31.883728713728487
16 100 7 3 1 19.15375804528594
16 100 7 3 0 19.243654911406338
16 100 7 7 1 53.01895779557526
16 100 7 7 0 47.49531680718064
16 100 7 8 1 98.84221595712006
16 100 7 8 0 58.97610879037529
16 100 9 3 1 19.0016643027775
16 100 9 3 0 18.49844044772908
16 100 9 7 1 81.02516806684434
16 100 9 7 0 67.36795790493488
16 100 9 8 1 160.0180856185034
16 100 9 8 0 83.91591906547546
16 100 11 3 1 24.340565013699234
16 100 11 3 0 20.957220904529095
16 100 11 7 1 118.49813652224839
16 100 11 7 0 101.25651210546494
16 100 11 8 1 236.2594180740416
16 100 11 8 0 126.45251443609595
16 244 3 3 1 21.335488883778453
16 244 3 3 0 18.89237290015444
16 244 3 7 1 45.82061788532883
16 244 3 7 0 78.01525900140405
16 244 3 8 1 53.23789780959487
16 244 3 8 0 99.63596891611814
16 244 5 3 1 45.00673818401992
16 244 5 3 0 27.740272181108594
16 244 5 7 1 101.5442309435457
16 244 5 7 0 144.46621004026383
16 244 5 8 1 120.23189302999526
16 244 5 8 0 185.31600560527295
16 244 7 3 1 80.83452610298991
16 244 7 3 0 49.78251978754997
16 244 7 7 1 185.226792935282
16 244 7 7 0 264.93781502358615
16 244 7 8 1 220.7418200559914
16 244 7 8 0 341.69769706204534
16 244 9 3 1 128.59959551133215
16 244 9 3 0 70.15452487394214
16 244 9 7 1 296.9771730713546
16 244 9 7 0 374.7915520798415
16 244 9 8 1 353.53076411411166
16 244 9 8 0 483.90447115525603
16 244 11 3 1 188.1598614854738
16 244 11 3 0 104.75643642712384
16 244 11 7 1 436.16899801418185
16 244 11 7 0 564.1035118605942
16 244 11 8 1 520.6638311501592
16 244 11 8 0 729.0159750264138
16 512 3 3 1 102.5122330756858
16 512 3 3 0 78.164700884372
16 512 3 7 1 201.3238740619272
16 512 3 7 0 342.31415297836065
16 512 3 8 1 231.97667696513236
16 512 3 8 0 437.3319619335234
16 512 5 3 1 206.09992602840066
16 512 5 3 0 130.80425304360688
16 512 5 7 1 445.32433804124594
16 512 5 7 0 631.1094518750906
16 512 5 8 1 525.3238540608436
16 512 5 8 0 814.3894399981946
16 512 7 3 1 362.3560150153935
16 512 7 3 0 227.8254448901862
16 512 7 7 1 814.5140670239925
16 512 7 7 0 1158.3332540467381
16 512 7 8 1 965.2099870145321
16 512 7 8 0 1503.144463058561
16 512 9 3 1 571.2393971625715
16 512 9 3 0 315.47448807395995
16 512 9 7 1 1300.385845825076
16 512 9 7 0 1636.8507500737906
16 512 9 8 1 1545.9112760145217
16 512 9 8 0 2127.843477996066
16 512 11 3 1 832.3594650719315
16 512 11 3 0 467.1126550529152
16 512 11 7 1 1911.026865011081
16 512 11 7 0 2464.282466098666
16 512 11 8 1 2277.7668449562043
16 512 11 8 0 3211.3462639972568
N tdim wdim ch is_channels_last ms
16 100 3 3 1 17.98505939077586
16 100 3 3 0 17.783134942874312
16 100 3 7 1 18.125824455637485
16 100 3 7 0 18.00207723863423
16 100 3 8 1 20.031808386556804
16 100 3 8 0 17.9782064515166
16 100 5 3 1 17.987724544946104
16 100 5 3 0 17.954158294014633
16 100 5 7 1 29.481642902828753
16 100 5 7 0 25.864325487054884
16 100 5 8 1 51.561471400782466
16 100 5 8 0 31.903302296996117
16 100 7 3 1 18.324633350130167
16 100 7 3 0 17.88231561658904
16 100 7 7 1 53.0493127880618
16 100 7 7 0 47.548712603747845
16 100 7 8 1 98.93104806542397
16 100 7 8 0 59.03653670102358
16 100 9 3 1 18.25640833703801
16 100 9 3 0 18.012333416845646
16 100 9 7 1 81.1137561686337
16 100 9 7 0 67.46744713746011
16 100 9 8 1 160.08370905183256
16 100 9 8 0 83.99839396588504
16 100 11 3 1 24.344576080329716
16 100 11 3 0 20.973224588669837
16 100 11 7 1 118.53211000561714
16 100 11 7 0 101.33329965174198
16 100 11 8 1 236.33953696116805
16 100 11 8 0 126.53844512533396
16 244 3 3 1 21.352207893505692
16 244 3 3 0 18.491640803404152
16 244 3 7 1 45.84740300197154
16 244 3 7 0 78.09652201831341
16 244 3 8 1 53.2672727946192
16 244 3 8 0 99.70052912831306
16 244 5 3 1 45.04391020163894
16 244 5 3 0 27.769387210719287
16 244 5 7 1 101.69024544302374
16 244 5 7 0 144.58980155177414
16 244 5 8 1 120.33829209394753
16 244 5 8 0 185.4973214212805
16 244 7 3 1 80.91538213193417
16 244 7 3 0 49.828163301572204
16 244 7 7 1 185.3654155274853
16 244 7 7 0 265.0411769282073
16 244 7 8 1 220.8123339805752
16 244 7 8 0 341.7605140712112
16 244 9 3 1 128.62315052188933
16 244 9 3 0 70.326424902305
16 244 9 7 1 296.890138881281
16 244 9 7 0 375.1403170172125
16 244 9 8 1 353.51713793352246
16 244 9 8 0 484.00816414505243
16 244 11 3 1 188.15834308043122
16 244 11 3 0 104.76912185549736
16 244 11 7 1 436.18130101822317
16 244 11 7 0 564.080409007147
16 244 11 8 1 520.6471499986947
16 244 11 8 0 729.3332149274647
16 512 3 3 1 102.52439265605062
16 512 3 3 0 78.38175096549094
16 512 3 7 1 201.73748093657196
16 512 3 7 0 342.7497821394354
16 512 3 8 1 232.08913300186396
16 512 3 8 0 437.38882592879236
16 512 5 3 1 206.16268599405885
16 512 5 3 0 130.8596400776878
16 512 5 7 1 445.52418286912143
16 512 5 7 0 631.3505121506751
16 512 5 8 1 525.5342728924006
16 512 5 8 0 814.5755738951266
16 512 7 3 1 362.6846210099757
16 512 7 3 0 228.12796500511467
16 512 7 7 1 812.5514201819897
16 512 7 7 0 1158.9657519944012
16 512 7 8 1 965.9182881005108
16 512 7 8 0 1504.0805558674037
16 512 9 3 1 571.675136918202
16 512 9 3 0 316.26641703769565
16 512 9 7 1 1299.9964321497828
16 512 9 7 0 1636.111926054582
16 512 9 8 1 1548.0412761680782
16 512 9 8 0 2131.553997984156
16 512 11 3 1 833.4145059343427
16 512 11 3 0 468.1004530284554
16 512 11 7 1 1914.6338149439543
16 512 11 7 0 2467.0993939507753
16 512 11 8 1 2281.0571540612727
16 512 11 8 0 3211.363078095019
import torch
from torch.utils.benchmark import Timer
def conv_add_relu_x100(t, w, pad: int):
    """Apply 100 chained rounds of conv2d -> residual add -> ReLU.

    Every round convolves the running activation with ``w`` (no bias,
    stride 1, padding ``pad``), adds that round's input onto the convolution
    output in place, and then applies an in-place ReLU.  The output of one
    round is the input of the next.

    The in-place ops write into the freshly produced convolution output, so
    the tensor passed in as ``t`` is not modified.

    Args:
        t: input activation tensor.
        w: convolution weight tensor.
        pad: padding handed to ``conv2d``; the convolution output must be
            addable to its input (callers in this file use ``wdim // 2``
            with odd kernel sizes).

    Returns:
        The activation tensor after the 100th round.
    """
    act = t
    for _step in range(100):
        # conv2d allocates a new tensor; add_ and relu_ then reuse it.
        summed = torch.nn.functional.conv2d(
            act, w, bias=None, stride=1, padding=pad
        ).add_(act)
        act = torch.relu_(summed)
    return act
def run_test(t, w, pad, *args, **kwargs):
    """Time the TorchScript-compiled ``conv_add_relu_x100`` on CUDA.

    Args:
        t: input tensor; moved to the CUDA device before timing.
        w: convolution weight tensor; moved to the CUDA device as well.
        pad: padding value forwarded to ``conv_add_relu_x100``.
        *args: accepted for call-site compatibility; ignored.
        **kwargs: accepted for call-site compatibility; ignored.

    Returns:
        The median from ``Timer.blocked_autorange()`` for one
        ``fn(*inputs)`` call, multiplied by 1000 (reported as "ms" by
        ``main``).
    """
    t = t.cuda()
    w = w.cuda()
    script = torch.jit.script(conv_add_relu_x100)

    # Warm-up calls; presumably so one-time JIT/cuDNN setup work is not
    # included in the timed region.  The outputs are intentionally discarded.
    for _ in range(20):
        script(t, w, pad)

    # Keep the timer under its own name instead of rebinding ``t``, so the
    # tensor and the Timer are never confused.
    timer = Timer(
        stmt="fn(*inputs)",
        globals={"fn": script, "inputs": [t, w, pad]},
    )
    measurement = timer.blocked_autorange()
    return measurement.median * 1000
def main():
    """Run the benchmark sweep and print one CSV row per configuration.

    The sweep covers every combination of image size (``tdim``), kernel size
    (``wdim``), channel count (``ch``) and memory layout, at a fixed batch
    size of 16.  A header line is printed first; each following row is
    ``N,tdim,wdim,ch,is_channels_last,ms`` where ``ms`` is the value returned
    by ``run_test``.
    """
    import itertools

    print("N,tdim,wdim,ch,is_channels_last,ms")

    batch = 16
    image_sizes = [100, 244, 512]
    kernel_sizes = [3, 5, 7, 9, 11]
    channel_counts = [3, 7, 8]
    # (flag written to the CSV, memory format applied to both tensors);
    # channels_last is measured first, matching the original row order.
    layouts = [(1, torch.channels_last), (0, torch.contiguous_format)]

    sweep = itertools.product(image_sizes, kernel_sizes, channel_counts, layouts)
    for tdim, wdim, ch, (is_channels_last, mem_format) in sweep:
        t = torch.rand(batch, ch, tdim, tdim).cuda()
        w = torch.rand(ch, ch, wdim, wdim).cuda()
        # Half the (odd) kernel size keeps the spatial size unchanged at
        # stride 1, which the residual add in the benchmarked function needs.
        pad = wdim // 2
        t = t.to(memory_format=mem_format)
        w = w.to(memory_format=mem_format)
        result = run_test(t, w, pad)
        row = [batch, tdim, wdim, ch, is_channels_last, result]
        print(",".join(str(x) for x in row))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment