Last active
November 5, 2019 02:32
-
-
Save yusukemihara/49f02c2c901861a9a83c12b8d70164c0 to your computer and use it in GitHub Desktop.
OpenCOBOL performance memo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --- u14.log 2019-10-29 11:24:19.253258539 +0900 | |
| +++ u18.log 2019-10-29 11:24:19.253258539 +0900 | |
| @@ -2,2 +2,2 @@ | |
| - -falign-functions 1 | |
| - -falign-jumps 1 | |
| + -falign-functions 0 | |
| + -falign-jumps 0 | |
| @@ -5 +5,2 @@ | |
| - -falign-loops 1 | |
| + -falign-loops 0 | |
| + -fassociative-math 0 | |
| @@ -6,0 +8 @@ | |
| + -fauto-inc-dec 1 | |
| @@ -12,0 +15 @@ | |
| + -fcode-hoisting 1 | |
| @@ -14 +16,0 @@ | |
| - -fcommon 1 | |
| @@ -22 +23,0 @@ | |
| - -fdata-sections 0 | |
| @@ -25,0 +27 @@ | |
| + -fdelete-dead-exceptions 0 | |
| @@ -27,0 +30 @@ | |
| + -fdevirtualize-speculatively 1 | |
| @@ -34,0 +38,2 @@ | |
| + -ffp-int-builtin-inexact 1 | |
| + -ffunction-cse 1 | |
| @@ -39,0 +45 @@ | |
| + -fgraphite 0 | |
| @@ -44,0 +51 @@ | |
| + -findirect-inlining 1 | |
| @@ -49,0 +57 @@ | |
| + -fipa-bit-cp 1 | |
| @@ -51,0 +60,3 @@ | |
| + -fipa-icf 1 | |
| + -fipa-icf-functions 1 | |
| + -fipa-icf-variables 1 | |
| @@ -54,0 +66 @@ | |
| + -fipa-ra 1 | |
| @@ -56,0 +69 @@ | |
| + -fipa-vrp 1 | |
| @@ -57,0 +71,5 @@ | |
| + -fira-loop-pressure 0 | |
| + -fira-share-save-slots 1 | |
| + -fira-share-spill-slots 1 | |
| + -fisolate-erroneous-paths-attribute 0 | |
| + -fisolate-erroneous-paths-dereference 1 | |
| @@ -60,2 +78,4 @@ | |
| - -floop-block 0 | |
| - -floop-interchange 0 | |
| + -fkeep-gc-roots-live 0 | |
| + -flifetime-dse 1 | |
| + -flimit-function-alignment 0 | |
| + -flive-range-shrinkage 0 | |
| @@ -64 +84 @@ | |
| - -floop-strip-mine 0 | |
| + -flra-remat 1 | |
| @@ -66,2 +85,0 @@ | |
| - -fmerge-all-constants 0 | |
| - -fmerge-constants 1 | |
| @@ -68,0 +87 @@ | |
| + -fmodulo-sched-allow-regmoves 0 | |
| @@ -73 +92 @@ | |
| - -fomit-frame-pointer 0 | |
| + -fomit-frame-pointer 1 | |
| @@ -75 +93,0 @@ | |
| - -foptimize-register-move 1 | |
| @@ -78,0 +97 @@ | |
| + -fpartial-inlining 1 | |
| @@ -81,0 +101 @@ | |
| + -fplt 1 | |
| @@ -84,2 +104,3 @@ | |
| - -freg-struct-return 0 | |
| - -fregmove 1 | |
| + -fprintf-return-value 1 | |
| + -freciprocal-math 0 | |
| + -freg-struct-return 1 | |
| @@ -88 +109 @@ | |
| - -freorder-blocks-and-partition 0 | |
| + -freorder-blocks-and-partition 1 | |
| @@ -107,0 +129 @@ | |
| + -fschedule-fusion 1 | |
| @@ -116 +137,0 @@ | |
| - -fshort-double 0 | |
| @@ -119,0 +141 @@ | |
| + -fshrink-wrap-separate 1 | |
| @@ -123,0 +146,2 @@ | |
| + -fsplit-loops 0 | |
| + -fsplit-paths 0 | |
| @@ -124,0 +149,8 @@ | |
| + -fssa-backprop 1 | |
| + -fssa-phiopt 1 | |
| + -fstack-protector 0 | |
| + -fstack-protector-all 0 | |
| + -fstack-protector-explicit 0 | |
| + -fstack-protector-strong 0 | |
| + -fstdarg-opt 1 | |
| + -fstore-merging 1 | |
| @@ -126,0 +159,2 @@ | |
| + -fstrict-overflow 1 | |
| + -fstrict-volatile-bitfields 1 | |
| @@ -128 +162 @@ | |
| - -ftoplevel-reorder 1 | |
| + -ftracer 0 | |
| @@ -135 +168,0 @@ | |
| - -ftree-coalesce-inlined-vars 0 | |
| @@ -138 +170,0 @@ | |
| - -ftree-copyrename 1 | |
| @@ -148 +179,0 @@ | |
| - -ftree-loop-if-convert-stores 0 | |
| @@ -151,0 +183 @@ | |
| + -ftree-loop-vectorize 0 | |
| @@ -160 +192 @@ | |
| - -ftree-slp-vectorize 1 | |
| + -ftree-slp-vectorize 0 | |
| @@ -166 +197,0 @@ | |
| - -ftree-vect-loop-version 1 | |
| @@ -169 +200 @@ | |
| - -funit-at-a-time 1 | |
| + -funconstrained-commons 0 | |
| @@ -172 +202,0 @@ | |
| - -funsafe-loop-optimizations 0 | |
| @@ -175 +205 @@ | |
| - -funwind-tables 0 | |
| + -funwind-tables 1 | |
| @@ -181 +210,0 @@ | |
| - -fvect-cost-model 0 | |
| @@ -184 +212,0 @@ | |
| - -fwhole-program 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # 説明 | |
| 1. プロファイラを利用してテストで利用したCOBOLコードでどの関数で時間がかかっているか調査した | |
| 2. cob_add_int, cob_cmp_long_numdisp の2関数が処理時間の90%近くを占めているとわかった | |
| 3. それぞれ単独で実行するテストを作成し、OS別に処理時間を測定したところ以下のことがわかった | |
| * cob_add_int は Ubuntu 18.04、14.04で3%の速度差しかない | |
| * cob_cmp_long_numdisp は最大で30%の速度差があった | |
| 4. cob_cmp_long_numdispの実装を調べたが外部ライブラリは使用していない | |
| * libcob.soを入れ替えて測定してみる | |
| * OpenCOBOLパッケージの最適化関連のビルドオプションを調べる(gccのバージョンによる最適化の差異がないか調べる) | |
| # prev test | |
| chef-server | |
| Total: 588 samples | |
| 262 44.6% 44.6% 293 49.8% cob_add_int | |
| 255 43.4% 87.9% 255 43.4% cob_cmp_long_numdisp | |
| 31 5.3% 93.2% 31 5.3% __nss_hosts_lookup | |
| 22 3.7% 96.9% 22 3.7% _init@4009e0 | |
| 17 2.9% 99.8% 565 96.1% TEST1_ | |
| 1 0.2% 100.0% 1 0.2% _init@9560 | |
| 0 0.0% 100.0% 22 3.7% 0x00007f259b3c925f | |
| 0 0.0% 100.0% 1 0.2% 0x00007ffee7ad1f0f | |
| 0 0.0% 100.0% 587 99.8% TEST1 | |
| 0 0.0% 100.0% 587 99.8% __libc_start_main | |
| ap-proxy-bionic2-stg | |
| Total: 759 samples | |
| 317 41.8% 41.8% 404 53.2% cob_add_int | |
| 316 41.6% 83.4% 316 41.6% cob_cmp_long_numdisp | |
| 73 9.6% 93.0% 73 9.6% __nss_passwd_lookup | |
| 23 3.0% 96.0% 23 3.0% _init@b28 | |
| 16 2.1% 98.2% 759 100.0% TEST1_ | |
| 14 1.8% 100.0% 14 1.8% _init@94f8 | |
| 0 0.0% 100.0% 759 100.0% TEST1 | |
| 0 0.0% 100.0% 759 100.0% __libc_start_main | |
| 0 0.0% 100.0% 759 100.0% _start | |
| 0 0.0% 100.0% 759 100.0% main | |
| # cob_add_int | |
| ## code | |
| /* TEST1.CBL:21: PERFORM */ | |
| { | |
| int i,j,k; | |
| memcpy (b_5, "0000000001", 10); | |
| for(i=0;i<1000;i++) { | |
| for(j=0;j<1000;j++) { | |
| for(k=0;k<1000;k++) { | |
| cob_add_int (&f_5, 1); | |
| } | |
| } | |
| } | |
| } | |
| ## result | |
| % /usr/bin/time ./TEST1 | |
| num, chef-server, ap-proxy-bionic2-stg (user sec) | |
| 1, 14.612, 14.855 | |
| 2, 14.352, 14.827 | |
| 3, 14.384, 14.721 | |
| 14.827 / 14.352 = 1.033 3.3% | |
| # cob_cmp | |
| ## code | |
| /* TEST1.CBL:21: PERFORM */ | |
| { | |
| int i,j,k; | |
| memcpy (b_5, "0000000001", 10); | |
| for(i=0;i<1000;i++) { | |
| for(j=0;j<1000;j++) { | |
| for(k=0;k<1000;k++) { | |
| cob_cmp_long_numdisp (b_5, 10, i+j+k); | |
| } | |
| } | |
| } | |
| } | |
| ## result | |
| % /usr/bin/time ./TEST1 | |
| num, chef-server, ap-proxy-bionic2-stg (user sec) | |
| 1, 11.416, 14.661 | |
| 2, 11.364, 14.834 | |
| 3, 11.628, 14.605 | |
| 14.834 / 11.364 = 1.3054 30.5% | |
| ## cob_cmp_long_numdisp source | |
| int cob_cmp_long_numdisp ( const unsigned char * data, | |
| const size_t size, | |
| const int n | |
| ) | |
| { | |
| const unsigned char *p; | |
| long long val = 0; | |
| size_t inc; | |
| p = data; | |
| for (inc = 0; inc < size; inc++, p++) { | |
| val = (val * 10) + (*p - (unsigned char)'0'); | |
| } | |
| return (val < n) ? -1 : (val > n); | |
| } | |
| # cob_cmp_long_numdispのテストでそれぞれの環境でlibcob.so.1.0.0をLD_PRELOADして違いがないか確認 | |
| ## ubuntu 18.04 (ap-proxy-bionic2-stg) | |
| oruser@ap-proxy-bionic2-stg:~/mihara/cobtest-20191021/05-cmp$ time ./TEST1 | |
| real 0m14.248s | |
| user 0m14.224s | |
| sys 0m0.004s | |
| oruser@ap-proxy-bionic2-stg:~/mihara/cobtest-20191021/05-cmp$ LD_PRELOAD=./libcob.so.1.0.0 time ./TEST1 | |
| 10.80user 0.00system 0:10.83elapsed 99%CPU (0avgtext+0avgdata 3460maxresident)k | |
| 0inputs+0outputs (0major+146minor)pagefaults 0swaps | |
| oruser@ap-proxy-bionic2-stg:~/mihara/cobtest-20191021/05-cmp$ md5sum libcob.so.1.0.0 | |
| 80d36e6db53e83f4f3c1f0850bf6008a libcob.so.1.0.0 | |
| ### result | |
| num, 14.04のlibcob, 18.04のlibcob (user sec) | |
| 1, 10.640, 14.224 | |
| 2, 10.820, 14.230 | |
| 3, 10.800, 14.405 | |
| ## ubuntu 14.04 (chef-server) | |
| oruser@chef-server:~/mihara/cobtest-20191021/05-cmp$ time ./TEST1 | |
| real 0m11.356s | |
| user 0m10.724s | |
| sys 0m0.000s | |
| oruser@chef-server:~/mihara/cobtest-20191021/05-cmp$ LD_PRELOAD=./libcob.so.1.0.0 time ./TEST1 | |
| time: /lib/x86_64-linux-gnu/libncurses.so.5: no version information available (required by ./libcob.so.1.0.0) | |
| time: /lib/x86_64-linux-gnu/libtinfo.so.5: no version information available (required by ./libcob.so.1.0.0) | |
| ./TEST1: /lib/x86_64-linux-gnu/libncurses.so.5: no version information available (required by ./libcob.so.1.0.0) | |
| ./TEST1: /lib/x86_64-linux-gnu/libtinfo.so.5: no version information available (required by ./libcob.so.1.0.0) | |
| 14.52user 0.00system 0:15.23elapsed 95%CPU (0avgtext+0avgdata 3240maxresident)k | |
| 0inputs+0outputs (0major+154minor)pagefaults 0swaps | |
| ### result | |
| num, 14.04のlibcob, 18.04のlibcob (user sec) | |
| 1, 10.724, 14.520 | |
| 2, 10.864, 14.290 | |
| 3, 10.970, 14.210 | |
| # open-cobolパッケージビルド時のgccオプション比較 | |
| * 14.04,18.04いずれもgcc -O2 -fsigned-char -Wall -Wwrite-strings -Wmissing-prototypes -Wno-format-y2k でコンパイルしている | |
| * -O0を指定した上で14.04のコンパイルオプションを適用して18.04でパッケージビルドすると、OpenCOBOLのmake時の単体テストが失敗する(どのオプションが原因かは未特定) | |
| * -O3でビルドしてもパフォーマンス改善しなかった | |
| * どのオプションが影響しているか地道に調べるしかなさそう。。。 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment