Skip to content

Instantly share code, notes, and snippets.

@yusukemihara
Last active November 5, 2019 02:32
Show Gist options
  • Select an option

  • Save yusukemihara/49f02c2c901861a9a83c12b8d70164c0 to your computer and use it in GitHub Desktop.

Select an option

Save yusukemihara/49f02c2c901861a9a83c12b8d70164c0 to your computer and use it in GitHub Desktop.
OpenCOBOL performance memo
--- u14.log 2019-10-29 11:24:19.253258539 +0900
+++ u18.log 2019-10-29 11:24:19.253258539 +0900
@@ -2,2 +2,2 @@
- -falign-functions 1
- -falign-jumps 1
+ -falign-functions 0
+ -falign-jumps 0
@@ -5 +5,2 @@
- -falign-loops 1
+ -falign-loops 0
+ -fassociative-math 0
@@ -6,0 +8 @@
+ -fauto-inc-dec 1
@@ -12,0 +15 @@
+ -fcode-hoisting 1
@@ -14 +16,0 @@
- -fcommon 1
@@ -22 +23,0 @@
- -fdata-sections 0
@@ -25,0 +27 @@
+ -fdelete-dead-exceptions 0
@@ -27,0 +30 @@
+ -fdevirtualize-speculatively 1
@@ -34,0 +38,2 @@
+ -ffp-int-builtin-inexact 1
+ -ffunction-cse 1
@@ -39,0 +45 @@
+ -fgraphite 0
@@ -44,0 +51 @@
+ -findirect-inlining 1
@@ -49,0 +57 @@
+ -fipa-bit-cp 1
@@ -51,0 +60,3 @@
+ -fipa-icf 1
+ -fipa-icf-functions 1
+ -fipa-icf-variables 1
@@ -54,0 +66 @@
+ -fipa-ra 1
@@ -56,0 +69 @@
+ -fipa-vrp 1
@@ -57,0 +71,5 @@
+ -fira-loop-pressure 0
+ -fira-share-save-slots 1
+ -fira-share-spill-slots 1
+ -fisolate-erroneous-paths-attribute 0
+ -fisolate-erroneous-paths-dereference 1
@@ -60,2 +78,4 @@
- -floop-block 0
- -floop-interchange 0
+ -fkeep-gc-roots-live 0
+ -flifetime-dse 1
+ -flimit-function-alignment 0
+ -flive-range-shrinkage 0
@@ -64 +84 @@
- -floop-strip-mine 0
+ -flra-remat 1
@@ -66,2 +85,0 @@
- -fmerge-all-constants 0
- -fmerge-constants 1
@@ -68,0 +87 @@
+ -fmodulo-sched-allow-regmoves 0
@@ -73 +92 @@
- -fomit-frame-pointer 0
+ -fomit-frame-pointer 1
@@ -75 +93,0 @@
- -foptimize-register-move 1
@@ -78,0 +97 @@
+ -fpartial-inlining 1
@@ -81,0 +101 @@
+ -fplt 1
@@ -84,2 +104,3 @@
- -freg-struct-return 0
- -fregmove 1
+ -fprintf-return-value 1
+ -freciprocal-math 0
+ -freg-struct-return 1
@@ -88 +109 @@
- -freorder-blocks-and-partition 0
+ -freorder-blocks-and-partition 1
@@ -107,0 +129 @@
+ -fschedule-fusion 1
@@ -116 +137,0 @@
- -fshort-double 0
@@ -119,0 +141 @@
+ -fshrink-wrap-separate 1
@@ -123,0 +146,2 @@
+ -fsplit-loops 0
+ -fsplit-paths 0
@@ -124,0 +149,8 @@
+ -fssa-backprop 1
+ -fssa-phiopt 1
+ -fstack-protector 0
+ -fstack-protector-all 0
+ -fstack-protector-explicit 0
+ -fstack-protector-strong 0
+ -fstdarg-opt 1
+ -fstore-merging 1
@@ -126,0 +159,2 @@
+ -fstrict-overflow 1
+ -fstrict-volatile-bitfields 1
@@ -128 +162 @@
- -ftoplevel-reorder 1
+ -ftracer 0
@@ -135 +168,0 @@
- -ftree-coalesce-inlined-vars 0
@@ -138 +170,0 @@
- -ftree-copyrename 1
@@ -148 +179,0 @@
- -ftree-loop-if-convert-stores 0
@@ -151,0 +183 @@
+ -ftree-loop-vectorize 0
@@ -160 +192 @@
- -ftree-slp-vectorize 1
+ -ftree-slp-vectorize 0
@@ -166 +197,0 @@
- -ftree-vect-loop-version 1
@@ -169 +200 @@
- -funit-at-a-time 1
+ -funconstrained-commons 0
@@ -172 +202,0 @@
- -funsafe-loop-optimizations 0
@@ -175 +205 @@
- -funwind-tables 0
+ -funwind-tables 1
@@ -181 +210,0 @@
- -fvect-cost-model 0
@@ -184 +212,0 @@
- -fwhole-program 0
# 説明
1. プロファイラを利用してテストで利用したCOBOLコードでどの関数で時間がかかっているか調査した
2. cob_add_int と cob_cmp_long_numdisp の2関数だけで処理時間の90%近くを占めているとわかった
3. それぞれ単独で実行するテストを作成し、OS別に処理時間を測定したところ以下のことがわかった
* cob_add_int は Ubuntu 18.04、14.04で3%の速度差しかない
* cob_cmp_long_numdisp は最大で30%の速度差があった
4. cob_cmp_long_numdispの実装を調べたが外部ライブラリは使用していない
* libcob.soを入れ替えて測定してみる
* OpenCOBOLパッケージの最適化関連のビルドオプションを調べる(gccのバージョンによる最適化の差異がないか調べる)
# prev test
chef-server
Total: 588 samples
262 44.6% 44.6% 293 49.8% cob_add_int
255 43.4% 87.9% 255 43.4% cob_cmp_long_numdisp
31 5.3% 93.2% 31 5.3% __nss_hosts_lookup
22 3.7% 96.9% 22 3.7% _init@4009e0
17 2.9% 99.8% 565 96.1% TEST1_
1 0.2% 100.0% 1 0.2% _init@9560
0 0.0% 100.0% 22 3.7% 0x00007f259b3c925f
0 0.0% 100.0% 1 0.2% 0x00007ffee7ad1f0f
0 0.0% 100.0% 587 99.8% TEST1
0 0.0% 100.0% 587 99.8% __libc_start_main
ap-proxy-bionic2-stg
Total: 759 samples
317 41.8% 41.8% 404 53.2% cob_add_int
316 41.6% 83.4% 316 41.6% cob_cmp_long_numdisp
73 9.6% 93.0% 73 9.6% __nss_passwd_lookup
23 3.0% 96.0% 23 3.0% _init@b28
16 2.1% 98.2% 759 100.0% TEST1_
14 1.8% 100.0% 14 1.8% _init@94f8
0 0.0% 100.0% 759 100.0% TEST1
0 0.0% 100.0% 759 100.0% __libc_start_main
0 0.0% 100.0% 759 100.0% _start
0 0.0% 100.0% 759 100.0% main
# cob_add_int
## code
/* TEST1.CBL:21: PERFORM */
/*
 * Micro-benchmark for cob_add_int in isolation.
 * The 10-byte buffer b_5 is seeded with the digit characters "0000000001",
 * then cob_add_int (&f_5, 1) is called 1000 * 1000 * 1000 times.
 * NOTE(review): f_5 is presumably the field descriptor backed by b_5 --
 * confirm against the generated C for TEST1.CBL.
 */
{
int i,j,k;
memcpy (b_5, "0000000001", 10);
/* Three nested 1000-iteration loops: 10^9 calls in total. */
for(i=0;i<1000;i++) {
for(j=0;j<1000;j++) {
for(k=0;k<1000;k++) {
cob_add_int (&f_5, 1);
}
}
}
}
## result
% /usr/bin/time ./TEST1
num, chef-server, ap-proxy-bionic2-stg (user sec)
1, 14.612, 14.855
2, 14.352, 14.827
3, 14.384, 14.721
14.827 / 14.352 = 1.033 3.3%
# cob_cmp
## code
/* TEST1.CBL:21: PERFORM */
/*
 * Micro-benchmark for cob_cmp_long_numdisp in isolation.
 * The 10-byte buffer b_5 is seeded with the digit characters "0000000001"
 * and is never modified afterwards; it is compared 1000 * 1000 * 1000 times
 * against i+j+k, which ranges from 0 to 2997.
 * The comparison result is discarded.
 */
{
int i,j,k;
memcpy (b_5, "0000000001", 10);
/* Three nested 1000-iteration loops: 10^9 calls in total. */
for(i=0;i<1000;i++) {
for(j=0;j<1000;j++) {
for(k=0;k<1000;k++) {
cob_cmp_long_numdisp (b_5, 10, i+j+k);
}
}
}
}
## result
% /usr/bin/time ./TEST1
num, chef-server, ap-proxy-bionic2-stg (user sec)
1, 11.416, 14.661
2, 11.364, 14.834
3, 11.628, 14.605
14.834 / 11.364 = 1.3054 30.5%
## cob_cmp_long_numdisp source
/*
 * Compare a run of decimal digit characters against an integer.
 *
 * data: pointer to `size` bytes, each read as one decimal digit by
 *       subtracting '0' (no validation is performed on the bytes).
 * size: number of bytes to read from data.
 * n:    value to compare against.
 *
 * The digits are accumulated most-significant first into a long long.
 * Returns -1 if the accumulated value is less than n, 1 if it is greater,
 * and 0 if they are equal.
 */
int cob_cmp_long_numdisp (const unsigned char *data, const size_t size, const int n)
{
	long long	acc = 0;
	size_t		idx = 0;

	while (idx < size) {
		/* Shift the running value one decimal place and add the next digit. */
		acc = (acc * 10) + (data[idx] - (unsigned char)'0');
		idx++;
	}

	if (acc < n) {
		return -1;
	}
	if (acc > n) {
		return 1;
	}
	return 0;
}
# cob_cmp_long_numdispのテストでそれぞれの環境でlibcob.so.1.0.0をLD_PRELOADして違いがないか確認
## ubuntu 18.04 (ap-proxy-bionic2-stg)
oruser@ap-proxy-bionic2-stg:~/mihara/cobtest-20191021/05-cmp$ time ./TEST1
real 0m14.248s
user 0m14.224s
sys 0m0.004s
oruser@ap-proxy-bionic2-stg:~/mihara/cobtest-20191021/05-cmp$ LD_PRELOAD=./libcob.so.1.0.0 time ./TEST1
10.80user 0.00system 0:10.83elapsed 99%CPU (0avgtext+0avgdata 3460maxresident)k
0inputs+0outputs (0major+146minor)pagefaults 0swaps
oruser@ap-proxy-bionic2-stg:~/mihara/cobtest-20191021/05-cmp$ md5sum libcob.so.1.0.0
80d36e6db53e83f4f3c1f0850bf6008a libcob.so.1.0.0
### result
num, 14.04のlibcob (LD_PRELOAD), 18.04のlibcob (標準) (user sec)
1, 10.640, 14.224
2, 10.820, 14.230
3, 10.800, 14.405
## ubuntu 14.04 (chef-server)
oruser@chef-server:~/mihara/cobtest-20191021/05-cmp$ time ./TEST1
real 0m11.356s
user 0m10.724s
sys 0m0.000s
oruser@chef-server:~/mihara/cobtest-20191021/05-cmp$ LD_PRELOAD=./libcob.so.1.0.0 time ./TEST1
time: /lib/x86_64-linux-gnu/libncurses.so.5: no version information available (required by ./libcob.so.1.0.0)
time: /lib/x86_64-linux-gnu/libtinfo.so.5: no version information available (required by ./libcob.so.1.0.0)
./TEST1: /lib/x86_64-linux-gnu/libncurses.so.5: no version information available (required by ./libcob.so.1.0.0)
./TEST1: /lib/x86_64-linux-gnu/libtinfo.so.5: no version information available (required by ./libcob.so.1.0.0)
14.52user 0.00system 0:15.23elapsed 95%CPU (0avgtext+0avgdata 3240maxresident)k
0inputs+0outputs (0major+154minor)pagefaults 0swaps
### result
num, 14.04のlibcob (標準), 18.04のlibcob (LD_PRELOAD) (user sec)
1, 10.724, 14.520
2, 10.864, 14.290
3, 10.970, 14.210
# open-cobolパッケージビルド時のgccオプション比較
* 14.04,18.04いずれもgcc -O2 -fsigned-char -Wall -Wwrite-strings -Wmissing-prototypes -Wno-format-y2k でコンパイルしている
* -O0を指定した上で14.04のコンパイルオプションを適用して18.04でパッケージビルドするとOpen-COBOLのmake時の単体テストで失敗する(どのオプションかは未特定)
* -O3でビルドしてもパフォーマンス改善しなかった
* どのオプションが影響しているか地道に調べるしかなさそう。。。
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment