Skip to content

Instantly share code, notes, and snippets.

@foxtran
Created January 15, 2023 15:28
Show Gist options
  • Save foxtran/e88500db6ee92c75ccf7ec9623ca4dce to your computer and use it in GitHub Desktop.
Save foxtran/e88500db6ee92c75ccf7ec9623ca4dce to your computer and use it in GitHub Desktop.

The benchmark was compiled using the following compiler:

GCC version 11.3.0

The benchmark was compiled with the following options:

-mabi=lp64d -mcpu=sifive-u74 -misa-spec=2.2 -march=rv64imafdc -O3 -Wall -Wextra -ffree-line-length-none -fpre-include=/usr/include/finclude/riscv64-linux-gnu/math-vector-fortran.h

Number of repeats is: 100000

Total size of one array is 4 KB

type test N mean_time_ms sd_time_ms min_time_ms max_time_ms
integer(1) add_v1 10 171.876 1.879 171.217 177.514
integer(1) add_v2 10 274.061 0.015 274.044 274.091
integer(1) add_v3 10 205.642 0.016 205.617 205.667
integer(1) mul_v1 10 1230.561 0.088 1230.432 1230.786
integer(1) mul_v2 10 1640.996 0.079 1640.919 1641.211
integer(1) mul_v3 10 1914.201 0.119 1914.030 1914.508
integer(1) fma_v1 10 1640.575 0.039 1640.505 1640.637
integer(1) fma_v2 10 1504.098 0.508 1503.850 1505.616
integer(1) fma_v3 10 1504.131 0.390 1503.867 1505.284
integer(1) fma_v4 10 1915.636 3.762 1914.318 1926.921
integer(1) div_v1 10 18196.291 4.374 18191.477 18205.041
integer(1) div_v2 10 4133.336 0.203 4133.166 4133.909
integer(1) inv_v1 10 18305.291 0.349 18305.045 18306.300
integer(1) inv_v2.1 10 1407.035 0.529 1406.338 1407.799
integer(1) inv_v2.2 10 6956.418 3.610 6951.885 6962.224
integer(1) inv_v2.3 10 1094.147 0.106 1094.078 1094.455
integer(1) popcnt 10 6561.848 0.130 6561.647 6562.169
integer(1) poppar 10 4921.592 0.181 4921.406 4922.092
integer(1) dim 10 2461.267 0.052 2461.168 2461.377
integer(1) iand 10 205.643 0.015 205.620 205.670
integer(1) ieor 10 205.689 0.068 205.605 205.781
integer(1) ior 10 205.721 0.062 205.639 205.800
integer(1) ishft 10 3573.592 0.125 3573.399 3573.833
integer(1) ishftc 10 4101.599 0.075 4101.518 4101.729
integer(1) ibset 10 1367.620 0.034 1367.544 1367.662
integer(1) ibclr 10 1914.350 0.071 1914.193 1914.432
integer(1) min 10 2274.390 0.379 2273.786 2275.092
integer(1) max 10 2280.774 0.256 2280.329 2281.242
integer(1) shifta 10 1367.716 0.294 1367.561 1368.594
integer(1) shiftl 10 1641.387 1.165 1640.906 1644.879
integer(1) shiftr 10 1914.414 0.397 1914.159 1915.586
integer(2) add_v1 10 154.120 0.021 154.092 154.155
integer(2) add_v2 10 240.003 0.153 239.938 240.460
integer(2) add_v3 10 205.735 0.089 205.682 206.000
integer(2) mul_v1 10 615.493 0.087 615.421 615.735
integer(2) mul_v2 10 820.824 0.029 820.773 820.863
integer(2) mul_v3 10 820.745 0.022 820.716 820.793
integer(2) fma_v1 10 820.522 0.298 820.365 821.412
integer(2) fma_v2 10 752.186 0.044 752.134 752.265
integer(2) fma_v3 10 752.230 0.031 752.182 752.292
integer(2) fma_v4 10 957.520 0.033 957.477 957.594
integer(2) div_v1 10 8135.335 0.570 8135.073 8137.025
integer(2) div_v2 10 2860.434 0.099 2860.299 2860.581
integer(2) inv_v1 10 8203.608 0.188 8203.400 8204.100
integer(2) inv_v2.1 10 958.286 0.023 958.247 958.323
integer(2) inv_v2.2 10 3192.113 0.052 3192.034 3192.205
integer(2) inv_v2.3 10 820.720 0.086 820.607 820.946
integer(2) popcnt 10 3281.267 0.057 3281.183 3281.347
integer(2) poppar 10 2461.066 0.063 2460.949 2461.155
integer(2) dim 10 1231.091 0.186 1230.948 1231.637
integer(2) iand 10 137.431 0.014 137.406 137.450
integer(2) ieor 10 137.428 0.010 137.414 137.448
integer(2) ior 10 137.429 0.012 137.413 137.453
integer(2) ishft 10 1777.767 0.024 1777.732 1777.806
integer(2) ishftc 10 2187.981 0.058 2187.913 2188.116
integer(2) ibset 10 684.146 0.043 684.106 684.262
integer(2) ibclr 10 957.550 0.037 957.484 957.612
integer(2) min 10 1095.466 0.085 1095.278 1095.538
integer(2) max 10 1095.651 0.097 1095.570 1095.895
integer(2) shifta 10 684.145 0.023 684.103 684.187
integer(2) shiftl 10 820.839 0.033 820.773 820.878
integer(2) shiftr 10 1094.190 0.042 1094.124 1094.260
integer(4) add_v1 10 171.240 0.015 171.221 171.270
integer(4) add_v2 10 342.410 0.021 342.392 342.467
integer(4) add_v3 10 273.994 0.017 273.961 274.019
integer(4) mul_v1 10 307.933 0.017 307.905 307.964
integer(4) mul_v2 10 410.817 0.026 410.763 410.857
integer(4) mul_v3 10 410.639 0.066 410.567 410.745
integer(4) fma_v1 10 342.056 0.018 342.023 342.080
integer(4) fma_v2 10 376.226 0.024 376.203 376.286
integer(4) fma_v3 10 376.283 0.025 376.243 376.329
integer(4) fma_v4 10 479.030 0.038 478.984 479.105
integer(4) div_v1 10 2944.163 0.042 2944.097 2944.261
integer(4) div_v2 10 2451.705 0.044 2451.635 2451.756
integer(4) inv_v1 10 3012.748 0.068 3012.612 3012.840
integer(4) inv_v2.1 10 410.997 0.019 410.972 411.033
integer(4) inv_v2.2 10 1506.171 0.290 1506.007 1507.030
integer(4) inv_v2.3 10 273.906 0.022 273.872 273.947
integer(4) popcnt 10 1641.015 0.055 1640.938 1641.143
integer(4) poppar 10 1230.856 0.063 1230.794 1231.016
integer(4) dim 10 485.220 0.034 485.146 485.275
integer(4) iand 10 137.431 0.023 137.403 137.476
integer(4) ieor 10 137.432 0.016 137.407 137.458
integer(4) ior 10 137.438 0.023 137.406 137.494
integer(4) ishft 10 684.286 0.096 684.224 684.564
integer(4) ishftc 10 820.893 0.036 820.839 820.957
integer(4) ibset 10 410.735 0.022 410.698 410.766
integer(4) ibclr 10 479.005 0.059 478.938 479.110
integer(4) min 10 342.485 0.022 342.448 342.531
integer(4) max 10 342.552 0.118 342.387 342.747
integer(4) shifta 10 342.545 0.016 342.519 342.575
integer(4) shiftl 10 342.463 0.019 342.440 342.506
integer(4) shiftr 10 410.664 0.052 410.601 410.743
integer(8) add_v1 10 85.817 0.012 85.804 85.849
integer(8) add_v2 10 205.695 0.030 205.633 205.736
integer(8) add_v3 10 171.335 0.012 171.324 171.362
integer(8) mul_v1 10 154.112 0.010 154.098 154.133
integer(8) mul_v2 10 205.764 0.057 205.651 205.827
integer(8) mul_v3 10 205.573 0.015 205.554 205.608
integer(8) fma_v1 10 171.205 0.017 171.190 171.247
integer(8) fma_v2 10 188.281 0.016 188.267 188.310
integer(8) fma_v3 10 188.320 0.016 188.303 188.355
integer(8) fma_v4 10 239.827 0.023 239.796 239.865
integer(8) div_v1 10 1435.614 0.023 1435.576 1435.652
integer(8) div_v2 10 1231.004 0.041 1230.917 1231.072
integer(8) inv_v1 10 1470.044 0.051 1469.960 1470.125
integer(8) inv_v2.1 10 205.789 0.030 205.754 205.870
integer(8) inv_v2.2 10 956.894 0.044 956.818 956.967
integer(8) inv_v2.3 10 205.508 0.015 205.487 205.531
integer(8) popcnt 10 855.151 0.027 855.110 855.202
integer(8) poppar 10 650.097 0.055 650.008 650.215
integer(8) dim 10 205.854 0.021 205.804 205.885
integer(8) iand 10 171.544 0.017 171.518 171.579
integer(8) ieor 10 171.548 0.028 171.514 171.618
integer(8) ior 10 171.531 0.011 171.516 171.556
integer(8) ishft 10 343.799 0.015 343.776 343.832
integer(8) ishftc 10 342.479 0.018 342.447 342.509
integer(8) ibset 10 171.598 0.011 171.580 171.614
integer(8) ibclr 10 205.714 0.015 205.685 205.737
integer(8) min 10 205.596 0.054 205.564 205.757
integer(8) max 10 205.846 0.012 205.829 205.869
integer(8) shifta 10 171.699 0.016 171.669 171.717
integer(8) shiftl 10 171.603 0.014 171.573 171.624
integer(8) shiftr 10 171.606 0.009 171.589 171.618
real(4) add_v1 10 512.951 0.020 512.921 512.994
real(4) add_v2 10 684.137 0.034 684.078 684.192
real(4) add_v3 10 615.855 0.029 615.818 615.897
real(4) mul_v1 10 512.886 0.031 512.841 512.933
real(4) mul_v2 10 684.201 0.032 684.136 684.233
real(4) mul_v3 10 615.710 0.037 615.633 615.787
real(4) fma_v1 10 478.706 0.018 478.676 478.735
real(4) fma_v2 10 512.971 0.029 512.921 513.027
real(4) fma_v3 10 547.123 0.022 547.095 547.167
real(4) fma_v4 10 684.183 0.046 684.070 684.244
real(4) div_v1 10 763.301 0.034 763.245 763.370
real(4) div_v2 10 2220.395 0.065 2220.247 2220.478
real(4) inv 10 2187.585 0.019 2187.546 2187.618
real(4) invsqrt_v1 10 4077.277 0.043 4077.197 4077.345
real(4) invsqrt_v2 10 4056.873 0.048 4056.777 4056.942
real(4) exp 10 4784.793 0.066 4784.698 4784.909
real(4) erf 10 6743.107 0.081 6743.001 6743.254
real(4) erfc 10 6623.890 0.135 6623.716 6624.231
real(4) erfc_scaled 10 10582.025 0.208 10581.605 10582.412
real(4) gamma 10 26325.138 0.630 26324.426 26326.214
real(4) sqrt 10 2075.046 0.083 2074.889 2075.144
real(4) sin 10 4093.452 0.200 4092.995 4093.637
real(4) cos 10 3758.971 0.578 3757.901 3759.646
real(4) tan 10 8868.126 1.308 8866.079 8869.433
real(4) sinh 10 17206.805 0.082 17206.677 17206.955
real(4) cosh 10 11816.718 0.154 11816.499 11817.026
real(4) tanh 10 14935.072 0.221 14934.719 14935.512
real(4) asinh 10 17715.835 0.257 17715.521 17716.508
real(4) acosh 10 3555.262 0.095 3555.097 3555.421
real(4) atan 10 7334.219 0.124 7333.949 7334.364
real(4) bessel_j0 10 6562.282 0.183 6562.099 6562.805
real(4) bessel_j1 10 7109.460 0.161 7109.214 7109.827
real(4) bessel_y0 10 17566.615 0.195 17566.340 17567.083
real(4) bessel_y1 10 20095.468 0.274 20095.208 20096.191
real(4) epsilon 10 205.503 0.019 205.477 205.532
real(4) exponent 10 4172.997 2.168 4169.611 4174.528
real(4) fraction 10 3810.742 34.368 3793.101 3879.553
real(4) log 10 3896.333 0.066 3896.245 3896.504
real(4) log10 10 7563.962 0.444 7562.733 7564.421
real(4) log_gamma 10 13165.042 0.931 13164.563 13167.796
real(4) atan2 10 13054.174 0.888 13052.963 13055.839
real(4) dim 10 1174.182 0.067 1174.062 1174.280
real(8) add_v1 10 325.023 0.017 324.997 325.051
real(8) add_v2 10 410.771 0.086 410.718 411.018
real(8) add_v3 10 376.505 0.016 376.481 376.540
real(8) mul_v1 10 325.012 0.017 324.988 325.039
real(8) mul_v2 10 410.734 0.031 410.699 410.804
real(8) mul_v3 10 376.505 0.025 376.470 376.544
real(8) fma_v1 10 307.887 0.021 307.850 307.922
real(8) fma_v2 10 324.991 0.019 324.968 325.032
real(8) fma_v3 10 342.123 0.016 342.102 342.167
real(8) fma_v4 10 410.737 0.025 410.704 410.778
real(8) div_v1 10 428.998 0.033 428.958 429.080
real(8) div_v2 10 2103.440 0.067 2103.340 2103.570
real(8) inv 10 2085.070 0.101 2084.967 2085.342
real(8) invsqrt_v1 10 4021.411 0.064 4021.320 4021.518
real(8) invsqrt_v2 10 4010.283 0.048 4010.222 4010.370
real(8) exp 10 2802.751 0.057 2802.646 2802.832
real(8) erf 10 4484.458 0.090 4484.335 4484.607
real(8) erfc 10 4627.043 0.141 4626.882 4627.416
real(8) erfc_scaled 10 7382.662 0.143 7382.321 7382.822
real(8) gamma 10 15863.540 0.711 15862.906 15864.945
real(8) sqrt 10 2028.331 0.053 2028.250 2028.401
real(8) sin 10 4270.003 0.094 4269.892 4270.220
real(8) cos 10 4169.045 0.205 4168.756 4169.519
real(8) tan 10 8372.737 0.364 8372.203 8373.314
real(8) sinh 10 11397.746 5.498 11391.383 11407.719
real(8) cosh 10 7856.762 8.172 7853.224 7881.249
real(8) tanh 10 10704.972 0.127 10704.738 10705.160
real(8) asinh 10 13261.467 0.165 13261.244 13261.750
real(8) acosh 10 1948.697 0.372 1948.493 1949.808
real(8) atan 10 5663.040 5.605 5657.812 5677.576
real(8) bessel_j0 10 4682.661 0.142 4682.509 4683.048
real(8) bessel_j1 10 4956.206 0.124 4956.069 4956.534
real(8) bessel_y0 10 12519.257 0.182 12518.941 12519.616
real(8) bessel_y1 10 14706.557 0.132 14706.361 14706.732
real(8) epsilon 10 69.016 0.008 69.002 69.030
real(8) exponent 10 1983.909 0.059 1983.750 1983.959
real(8) fraction 10 1811.969 0.055 1811.901 1812.050
real(8) log 10 3013.375 0.196 3013.248 3013.948
real(8) log10 10 5051.569 0.108 5051.400 5051.764
real(8) log_gamma 10 8728.382 0.127 8728.238 8728.636
real(8) atan2 10 12059.186 0.454 12058.511 12059.863
real(8) dim 10 633.483 3.638 628.646 641.969
real(16) add_v1 10 1711.972 1.860 1707.865 1713.185
real(16) add_v2 10 1489.706 0.890 1488.341 1491.262
real(16) add_v3 10 1547.631 0.049 1547.551 1547.703
real(16) mul_v1 10 2485.594 0.219 2485.411 2486.010
real(16) mul_v2 10 3183.166 3.489 3173.450 3184.780
real(16) mul_v3 10 3118.262 2.158 3116.700 3124.680
real(16) fma_v1 10 2159.504 0.634 2158.477 2159.997
real(16) fma_v2 10 2052.379 0.349 2051.954 2053.186
real(16) fma_v3 10 2069.272 0.226 2068.968 2069.749
real(16) fma_v4 10 4776.933 1.505 4772.481 4777.867
real(16) div_v1 10 3332.143 2.140 3329.139 3335.158
real(16) div_v2 10 7644.577 1.374 7642.330 7647.036
real(16) inv 10 7596.247 1.129 7594.075 7597.825
real(16) invsqrt_v1 10 29671.155 4.286 29666.534 29678.275
real(16) invsqrt_v2 10 29836.658 2.588 29833.937 29840.477
real(16) exp 10 79838.766 20.898 79804.489 79860.670
real(16) erf 10 93772.776 34.150 93675.660 93797.683
real(16) erfc 10 94949.351 17.211 94921.551 94987.186
real(16) erfc_scaled 10 185948.588 48.888 185878.229 186004.931
real(16) gamma 10 226644.005 41.202 226588.849 226696.370
real(16) sqrt 10 22140.093 2.683 22137.830 22147.967
real(16) sin 10 67993.470 2.586 67987.585 67996.383
real(16) cos 10 68003.560 6.190 67996.043 68016.098
real(16) tan 10 80604.229 8.951 80593.184 80628.176
real(16) sinh 10 143092.392 24.349 143053.675 143140.407
real(16) cosh 10 108711.028 19.029 108675.098 108744.133
real(16) tanh 10 136880.311 40.026 136810.911 136957.193
real(16) asinh 10 212342.951 18.453 212324.884 212389.067
real(16) acosh 10 3694.850 1.344 3693.155 3697.342
real(16) atan 10 82480.154 19.690 82431.182 82514.616
real(16) bessel_j0 10 85646.555 30.105 85558.909 85667.759
real(16) bessel_j1 10 83058.397 37.807 83014.808 83118.807
real(16) bessel_y0 10 276825.506 56.291 276734.184 276934.334
real(16) bessel_y1 10 286938.182 47.061 286881.925 287036.771
real(16) epsilon 10 34.710 0.009 34.691 34.723
real(16) exponent 10 2506.674 1.377 2504.283 2508.980
real(16) fraction 10 1531.281 21.829 1507.264 1555.204
real(16) log 10 101306.463 18.161 101280.002 101338.099
real(16) log10 10 158700.551 73.984 158625.271 158915.377
real(16) log_gamma 10 193440.858 43.866 193393.084 193538.142
real(16) atan2 10 91256.427 12.388 91236.898 91282.087
real(16) dim 10 2382.710 1.067 2381.684 2385.374
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment