This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <cstdio> | |
| #include <cmath> | |
| #include <emmintrin.h> | |
| #define _mm_set1_pd(x) _mm_set_pd((x), (x)) | |
| // Probably somewhat faster. | |
| inline __m128d fastexp(__m128d v) | |
| { | |
| const __m128d inv_log2 = _mm_set1_pd(1.4426950408889634073599); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| PRE_ALIGN(128) struct __vec8_d { | |
| __vec8_d() { } | |
| FORCEINLINE __vec8_d(const double v0) { | |
| u.v0 = _mm_set_pd(v0, v0); | |
| u.v1 = _mm_set_pd(v0, v0); | |
| u.v2 = _mm_set_pd(v0, v0); | |
| u.v3 = _mm_set_pd(v0, v0); | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| L21: | |
| .LSSN2: | |
| /* 12 */ frcpad %f0,%f32 | |
| /* 12 */ sethi %h44(.LR0),%g1 | |
| /* 12 */ or %g1,%m44(.LR0),%g1 | |
| /* 12 */ sllx %g1,12,%g1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # K frontend | |
| $ FCCpx -Kfast,nounroll,noswp exp_bench.cpp fmath.cpp | |
| exp_bench.cpp: | |
| fmath.cpp: | |
| "fmath.cpp", line 30: warning: variable "fmath::local::LOG_TABLE_SIZE" was declared but never referenced | |
| const size_t LOG_TABLE_SIZE = 12; | |
| ^ | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| _ZN5fmath3expEd: | |
| .LLFB2: | |
| .L46: | |
| .LSSN28: | |
| /* 167 */ add %sp,-208,%sp | |
| .L47: | |
| .LSSN29: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <cmath> | |
| #include <emmintrin.h> | |
| #define _mm_set1_pd(x) _mm_set_pd((x), (x)) | |
| #define FORCE_INLINE __attribute__((always_inline)) | |
| // Compute exp(x) using trigonometric functions. | |
| inline __m128d exp_tri(__m128d v) | |
| { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Code from http://gallium.inria.fr/blog/fast-vectorizable-math-approx/ | |
| $ e-gcc compile options: -O3 -mno-soft-cmpsf -mcmove -mfp-mode=truncate | |
| // 73 clocks | |
| 00000f40 <_expapprox>: | |
| f40: 200b 0002 mov r1,0x0 | |
| f44: 476b 0aa2 mov r2,0xaa3b | |
| f48: 470b 14b2 movt r2,0x4b38 | |
| f4c: 2fcb 14e2 movt r1,0x4e7e |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| The clock cycle count for "expf()" (reference) is 141645. | |
| The clock cycle count for "expapprox()" is 74. | |
| The clock cycle count for "expapprox4()" is 127 (/4 = 31). | |
| // GCC | |
| #define RESTRICT __restrict__ | |
| // Disabling the range check makes evaluation of exp() faster. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 00000610 <_new_exp>: | |
| 610: 200b 0002 mov r1,0x0 | |
| 614: 200b 13c2 movt r1,0x3c00 | |
| 618: 59ab 0cc2 mov r2,0xcccd | |
| 61c: 00a7 fmul r0,r0,r1 | |
| 61e: 712b 0882 mov r3,0x8889 | |
| 622: 610b 13d2 movt r3,0x3d08 | |
| 626: 498b 13e2 movt r2,0x3e4c | |
| 62a: 200b 0002 mov r1,0x0 | |
| 62e: 300b 13f2 movt r1,0x3f80 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //// new_exp | |
| // == sim start == | |
| LLFB4 | |
| L32 | |
| LSSN114 | |
| L33 | |
| LSSN115 | |
| 00000001 : ld %f0 | |
| LSSN116 |