Skip to content

Instantly share code, notes, and snippets.

View syoyo's full-sized avatar
💗
ray tracing

Syoyo Fujita syoyo

💗
ray tracing
View GitHub Profile
@syoyo
syoyo / gist:d80cb6f9936aa5f290da
Created April 18, 2015 16:45
exp(x) using trigonometric function in HPC-ACE
#include <cstdio>
#include <cmath>
#include <emmintrin.h>
// Defines _mm_set1_pd(x) as _mm_set_pd with x passed for both arguments.
// The argument is expanded twice, so an expression with side effects
// (e.g. i++) would be evaluated twice.
#define _mm_set1_pd(x) _mm_set_pd((x), (x))
// Probably somewhat faster.
inline __m128d fastexp(__m128d v)
{
const __m128d inv_log2 = _mm_set1_pd(1.4426950408889634073599);
PRE_ALIGN(128) struct __vec8_d {
__vec8_d() { }
FORCEINLINE __vec8_d(const double v0) {
u.v0 = _mm_set_pd(v0, v0);
u.v1 = _mm_set_pd(v0, v0);
u.v2 = _mm_set_pd(v0, v0);
u.v3 = _mm_set_pd(v0, v0);
}
@syoyo
syoyo / gist:a8ba3b6fb1f2d51e4fff
Created May 4, 2015 16:15
1/x with -Kfast,nounroll,noswp
L21:
.LSSN2:
/* 12 */ frcpad %f0,%f32
/* 12 */ sethi %h44(.LR0),%g1
/* 12 */ or %g1,%m44(.LR0),%g1
/* 12 */ sllx %g1,12,%g1
@syoyo
syoyo / gist:78f6ddb6f99deb49f8ae
Created May 6, 2015 15:41
fmath performance(2015/05/06)
# K frontend
$ FCCpx -Kfast,nounroll,noswp exp_bench.cpp fmath.cpp
exp_bench.cpp:
fmath.cpp:
"fmath.cpp", line 30: warning: variable "fmath::local::LOG_TABLE_SIZE" was declared but never referenced
const size_t LOG_TABLE_SIZE = 12;
^
@syoyo
syoyo / gist:861c3d78ffc6ac5aabd0
Created May 7, 2015 10:03
Disassembly of double fmath::exp(double) on SPARC/HPC-ACE
_ZN5fmath3expEd:
.LLFB2:
.L46:
.LSSN28:
/* 167 */ add %sp,-208,%sp
.L47:
.LSSN29:
@syoyo
syoyo / gist:1516aa3e8e5489871fdd
Created May 8, 2015 06:59
double2 exp(double2) performance on SPARC/HPC-ACE
#include <cmath>
#include <emmintrin.h>
// Defines _mm_set1_pd(x) as _mm_set_pd with x passed for both arguments.
// The argument is expanded twice, so an expression with side effects
// (e.g. i++) would be evaluated twice.
#define _mm_set1_pd(x) _mm_set_pd((x), (x))
// Inlining hint to be placed on function definitions.
// Expands to a GNU-style __attribute__((...)) specifier; the keyword was
// previously misspelled, which made every use of FORCE_INLINE a compile error.
// NOTE(review): the attribute name `force_inline` is kept as written. GCC and
// Clang spell the equivalent attribute `always_inline` and ignore unknown
// attribute names with a warning -- confirm what the target compiler accepts.
#define FORCE_INLINE __attribute__((force_inline))
// Compute exp(x) using trigonometric function.
inline __m128d exp_tri(__m128d v)
{
@syoyo
syoyo / gist:9484d27be95e3303789b
Created June 8, 2015 14:15
expapprox() compiled for Parallella Epiphany
// code From http://gallium.inria.fr/blog/fast-vectorizable-math-approx/
$ e-gcc compile options: -O3 -mno-soft-cmpsf -mcmove -mfp-mode=truncate
// 73 clocks
00000f40 <_expapprox>:
f40: 200b 0002 mov r1,0x0
f44: 476b 0aa2 mov r2,0xaa3b
f48: 470b 14b2 movt r2,0x4b38
f4c: 2fcb 14e2 movt r1,0x4e7e
@syoyo
syoyo / gist:ef68a9c5b46b040e88db
Created June 12, 2015 04:24
exp() approximate function on Epiphany.
The clock cycle count for "expf()" (reference) is 141645.
The clock cycle count for "expapprox()" is 74.
The clock cycle count for "expapprox4()" is 127 (/4 = 31).
// GCC: RESTRICT expands to the `__restrict__` keyword.
#define RESTRICT __restrict__
// Disabling the range check makes evaluation of exp() faster.
@syoyo
syoyo / gist:f98f95206c9418588645
Created June 25, 2015 12:14
new_exp on Epiphany.
00000610 <_new_exp>:
610: 200b 0002 mov r1,0x0
614: 200b 13c2 movt r1,0x3c00
618: 59ab 0cc2 mov r2,0xcccd
61c: 00a7 fmul r0,r0,r1
61e: 712b 0882 mov r3,0x8889
622: 610b 13d2 movt r3,0x3d08
626: 498b 13e2 movt r2,0x3e4c
62a: 200b 0002 mov r1,0x0
62e: 300b 13f2 movt r1,0x3f80
@syoyo
syoyo / gist:956846067e49eb42c4b1
Created June 29, 2015 10:05
new_exp, new_exp4 sim
//// new_exp
// == sim start ==
LLFB4
L32
LSSN114
L33
LSSN115
00000001 : ld %f0
LSSN116