Last active
June 7, 2020 15:35
-
-
Save nesteruk/ebde71e9550f90ab725cb26dcab01698 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _USE_MATH_DEFINES | |
#include <iostream> | |
#include <cmath> | |
#include <array> | |
#include <chrono> | |
#include <immintrin.h> | |
#include <random> | |
// Standard normal probability density function | |
double norm_pdf(const double& x) { | |
const static auto one_over_root_two_pi = (1.0 / (sqrt(2 * M_PI))); | |
return one_over_root_two_pi * exp(-0.5 * x * x); | |
} | |
__m256d norm_pdf(const __m256d& x) | |
{ | |
const static double mul = 1.0 / (sqrt(2 * M_PI)); | |
const static __m256d mul_vector = {mul,mul,mul,mul}; | |
const static __m256d minus_half = {-0.5,-0.5,-0.5,-0.5}; | |
return _mm256_mul_pd( | |
mul_vector, | |
_mm256_exp_pd( | |
_mm256_mul_pd(minus_half, | |
_mm256_mul_pd(x, x) | |
) | |
) | |
); | |
} | |
// An approximation to the cumulative distribution function | |
// for the standard normal distribution | |
// Note: This is a recursive function | |
double norm_cdf(const double& x) { | |
double k = 1.0 / (1.0 + 0.2316419 * x); | |
double k_sum = k * (0.319381530 + k * (-0.356563782 + k * (1.781477937 + k * (-1.821255978 + 1.330274429 * k)))); | |
if (x >= 0.0) { | |
return (1.0 - (1.0 / (sqrt(2 * M_PI))) * exp(-0.5 * x * x) * k_sum); | |
} | |
else { | |
return 1.0 - norm_cdf(-x); | |
} | |
} | |
// This calculates d_j, for j in {1,2}. This term appears in the closed | |
// form solution for the European call or put price | |
double d_j(const int& j, const double& S, const double& K, const double& r, const double& v, const double& T) { | |
return (log(S / K) + (r + (pow(-1, j - 1)) * 0.5 * v * v) * T) / (v * (sqrt(T))); | |
} | |
__m256d d1(const __m256d& s, const __m256d& k, | |
const __m256d& r, const __m256d& v, const __m256d& t) | |
{ | |
const __m256d half = _mm256_set1_pd(0.5); | |
return _mm256_fmadd_pd( | |
_mm256_log_pd(_mm256_div_pd(s, k)), /* log(s/k) */ | |
_mm256_mul_pd(t, _mm256_fmadd_pd(_mm256_mul_pd(v, v), half, r)), /* (r + 0.5*v*v) */ | |
_mm256_invsqrt_pd(_mm256_mul_pd(_mm256_mul_pd(v, v), t))); | |
} | |
__m256d d2(const __m256d& s, const __m256d& k, | |
const __m256d& r, const __m256d& v, const __m256d& t) | |
{ | |
return _mm256_sub_pd( | |
d1(s, k, r, v, t), | |
_mm256_mul_pd(v, _mm256_sqrt_pd(t))); | |
} | |
__m256d call_price(const __m256d& s, const __m256d& k, | |
const __m256d& r, const __m256d& v, const __m256d& t) | |
{ | |
const auto D1 = d1(s, k, r, v, t); | |
const auto D2 = d2(s, k, r, v, t); | |
const auto s_n_d1 = _mm256_mul_pd(s, _mm256_cdfnorm_pd(D1)); | |
const auto e_min_rt = _mm256_exp_pd( | |
_mm256_xor_pd(_mm256_mul_pd(r, t), _mm256_set1_pd(-0.0)) | |
); | |
// fma it! | |
return _mm256_fnmadd_pd(_mm256_mul_pd(k, e_min_rt),_mm256_cdfnorm_pd(D2), s_n_d1); | |
} | |
__m256d put_price(const __m256d& s, const __m256d& k, | |
const __m256d& r, const __m256d& v, const __m256d& t) | |
{ | |
const auto zero = _mm256_set1_pd(0.0); | |
auto e_min_rt = _mm256_exp_pd( | |
//_mm256_fnmadd_pd(r, t, zero) | |
_mm256_xor_pd(_mm256_mul_pd(r, t), _mm256_set1_pd(-0.0)) | |
); | |
return | |
_mm256_add_pd( | |
_mm256_sub_pd(_mm256_mul_pd(k, e_min_rt), s), | |
call_price(s, k, r, v, t)); | |
} | |
// Calculate the European vanilla call price based on | |
// underlying S, strike K, risk-free rate r, volatility of | |
// underlying sigma and time to maturity T | |
double call_price(const double& S, const double& K, const double& r, const double& v, const double& T) { | |
return S * norm_cdf(d_j(1, S, K, r, v, T)) - K * exp(-r * T) * norm_cdf(d_j(2, S, K, r, v, T)); | |
} | |
// Calculate the European vanilla put price based on | |
// underlying S, strike K, risk-free rate r, volatility of | |
// underlying sigma and time to maturity T | |
double put_price(const double& S, const double& K, const double& r, const double& v, const double& T) { | |
return -S * norm_cdf(-d_j(1, S, K, r, v, T)) + K * exp(-r * T) * norm_cdf(-d_j(2, S, K, r, v, T)); | |
} | |
int main(int argc, char** argv) | |
{ | |
using namespace std; | |
using namespace chrono; | |
constexpr int count{1024}; | |
random_device rd; | |
mt19937_64 gen(rd()); | |
const uniform_int_distribution<> price_dist(50, 150); | |
const uniform_real_distribution<> rate_dist(0.01, 0.1); | |
const uniform_real_distribution<> vol_dist(0.1, 0.4); | |
const uniform_real<> time_dist(0.1, 1); | |
// scalars | |
array<double, count> s, k, r, v, t, c, p; | |
array<__m256d, count<<2> ss, kk, rr, vv, tt, cc, pp; | |
for (int i = 0; i < count; ++i) | |
{ | |
s[i] = price_dist(gen); | |
k[i] = price_dist(gen); | |
r[i] = rate_dist(gen); | |
v[i] = vol_dist(gen); | |
t[i] = time_dist(gen); | |
ss[i >> 2].m256d_f64[i%4] = price_dist(gen); | |
kk[i >> 2].m256d_f64[i%4] = price_dist(gen); | |
rr[i >> 2].m256d_f64[i%4] = rate_dist(gen); | |
vv[i >> 2].m256d_f64[i%4] = vol_dist(gen); | |
tt[i >> 2].m256d_f64[i%4] = time_dist(gen); | |
} | |
constexpr int iterations{1024*16}; | |
double scalar_total{0.0}, vector_total{0.0}; | |
for (int i = 0; i < iterations; ++i) | |
{ | |
auto scalar_start = high_resolution_clock::now(); | |
for (int i = 0; i < count; ++i) | |
{ | |
c[i] = call_price(s[i], k[i], r[i], v[i], t[i]); | |
p[i] = put_price(s[i], k[i], r[i], v[i], t[i]); | |
} | |
auto scalar_end = high_resolution_clock::now(); | |
scalar_total += duration_cast<microseconds>(scalar_end-scalar_start).count(); | |
} | |
cout << "scalar execution took " << (scalar_total / iterations) << " microseconds\n"; | |
for (int i = 0; i < iterations; ++i) | |
{ | |
auto vector_start = high_resolution_clock::now(); | |
for (int i = 0; i < (count >> 2); ++i) | |
{ | |
cc[i] = call_price(ss[i], kk[i], rr[i], vv[i], tt[i]); | |
pp[i] = put_price(ss[i], kk[i], rr[i], vv[i], tt[i]); | |
} | |
auto vector_end = high_resolution_clock::now(); | |
vector_total += duration_cast<microseconds>(vector_end-vector_start).count(); | |
} | |
cout << "vector execution took " << (vector_total / iterations) << " microseconds\n"; | |
cout << "speedup factor is " << scalar_total / vector_total << "\n"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment