Created
January 27, 2023 07:33
-
-
Save leuc/20a43793d9200b5278ea4fa5b4314a85 to your computer and use it in GitHub Desktop.
test igraph umap layout with avx2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
src/layout/umap.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
1 file changed, 58 insertions(+) | |
diff --git a/src/layout/umap.c b/src/layout/umap.c | |
index d1f334313..a9e40d7d7 100644 | |
--- a/src/layout/umap.c | |
+++ b/src/layout/umap.c | |
@@ -28,6 +28,7 @@ | |
#include "layout/layout_internal.h" | |
#include "core/interruption.h" | |
+#include <immintrin.h> | |
#include <math.h> | |
@@ -720,6 +721,7 @@ static igraph_real_t igraph_i_umap_clip_force(igraph_real_t force, igraph_real_t | |
return force > limit ? limit : (force < -limit ? -limit : force); | |
} | |
+/* | |
static igraph_real_t igraph_i_umap_attract( | |
igraph_real_t dsq, | |
igraph_real_t a, | |
@@ -727,7 +729,42 @@ static igraph_real_t igraph_i_umap_attract( | |
{ | |
return - (2 * a * b * pow(dsq, b - 1.)) / (1. + a * pow(dsq, b)); | |
} | |
+*/ | |
+ | |
+ | |
+#ifndef _mm256_extract_pd | |
+#define _mm256_extract_pd(R, I) (*((double*)(&R)+I)) | |
+#endif | |
+ | |
+__m256d _mm256_pow_pd(__m256d base, __m256d exponent) { | |
+ __m256d result = _mm256_set1_pd(1.0); | |
+ __m256d base_n = base; | |
+ __m256d exponent_n = exponent; | |
+ for (int i = 0; i < 4; i++) { | |
+ if (_mm256_extract_pd(exponent_n, i) >= 1) | |
+ result = _mm256_mul_pd(result, base_n); | |
+ base_n = _mm256_mul_pd(base_n, base); | |
+ exponent_n = _mm256_sub_pd(exponent_n, _mm256_set1_pd(1)); | |
+ } | |
+ return result; | |
+} | |
+ | |
+static igraph_real_t igraph_i_umap_attract( | |
+ igraph_real_t dsq, | |
+ igraph_real_t a, | |
+ igraph_real_t b) | |
+{ | |
+ __m256d dsq_vec = _mm256_set1_pd(dsq); | |
+ __m256d a_vec = _mm256_set1_pd(a); | |
+ __m256d b_vec = _mm256_set1_pd(b); | |
+ __m256d temp1 = _mm256_mul_pd(a_vec, _mm256_pow_pd(dsq_vec, b_vec)); | |
+ __m256d temp2 = _mm256_add_pd(temp1, _mm256_set1_pd(1)); | |
+ __m256d temp3 = _mm256_mul_pd(_mm256_mul_pd(b_vec, _mm256_set1_pd(2)), _mm256_pow_pd(dsq_vec, _mm256_sub_pd(b_vec, _mm256_set1_pd(1)))); | |
+ __m256d result = _mm256_div_pd(_mm256_mul_pd(temp3, a_vec), temp2); | |
+ return -_mm256_extract_pd(result, 0); | |
+} | |
+/* | |
static igraph_real_t igraph_i_umap_repel( | |
igraph_real_t dsq, | |
igraph_real_t a, | |
@@ -737,6 +774,27 @@ static igraph_real_t igraph_i_umap_repel( | |
return (2 * b) / (dsq_min + dsq) / (1. + a * pow(dsq, b)); | |
} | |
+*/ | |
+ | |
+ | |
+ | |
+static igraph_real_t igraph_i_umap_repel(igraph_real_t dsq, igraph_real_t a, igraph_real_t b) { | |
+ igraph_real_t dsq_min = UMAP_CORRECT_DISTANCE_REPULSION * UMAP_CORRECT_DISTANCE_REPULSION; | |
+ | |
+ __m256d dsq_min_vec = _mm256_set1_pd(dsq_min); | |
+ __m256d dsq_vec = _mm256_set1_pd(dsq); | |
+ __m256d a_vec = _mm256_set1_pd(a); | |
+ __m256d b_vec = _mm256_set1_pd(b); | |
+ __m256d dsq_pow_b = _mm256_pow_pd(dsq_vec, b_vec); | |
+ __m256d dsq_pow_b_a = _mm256_mul_pd(dsq_pow_b, a_vec); | |
+ __m256d one_plus_dsq_pow_b_a = _mm256_add_pd(dsq_pow_b_a, _mm256_set1_pd(1.0)); | |
+ __m256d dsq_plus_dsq_min = _mm256_add_pd(dsq_vec, dsq_min_vec); | |
+ __m256d two_b_vec = _mm256_set1_pd(2.0 * b); | |
+ __m256d result = _mm256_div_pd(two_b_vec, _mm256_div_pd(dsq_plus_dsq_min, one_plus_dsq_pow_b_a)); | |
+ return _mm256_extract_pd(result,0); | |
+} | |
+ | |
+ | |
static igraph_error_t igraph_i_umap_apply_forces( | |
const igraph_t *graph, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment