Created
March 26, 2025 02:40
-
-
Save 7etsuo/4e4d889ca864ebb5a6e0334a3071e552 to your computer and use it in GitHub Desktop.
simd.c
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <emmintrin.h> | |
#if defined(__SSE3__) | |
#include <pmmintrin.h> // SSE3 intrinsics for _mm_hadd_pd | |
#endif | |
#include <math.h> | |
// Collapse the two double lanes of a __m128d into one scalar: lane0 + lane1.
// Without SSE3 the lanes are swapped with a shuffle and the low lanes added;
// with SSE3 a single horizontal add (_mm_hadd_pd) does the same job.
static inline double
horizontal_sum_m128d (__m128d v)
{
#if !defined(__SSE3__)
  // Immediate 0x1 puts v's high lane in the low slot (and v's low lane in
  // the high slot), so the scalar add below yields lane0 + lane1.
  __m128d swapped = _mm_shuffle_pd (v, v, 0x1);
  return _mm_cvtsd_f64 (_mm_add_sd (v, swapped));
#else
  // hadd(v, v) leaves lane0 + lane1 in the low lane of the result.
  return _mm_cvtsd_f64 (_mm_hadd_pd (v, v));
#endif
}
// Arithmetic mean of the n doubles starting at arr.
// Returns NAN for an empty input (n == 0).
// Elements are summed two at a time in a 128-bit accumulator using unaligned
// loads, so arr needs no particular alignment; an odd trailing element is
// added to the reduced scalar sum afterwards.
double
simd_mean_f64 (const double *arr, size_t n)
{
  if (n == 0)
    return NAN;

  __m128d acc = _mm_setzero_pd ();
  size_t idx = 0;

  // Consume full pairs while at least two elements remain.
  while (idx + 1 < n)
    {
      acc = _mm_add_pd (acc, _mm_loadu_pd (arr + idx));
      idx += 2;
    }

  double total = horizontal_sum_m128d (acc);

  // At most one element can be left over once the pairs are consumed.
  if (idx < n)
    total += arr[idx];

  return total / n;
}
// Find the smallest and largest value among the n doubles starting at arr and
// store them through min_out and max_out.
//
// NaN policy: NaN elements are skipped wherever they appear, so the result
// does not depend on whether a NaN happens to sit at index 0. If the input is
// empty (n == 0) or contains nothing but NaNs, both outputs are set to NAN.
//
// Despite the name, this is a plain scalar scan; no SIMD intrinsics are used.
void
simd_min_max_f64 (const double *arr, size_t n, double *min_out, double *max_out)
{
  // NAN doubles as the "no ordinary value seen yet" marker.
  double min_val = NAN;
  double max_val = NAN;

  for (size_t i = 0; i < n; i++)
    {
      const double x = arr[i];

      if (isnan (x))
        continue;

      // x is an ordinary number here, so each negated comparison is true in
      // exactly two cases: the accumulator is still NAN (first ordinary
      // element), or x is strictly smaller / strictly larger than it.
      if (!(min_val <= x))
        min_val = x;
      if (!(max_val >= x))
        max_val = x;
    }

  *min_out = min_val;
  *max_out = max_val;
}
// Demo driver: computes the mean, minimum and maximum of a fixed sample array
// and prints the samples followed by the three statistics.
int
main (void)
{
  double samples[] = {3.0, 1.0, 7.0, 4.0, 5.0, 9.0, 2.0, 6.0};
  const size_t count = sizeof samples / sizeof samples[0];

  const double average = simd_mean_f64 (samples, count);

  double lowest, highest;
  simd_min_max_f64 (samples, count, &lowest, &highest);

  printf ("Data: ");
  for (const double *p = samples; p != samples + count; ++p)
    printf ("%.2f ", *p);

  printf ("\nMean: %.2f\n", average);
  printf ("Min: %.2f, Max: %.2f\n", lowest, highest);
  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment