Skip to content

Instantly share code, notes, and snippets.

@soravux
Last active December 16, 2015 00:18
Show Gist options
  • Save soravux/5346143 to your computer and use it in GitHub Desktop.
Save soravux/5346143 to your computer and use it in GitHub Desktop.
Solution to the Project Euler problem #1 - For the multigrad blog (entry 5)
#include <stdio.h>
#include <x86intrin.h>
#define NUMBER_OF_TRIES 1000000
/*
 * Sum of every multiple of 3 or 5 below 1000 (Project Euler #1), computed
 * with SSE2 integer intrinsics.
 *
 * Inside each run of 15 consecutive integers the multiples of 3 or 5 sit at
 * the same seven offsets (3, 5, 6, 9, 10, 12, 15).  One 128-bit vector of
 * eight 16-bit lanes therefore holds a whole period; the eighth lane stays 0.
 *
 * Returns the sum as read from the low 32 bits of the vector accumulator.
 */
static unsigned long euler1_sum_sse2(void)
{
    /* 1000 / 15 truncates to 66: periods 0..65 are complete (largest value
     * 990) and period 66 is the partial one that straddles the limit. */
    enum { LAST_PERIOD = 66 };

    /* _mm_set_epi16 takes its lanes from most to least significant, so the
     * right-most argument is lane 0. */
    const __m128i offsets = _mm_set_epi16(0, 15, 12, 10, 9, 6, 5, 3);
    const __m128i period = _mm_set_epi16(0, 15, 15, 15, 15, 15, 15, 15);
    /* -1 sets all 16 bits of a lane without relying on an out-of-range
     * int -> short conversion (as 0xFFFF would). */
    const __m128i lane0_only = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
    /* In the last period only 993, 995, 996 and 999 (lanes 0..3) are below
     * 1000; lanes 4..6 would hold 1000, 1002 and 1005. */
    const __m128i below_limit = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);

    __m128i base = _mm_setzero_si128();   /* 15 * i in lanes 0..6 */
    __m128i total = _mm_setzero_si128();  /* running sum, 32-bit lane 0 */

    for (unsigned int i = 0; i <= LAST_PERIOD; i++) {
        /* The seven candidates of the current period. */
        __m128i values = _mm_add_epi16(base, offsets);

        /* Drop the candidates that are not below 1000. */
        if (i == LAST_PERIOD) {
            values = _mm_and_si128(values, below_limit);
        }

        /* Horizontal add: after shifting by 2, 4 and 8 bytes, lane 0 holds
         * the sum of all eight lanes.  The largest per-period sum is
         * 7 * 975 + 60 = 6885, so 16-bit lanes cannot overflow. */
        __m128i folded = _mm_add_epi16(values, _mm_srli_si128(values, 2));
        folded = _mm_add_epi16(folded, _mm_srli_si128(folded, 4));
        folded = _mm_add_epi16(folded, _mm_srli_si128(folded, 8));

        /* The other lanes now hold partial sums; clear them so the 32-bit
         * addition below only sees the 16-bit total in lane 0. */
        folded = _mm_and_si128(folded, lane0_only);
        total = _mm_add_epi32(folded, total);

        /* Advance to the next period. */
        base = _mm_add_epi16(base, period);
    }

    /* _mm_cvtsi128_si32 returns a signed int; go through unsigned int so the
     * low 32 bits are widened without sign extension. */
    return (unsigned int)_mm_cvtsi128_si32(total);
}

/*
 * Benchmark driver: recomputes the sum NUMBER_OF_TRIES times and prints the
 * value obtained by the last run.
 */
int main(void)
{
    /* Initialised so the print below is well-defined even if
     * NUMBER_OF_TRIES is ever set to 0. */
    unsigned long lResult = 0;

    for (unsigned long lLoop = 0; lLoop < NUMBER_OF_TRIES; lLoop++) {
        lResult = euler1_sum_sse2();
    }

    /* %lu matches unsigned long; the former %u was a type mismatch. */
    printf("Result: %lu\n", lResult);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment