Last active
August 29, 2015 14:02
-
-
Save lnicola/a1af75fc404c75247cb2 to your computer and use it in GitHub Desktop.
Slow SSE code for vector-on-axis test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <tchar.h> | |
| #include <emmintrin.h> | |
| #include <xmmintrin.h> | |
| #include <smmintrin.h> | |
| #include <iostream> | |
| #include <chrono> | |
| __declspec(noinline) | |
| bool is_axial(__m128 vec) | |
| { | |
| auto r = _mm_castps_si128(_mm_cmpeq_ps(vec, _mm_setzero_ps())); | |
| auto r1 = _mm_xor_si128(r, _mm_setr_epi32(0, -1, -1, -1)); | |
| auto r2 = _mm_xor_si128(r, _mm_setr_epi32(-1, 0, -1, -1)); | |
| auto r3 = _mm_xor_si128(r, _mm_setr_epi32(-1, -1, 0, -1)); | |
| auto m = _mm_setr_epi32(_mm_movemask_epi8(r1), _mm_movemask_epi8(r2), _mm_movemask_epi8(r3), -1); | |
| auto n = _mm_cmpeq_epi32(m, _mm_setzero_si128()); | |
| return !_mm_testz_si128(n, n); | |
| } | |
| //__declspec(noinline) | |
| bool is_axial2(__m128 vec) | |
| { | |
| auto r = _mm_castps_si128(_mm_cmpeq_ps(vec, _mm_setzero_ps())); | |
| r = _mm_hadd_epi32(r, r); | |
| r = _mm_hadd_epi32(r, r); | |
| return _mm_cvtsi128_si32(r) + 3 == 0; | |
| } | |
| //__declspec(noinline) | |
| bool is_axial6(float x, float y, float z) | |
| { | |
| auto r = _mm_castps_si128(_mm_cmpeq_ps(_mm_setr_ps(x, y, z, 0.f), _mm_setzero_ps())); | |
| r = _mm_hadd_epi32(r, r); | |
| r = _mm_hadd_epi32(r, r); | |
| return _mm_cvtsi128_si32(r) + 3 == 0; | |
| } | |
/// Counting variant: tests whether exactly one of x, y, z is non-zero.
///
/// @param x,y,z  vector components.
/// @return true when the number of non-zero components is exactly one.
//__declspec(noinline)
bool is_axial3(float x, float y, float z)
{
    // Each comparison contributes 1 when the component differs from zero.
    int nonzero_components = 0;
    nonzero_components += (x != 0.0f);
    nonzero_components += (y != 0.0f);
    nonzero_components += (z != 0.0f);
    return nonzero_components == 1;
}
/// Case-enumeration variant: tests whether exactly one of x, y, z is
/// non-zero by checking which pair of components is zero.
///
/// @param x,y,z  vector components.
/// @return true when two components are zero and the remaining one is not.
//__declspec(noinline)
bool is_axial4(float x, float y, float z)
{
    const bool x_is_zero = (x == 0.f);
    const bool y_is_zero = (y == 0.f);
    const bool z_is_zero = (z == 0.f);

    // x and y vanish: axial only if z does not.
    if (x_is_zero && y_is_zero)
    {
        return !z_is_zero;
    }
    // x and z vanish: axial only if y does not.
    if (x_is_zero && z_is_zero)
    {
        return !y_is_zero;
    }
    // y and z vanish: axial only if x does not.
    if (y_is_zero && z_is_zero)
    {
        return !x_is_zero;
    }
    // Fewer than two zero components.
    return false;
}
/// Factored-boolean variant: tests whether exactly one of x, y, z is
/// non-zero, branching on x first.
///
/// @param x,y,z  vector components.
/// @return true when exactly one component is non-zero.
//__declspec(noinline)
bool is_axial5(float x, float y, float z)
{
    const bool y_is_zero = (y == 0.f);
    const bool z_is_zero = (z == 0.f);

    // With x zero, exactly one of y/z must be zero; with x non-zero, both must be.
    return (x == 0.f) ? (y_is_zero != z_is_zero)
                      : (y_is_zero && z_is_zero);
}
/// Branching variant: tests whether exactly one of x, y, z is non-zero by
/// deciding on one component at a time.
///
/// @param x,y,z  vector components.
/// @return true when exactly one component is non-zero.
bool is_axial7(float x, float y, float z)
{
    // x is the non-zero component: the other two must both be zero.
    if (!(x == 0.f))
    {
        return y == 0.f && z == 0.f;
    }

    // x is zero and y is not: z has to be zero.
    if (!(y == 0.f))
    {
        return z == 0.f;
    }

    // x and y are both zero: z has to carry the non-zero value.
    return z != 0.f;
}
| using namespace std; | |
| int main() | |
| { | |
| int temp = 0; | |
| auto start_time = chrono::high_resolution_clock::now(); | |
| for (int i = -500; i < 500; i++) | |
| for (int j = -500; j < 500; j++) | |
| for (int k = -500; k < 500; k++) | |
| //temp += is_axial2(_mm_setr_ps(i & 1, j & 1, k & 1, 0.f)); | |
| temp += is_axial7(i & 1, j & 1, k & 1); | |
| auto end_time = chrono::high_resolution_clock::now(); | |
| cout << chrono::duration_cast<chrono::milliseconds>(end_time - start_time).count() << endl; | |
| cout << temp; | |
| return temp; | |
| printf("%d\n", is_axial2(_mm_setr_ps(0.f, 0.f, 1.f, 0.f))); | |
| printf("%d\n", is_axial2(_mm_setr_ps(1.f, 1.f, 0.f, 0.f))); | |
| printf("%d\n", is_axial2(_mm_setr_ps(1.f, 1.f, 1.f, 0.f))); | |
| printf("%d\n", is_axial2(_mm_setr_ps(0.f, 1.f, 0.f, 0.f))); | |
| printf("%d\n", is_axial2(_mm_setr_ps(1.f, 0.f, 0.f, 0.f))); | |
| printf("%d\n", is_axial2(_mm_setr_ps(0.f, 0.f, 0.f, 0.f))); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment