Created
January 17, 2014 00:49
-
-
Save phg1024/8466448 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <algorithm> | |
| #include <vector> | |
| #include <assert.h> | |
| #include <time.h> | |
| #include "vectorclass/vectorf128.h" | |
| #include "alignedallocator.hpp" | |
| using namespace std; | |
/// Four packed single-precision floats stored in one SSE register.
///
/// The same storage is reachable as the raw register (`sse`) and as the named
/// lanes `x, y, z, w` through the union at the bottom of the struct.
/// Arithmetic operators work lane-wise; the comparison operators return the
/// raw SSE lane mask produced by the corresponding `_mm_cmp*_ps` intrinsic
/// rather than a `bool`.
struct Point4f {
    /// All four lanes set to zero.
    Point4f() { sse = _mm_set_ps1(0); }

    /// Lanes given individually. `_mm_set_ps` takes its arguments from the
    /// highest lane to the lowest, hence the reversed order.
    Point4f(float x, float y, float z, float w) {
        sse = _mm_set_ps(w, z, y, x);
    }

    /// Broadcasts `v` into every lane.
    Point4f(float v) { sse = _mm_set_ps1(v); }

    /// Wraps an existing register. Intentionally implicit: the operators
    /// below return intrinsic results directly as `Point4f`.
    Point4f(__m128 v) { sse = v; }

    // Copying is a plain register copy, so the compiler-generated versions
    // are exactly what is needed.
    Point4f(const Point4f&) = default;
    Point4f& operator=(const Point4f&) = default;

    /// Lane-wise sum.
    Point4f operator+(const Point4f& p) const {
        return _mm_add_ps(sse, p.sse);
    }

    /// Lane-wise difference.
    Point4f operator-(const Point4f& p) const {
        return _mm_sub_ps(sse, p.sse);
    }

    /// Lane-wise product.
    Point4f operator*(const Point4f& p) const {
        return _mm_mul_ps(sse, p.sse);
    }

    /// Lane-wise quotient.
    Point4f operator/(const Point4f& p) const {
        return _mm_div_ps(sse, p.sse);
    }

    /// Lane-wise equality; returns the `_mm_cmpeq_ps` mask.
    __m128 operator==(const Point4f& p) const {
        return _mm_cmpeq_ps(sse, p.sse);
    }

    /// Lane-wise inequality; returns the `_mm_cmpneq_ps` mask.
    __m128 operator!=(const Point4f& p) const {
        return _mm_cmpneq_ps(sse, p.sse);
    }

    /// Gives access to the underlying register so a `Point4f` can be passed
    /// straight to SSE intrinsics.
    operator __m128() const {
        return sse;
    }

    /// Mutable access to lane `idx` (0..3, checked by `assert`).
    float& operator[](int idx) {
        assert(idx >= 0 && idx < 4);
        // Indexes from `x`; relies on x, y, z, w being laid out back to back.
        return (&x)[idx];
    }

    /// Read-only access to lane `idx` (0..3, checked by `assert`).
    float operator[](int idx) const {
        assert(idx >= 0 && idx < 4);
        return (&x)[idx];
    }

    friend std::ostream& operator << (std::ostream& os, const Point4f& p);
    friend std::istream& operator >> (std::istream& is, Point4f& p);

    // Register view and per-lane view of the same storage.
    union {
        __m128 sse;
        struct { float x, y, z, w; };
    };
};
| ostream& operator << (ostream& os, const Point4f& p) { | |
| os << p.x << ' ' << p.y << ' ' << p.z << ' ' << p.w << endl; | |
| return os; | |
| } | |
| istream& operator >>(istream& is, Point4f& p) { | |
| is >> p.x >> p.y >> p.z >> p.w; | |
| return is; | |
| } | |
| int main() { | |
| __m128 m1 = _mm_set_ps1(7.0); | |
| __m128 mref = _mm_set_ps1(sqrtf(7.0)); | |
| float res_val[4] = { 0 }; | |
| int runs = 1; | |
| int n = 64 * 1024 * 1024; | |
| vector<Point4f, aligned_allocator<Point4f, 16>> vec(n, m1); | |
| clock_t start, stop; | |
| start = clock(); | |
| for (int j = 0; j < runs;j++) | |
| for (int i = 0; i < n;i++) { | |
| vec[i] = _mm_sqrt_ps(vec[i]); | |
| } | |
| stop = clock(); | |
| cout << "time cost with SIMD = " << (stop - start) / (float)CLOCKS_PER_SEC * 1000.0 << "ms" << endl; | |
| // verify | |
| bool flag = false; | |
| for (int i = 0; i < n; i++) { | |
| Vec4fb res = (vec[i] != mref); | |
| if (res[0] || res[1] || res[2] || res[3]) { | |
| cout << vec[i] << '\t' << mref << endl; | |
| flag = true; | |
| break; | |
| } | |
| } | |
| if (flag) cout << "Failed." << endl; | |
| else cout << "Succeeded." << endl; | |
| start = clock(); | |
| for (int j = 0; j < runs; j++) | |
| for (int i = 0; i < n; i++) { | |
| Point4f& p = vec[i]; | |
| p[0] = sqrtf(7.0); | |
| p[1] = sqrtf(7.0); | |
| p[2] = sqrtf(7.0); | |
| p[3] = sqrtf(7.0); | |
| } | |
| stop = clock(); | |
| cout << "time cost with SIMD = " << (stop - start) / (float)CLOCKS_PER_SEC * 1000.0 << "ms" << endl; | |
| // verify | |
| flag = false; | |
| for (int i = 0; i < n; i++) { | |
| Vec4fb res = (vec[i] != mref); | |
| if (res[0] || res[1] || res[2] || res[3]) { | |
| flag = true; | |
| break; | |
| } | |
| } | |
| if (flag) cout << "Failed." << endl; | |
| else cout << "Succeeded." << endl; | |
| Point4f zero; | |
| cout << zero << endl; | |
| zero = zero + Point4f(1, 2, 3, 4); | |
| cout << zero << endl; | |
| zero = zero * zero; | |
| cout << zero << endl; | |
| zero = zero / zero; | |
| cout << zero << endl; | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment