Skip to content

Instantly share code, notes, and snippets.

@phg1024
Created January 17, 2014 00:49
Show Gist options
  • Save phg1024/8466448 to your computer and use it in GitHub Desktop.
Save phg1024/8466448 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <algorithm>
#include <vector>
#include <assert.h>
#include <math.h>
#include <time.h>
#include "vectorclass/vectorf128.h"
#include "alignedallocator.hpp"
using namespace std;
/**
 * Four packed single-precision floats stored in one SSE register.
 *
 * The same 16 bytes can be viewed as the raw __m128 (`sse`), as named
 * components (`x`, `y`, `z`, `w`) or as an indexable array (`data`).
 * Construction from a scalar or from an __m128 is deliberately implicit so
 * that intrinsic results can be assigned to / compared with a Point4f directly.
 *
 * Copy construction and copy assignment are the compiler-generated ones;
 * they copy the 16 bytes of the union.
 */
struct Point4f {
    /// All four lanes set to zero.
    Point4f() { sse = _mm_set_ps1(0); }

    /// Lane 0 = x, lane 1 = y, lane 2 = z, lane 3 = w.
    /// (_mm_set_ps takes its arguments from the highest lane down.)
    Point4f(float x, float y, float z, float w) {
        sse = _mm_set_ps(w, z, y, x);
    }

    /// Broadcasts v to all four lanes.
    Point4f(float v) { sse = _mm_set_ps1(v); }

    /// Wraps an existing SSE register value.
    Point4f(__m128 v) { sse = v; }

    // Lane-wise arithmetic. These are const so they can be used on const
    // objects and on const references.
    Point4f operator+(const Point4f& p) const {
        return _mm_add_ps(sse, p.sse);
    }
    Point4f operator-(const Point4f& p) const {
        return _mm_sub_ps(sse, p.sse);
    }
    Point4f operator*(const Point4f& p) const {
        return _mm_mul_ps(sse, p.sse);
    }
    Point4f operator/(const Point4f& p) const {
        return _mm_div_ps(sse, p.sse);
    }

    // Lane-wise comparisons. The result is the raw per-lane mask produced by
    // the compare intrinsic, not a bool.
    __m128 operator==(const Point4f& p) const {
        return _mm_cmpeq_ps(sse, p.sse);
    }
    __m128 operator!=(const Point4f& p) const {
        return _mm_cmpneq_ps(sse, p.sse);
    }

    /// Implicit conversion back to the raw register so a Point4f can be
    /// passed straight to intrinsics.
    operator __m128() const {
        return sse;
    }

    /// Mutable access to lane idx (0..3); asserts on an out-of-range index.
    float& operator[](int idx) {
        assert(idx >= 0 && idx < 4);
        // Index the array view instead of doing pointer arithmetic past the
        // single float `x`.
        return data[idx];
    }

    /// Read-only access to lane idx (0..3); asserts on an out-of-range index.
    float operator[](int idx) const {
        assert(idx >= 0 && idx < 4);
        return data[idx];
    }

    friend std::ostream& operator << (std::ostream& os, const Point4f& p);
    friend std::istream& operator >> (std::istream& is, Point4f& p);

    // Three views of the same 16 bytes. Reading a view other than the one
    // last written relies on the compiler's union type-punning support, as
    // the original sse / x,y,z,w layout already did.
    union {
        __m128 sse;
        struct { float x, y, z, w; };
        float data[4];
    };
};
ostream& operator << (ostream& os, const Point4f& p) {
os << p.x << ' ' << p.y << ' ' << p.z << ' ' << p.w << endl;
return os;
}
/// Extracts four floats from the stream into p.x, p.y, p.z and p.w, in that
/// order, and returns the stream.
istream& operator >>(istream& is, Point4f& p) {
    float* const components[] = { &p.x, &p.y, &p.z, &p.w };
    for (float* component : components) {
        is >> *component;
    }
    return is;
}
int main() {
__m128 m1 = _mm_set_ps1(7.0);
__m128 mref = _mm_set_ps1(sqrtf(7.0));
float res_val[4] = { 0 };
int runs = 1;
int n = 64 * 1024 * 1024;
vector<Point4f, aligned_allocator<Point4f, 16>> vec(n, m1);
clock_t start, stop;
start = clock();
for (int j = 0; j < runs;j++)
for (int i = 0; i < n;i++) {
vec[i] = _mm_sqrt_ps(vec[i]);
}
stop = clock();
cout << "time cost with SIMD = " << (stop - start) / (float)CLOCKS_PER_SEC * 1000.0 << "ms" << endl;
// verify
bool flag = false;
for (int i = 0; i < n; i++) {
Vec4fb res = (vec[i] != mref);
if (res[0] || res[1] || res[2] || res[3]) {
cout << vec[i] << '\t' << mref << endl;
flag = true;
break;
}
}
if (flag) cout << "Failed." << endl;
else cout << "Succeeded." << endl;
start = clock();
for (int j = 0; j < runs; j++)
for (int i = 0; i < n; i++) {
Point4f& p = vec[i];
p[0] = sqrtf(7.0);
p[1] = sqrtf(7.0);
p[2] = sqrtf(7.0);
p[3] = sqrtf(7.0);
}
stop = clock();
cout << "time cost with SIMD = " << (stop - start) / (float)CLOCKS_PER_SEC * 1000.0 << "ms" << endl;
// verify
flag = false;
for (int i = 0; i < n; i++) {
Vec4fb res = (vec[i] != mref);
if (res[0] || res[1] || res[2] || res[3]) {
flag = true;
break;
}
}
if (flag) cout << "Failed." << endl;
else cout << "Succeeded." << endl;
Point4f zero;
cout << zero << endl;
zero = zero + Point4f(1, 2, 3, 4);
cout << zero << endl;
zero = zero * zero;
cout << zero << endl;
zero = zero / zero;
cout << zero << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment