#include <iostream> #include <stdio.h> #include <math.h> #include <fstream> #include <math.h> using namespace std; #include "megaprofiler.h" const int max_n = 10000000; const double dh = 1; const double dt = 0.5; const double C = dt / (dh * dh); const int step_cnt = 10; float data[2][max_n]; float init_data[max_n]; float* u; float* nu; unsigned long long ov, res; void Init(float* arr, int count, bool forward = false) { for (int i = 1; i < count; ++i) arr[i] = sin ((i + 0.0) / 10) * 100; arr[0] = arr[count] = 0; } void Print(ostream& o, float* arr, int count, int t, bool forward = true) { if (forward) for (int i = 0; i <= count; ++i) o << i << ' ' << arr[i] << ' ' << t << '\n'; else for (int i = count; i >= 0; --i) o << i << ' ' << arr[i] << ' ' << t << '\n'; } float two[4]; float CC[4]; int main() { TEST_HEAD(1000); TEST_START; TEST_FIN(ov); //---------------------------------------------- ofstream out("output.txt"); int n = 100; Init(init_data, n); TEST_HEAD(1000); for (int i = 0; i < n; ++i) data[1][i] = init_data[i]; TEST_START; for (int s = 1; s <= step_cnt; ++s) { u = data[s % 2]; nu = data[(s + 1) % 2]; // Print(out, u, n, s, s % 2); for (int i = 1; i < n; ++i) { cout << u[i] << endl; cout << (u[i-1] + u[i+1] - 2*u[i]) << endl; nu[i] = 0.99 * (u[i-1] + u[i+1] - 2*u[i]) + u[i]; cout << "u[i] = " << u[i] << endl; cout << "nu[i] = " << nu[i] << endl; } nu[0] = nu[n] = 0; } TEST_FIN(ov); //--- sse --- two[0] = two[1] = two[2] = two[3] = 2.0; CC[0] = CC[1] = CC[2] = CC[3] = C; TEST_HEAD(1000); for (int i = 0; i < n; ++i) data[1][i] = init_data[i]; TEST_START; asm (\ "movups two, %xmm1;" //6 load 2 "movups CC, %xmm2;"//2 load C ); for (int s = 1; s <= step_cnt; ++s) { u = data[s % 2]; nu = data[(s + 1) % 2]; for (int i = 0; i < n; i += 4) { asm (\ "movups $-1(u, %0), %%xmm3;" //3 load u + i - 1 "movups (u, %0), %%xmm4;" //4 load u + i "movups $1(u, %0), %%xmm5;" //5 load u + i + 1 "movups %%xmm1, %%xmm6;" //1 <- 4 * 6 "mulps %%xmm4, %%xmm1;" "addps %%xmm3, %%xmm1;" //1 <- 1 + 3 "addps %%xmm5, %%xmm1;" //1 <- 1 + 5 "mulps %%xmm2, %%xmm1;" //1 <- 1 * 2 "addps %%xmm4, %%xmm1;" //1 <- 1 + 4 "movups %%xmm6, (nu, %0);" //nu + i <- 6 : :""(i) : ); } } TEST_FIN(ov); return 0; }