#include <iostream>
#include <stdio.h>
#include <math.h>
#include <fstream>
#include <math.h>
using namespace std;
#include "megaprofiler.h"

// Capacity, in floats, of each row of `data` and of `init_data`.
const int max_n = 10000000;
// Grid spacing and time step -- presumably those of the scheme stepped in main();
// in this file they are only used to form C.
const double dh = 1;
const double dt = 0.5;
// Stencil coefficient dt / dh^2 (0.5 with the values above). main() copies it
// into CC for the SSE section; the scalar loop there uses a literal 0.99 instead.
const double C = dt / (dh * dh);
// Number of update steps performed by each solver loop in main().
const int step_cnt = 10;

// Two working buffers; main() picks the source and destination row from the
// parity of the step index.
// NOTE(review): because of the file-level `using namespace std;`, the
// unqualified name `data` can clash with std::data when the file is built as
// C++17 or later -- confirm the target language standard.
float data[2][max_n];
// Initial profile produced by Init() and copied into data[1] ahead of each
// solver section in main().
float init_data[max_n];
// Buffer read during the current step; reassigned in main() on every step.
float* u;
// Buffer written during the current step; reassigned in main() on every step.
float* nu;

// `ov` is the argument handed to every TEST_FIN in main(); `res` is not
// referenced anywhere else in the visible part of this file.
unsigned long long ov, res;

// Fills arr[0..count] with a sampled sine profile pinned to zero at both ends.
//
// Interior samples arr[i], 1 <= i < count, are set to 100 * sin(i / 10);
// arr[0] and arr[count] are set to 0, so the buffer must hold at least
// count + 1 floats.
//
//   arr     - destination buffer; nothing is written when it is null
//   count   - index of the last element; nothing is written when it is negative
//   forward - unused; kept so that existing call sites stay valid
void Init(float* arr, int count, bool forward = false)
{
    (void)forward;

    // Without this guard a negative count would store to arr[count], i.e. in
    // front of the buffer.
    if (!arr || count < 0)
        return;

    for (int i = 1; i < count; ++i)
        arr[i] = sin ((i + 0.0) / 10) * 100;

    // Boundary samples are fixed at zero.
    arr[0] = arr[count] = 0;
}

// Writes one "index value t" line per element of arr[0..count] to `o`.
//
//   o       - destination stream
//   arr     - buffer holding at least count + 1 floats
//   count   - index of the last element to print
//   t       - tag repeated as the third column of every line
//   forward - true: indices ascend from 0 to count; false: they descend from
//             count to 0
void Print(std::ostream& o, float* arr, int count, int t, bool forward = true)
{
    const int first = forward ? 0 : count;
    const int stride = forward ? 1 : -1;

    // `emitted` counts lines; `idx` walks the buffer in the requested direction.
    for (int emitted = 0, idx = first; emitted <= count; ++emitted, idx += stride)
        o << idx << ' ' << arr[idx] << ' ' << t << '\n';
}

// Four-lane constants for the SSE section of main(): every element of `two`
// is set to 2.0 and every element of `CC` to C there. The inline assembly in
// main() refers to both arrays by symbol name.
float two[4];
float CC[4];

// Entry point: applies a three-point stencil update to n + 1 samples for
// step_cnt steps, first as a plain C++ loop and then as an inline-assembly SSE
// attempt, each wrapped in TEST_HEAD / TEST_START / TEST_FIN.
// NOTE(review): those three macros are not defined in this file; presumably
// they come from megaprofiler.h and form the timing harness -- confirm their
// expansion before restructuring any code placed between them.
int main()
{
    // Nothing sits between TEST_START and TEST_FIN here; presumably this
    // records the harness's own overhead in `ov` -- confirm.
    TEST_HEAD(1000);
    TEST_START;
    TEST_FIN(ov);        
//--- scalar version ---
    // `out` is only referenced by the commented-out Print call further down.
    ofstream out("output.txt");
    int n = 100;
    Init(init_data, n);

    TEST_HEAD(1000);
    
    // Copies indices 0..n-1 only; data[1][n] is not written by this loop.
    for (int i = 0; i < n; ++i)
        data[1][i] = init_data[i];
    
    TEST_START;
    
    for (int s = 1; s <= step_cnt; ++s)
    {
        // Odd s reads data[1] and writes data[0]; even s does the reverse.
        u = data[s % 2];
        nu = data[(s + 1) % 2];
//        Print(out, u, n, s, s % 2);
        for (int i = 1; i < n; ++i)
        {
            // NOTE(review): these prints (each flushed by endl) run inside the
            // TEST_START..TEST_FIN region, so they are part of whatever that
            // region measures.
            cout <<  u[i] << endl;
            cout << (u[i-1] + u[i+1] - 2*u[i]) << endl;
            // NOTE(review): the factor 0.99 is hard-coded; the file-level
            // constant C is not used in this loop.
            nu[i] = 0.99 * (u[i-1] + u[i+1] - 2*u[i]) + u[i];
            cout << "u[i] = " << u[i] << endl;
            cout << "nu[i] = " << nu[i] << endl;
        }
        // Both end samples of the freshly written buffer are reset to zero.
        nu[0] = nu[n] = 0;
    }

    // NOTE(review): the same variable `ov` is passed as in the first
    // TEST_FIN; `res` is never used.
    TEST_FIN(ov);        

//--- sse ---

    // Broadcast the constants into four-element arrays for the packed
    // instructions. NOTE(review): the scalar loop subtracts 2*u[i], whereas
    // `two` holds +2.0 and the assembly below only multiplies and adds.
    two[0] = two[1] = two[2] = two[3] = 2.0;
    CC[0] = CC[1] = CC[2] = CC[3] = C;
    
    TEST_HEAD(1000);
    
    for (int i = 0; i < n; ++i)
        data[1][i] = init_data[i];
    
    TEST_START;
    // Basic asm (no operands, hence the single '%'): loads `two` into xmm1 and
    // `CC` into xmm2. NOTE(review): nothing tells the compiler that these
    // registers are live, so their contents are not guaranteed to survive
    // until the asm statement inside the loop.
    asm (\
        "movups two, %xmm1;" // xmm1 <- two[0..3]
        "movups CC, %xmm2;"// xmm2 <- CC[0..3]
         );   
    for (int s = 1; s <= step_cnt; ++s)
    {
        u = data[s % 2];
        nu = data[(s + 1) % 2];
        for (int i = 0; i < n; i += 4)
        {
            // Extended asm, AT&T operand order (source, destination).
            // NOTE(review): as written this statement looks unlikely to
            // assemble or to compute the stencil -- verify each point:
            //  * "$-1(u, %0)" / "(u, %0)" / "(nu, %0)" are not valid AT&T
            //    memory operands: a displacement takes no '$' and the base
            //    must be a register, while `u` and `nu` are symbols naming the
            //    pointer variables themselves, not the buffers they point to.
            //  * displacements and the index are byte offsets, so even a
            //    well-formed "-1"/"+1"/i would not address neighbouring
            //    floats without scaling by 4.
            //  * the constraint string for operand %0 is empty, and no
            //    outputs or clobbers (xmm registers, memory) are declared.
            //  * xmm6 is copied from xmm1 before any arithmetic and xmm6 is
            //    what gets stored; the value accumulated in xmm1 is never
            //    written back, and xmm1 no longer holds `two` after the first
            //    pass.
            //  * for i == 0 the intended u[i-1] load would start before the
            //    first element of the buffer.
            asm (\
            "movups $-1(u, %0), %%xmm3;"  // intended: xmm3 <- u[i-1 .. i+2]
            "movups (u, %0), %%xmm4;"     // intended: xmm4 <- u[i .. i+3]
            "movups $1(u, %0), %%xmm5;"   // intended: xmm5 <- u[i+1 .. i+4]
            "movups %%xmm1, %%xmm6;"      // xmm6 <- xmm1
            "mulps %%xmm4, %%xmm1;"       // xmm1 <- xmm1 * xmm4
            "addps %%xmm3, %%xmm1;"       // xmm1 <- xmm1 + xmm3
            "addps %%xmm5, %%xmm1;"       // xmm1 <- xmm1 + xmm5
            "mulps %%xmm2, %%xmm1;"       // xmm1 <- xmm1 * xmm2
            "addps %%xmm4, %%xmm1;"       // xmm1 <- xmm1 + xmm4
            "movups %%xmm6, (nu, %0);"    // intended: nu[i .. i+3] <- xmm6
            :
            :""(i)
            :
            );
        }
    }

    TEST_FIN(ov);        

    
    return 0;
}