Last active
February 14, 2024 15:23
-
-
Save ivan-pi/010c98f9344fd6f0a543f3a753b13365 to your computer and use it in GitHub Desktop.
Lattice Boltzmann in 2D on a periodic domain
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// example.cpp | |
// | |
// Compile with: | |
// icpx -fsycl -O3 -march=native example.cpp | |
// | |
#include <vector> | |
#include <algorithm> | |
#include <cassert> | |
#include <cstdio> | |
#include <string> | |
#include <iostream> | |
#include <chrono> | |
#include <CL/sycl.hpp> | |
const int N = 512; | |
const int NX = N; | |
const int NY = N; | |
const int num_steps = 50000; | |
const int num_steps_io = 10000; | |
const double U0 = 0.1 / std::sqrt(3); | |
const double nu = U0 * (double) N / 3000; | |
const double tau = 3*nu + 0.5; | |
const double omega = 1./tau; | |
const double w0 = 4./9.; | |
const double ws = 1./9.; | |
const double wd = 1./36.; | |
const std::array<double,9> w{w0,ws,ws,ws,ws,wd,wd,wd,wd}; | |
const double PI = 4*std::atan(1.0); | |
struct shearlayer { | |
double length; | |
double kappa; | |
double delta; | |
double umax; | |
std::array<double,2> operator()(std::array<double,2> xy) { | |
const double x = xy[0]/length; | |
const double y = xy[1]/length; | |
const double ux = y < 0.5 ? | |
umax*std::tanh(kappa*(y - 0.25)) : umax*std::tanh(kappa*(0.75 - y)); | |
const double uy = umax*delta*std::sin(2.0*PI*(x + 0.25)); | |
return {ux,uy}; | |
} | |
}; | |
struct shearlayer layer{N,80.,0.05,U0}; | |
template<typename T> | |
void writeVTK_rectilinear( | |
const char* filepath, | |
int nx, int ny, | |
const T rho[], | |
const T ux[], const T uy[]) { | |
std::ofstream vtkfile(filepath); | |
vtkfile << "# vtk DataFile Version 3.0\n"; | |
vtkfile << "lbm grid output\n"; | |
vtkfile << "ASCII\n"; | |
#if 0 | |
vtkfile << "DATASET RECTILINEAR_GRID\n"; | |
vtkfile << "DIMENSIONS " << nx+1 << ' ' << ny+1 << " 2\n"; | |
vtkfile << "X_COORDINATES " << nx+1 << " float\n"; | |
for (int ix = 0; ix <= nx; ++ix) { | |
vtkfile << (T) ix << '\n'; | |
} | |
vtkfile << "Y_COORDINATES " << ny+1 << " float\n"; | |
for (int iy = 0; iy <= ny; ++iy) { | |
vtkfile << (T) iy << '\n'; | |
} | |
vtkfile << "Z_COORDINATES 2 float\n"; | |
vtkfile << (T) 0.0 << '\n'; | |
vtkfile << (T) 1.0 << '\n'; | |
#else | |
vtkfile << "DATASET STRUCTURED_POINTS\n"; | |
vtkfile << "DIMENSIONS " << nx << ' ' << ny << " 1\n"; | |
vtkfile << "ORIGIN 0.5 0.5 0.5\n"; | |
vtkfile << "SPACING 1.0 1.0 0.0\n"; | |
#endif | |
vtkfile << '\n'; | |
vtkfile << "POINT_DATA " << nx*ny << '\n'; | |
vtkfile << "SCALARS Density float 1\n"; | |
vtkfile << "LOOKUP_TABLE default\n"; | |
for (int iy = 0; iy < ny; ++iy) { | |
for (int ix = 0; ix < nx; ++ix) { | |
vtkfile << rho[ix*ny + iy] << '\n'; | |
} | |
} | |
vtkfile << "VECTORS Velocity float\n"; | |
for (int iy = 0; iy < ny; ++iy) { | |
for (int ix = 0; ix < nx; ++ix) { | |
vtkfile << ux[ix*ny+iy] << ' ' << uy[ix*nx+iy] << " 0\n"; | |
} | |
} | |
vtkfile.close(); | |
} | |
void set_equilibrium(sycl::queue &queue, | |
sycl::buffer<double,1> brho, | |
sycl::buffer<double,2> bvel, | |
sycl::buffer<double,2> bf) { | |
queue.submit([&](sycl::handler& h){ | |
sycl::accessor f(bf,h,sycl::write_only); | |
sycl::accessor rho(brho,h,sycl::read_only); | |
sycl::accessor vel(bvel,h,sycl::read_only); | |
h.parallel_for(sycl::range{NX,NY},[=](sycl::id<2> id){ | |
const std::size_t i = id[0]*NY + id[1]; | |
const double ux = vel[0][i]; | |
const double uy = vel[1][i]; | |
const double usqr = ux*ux + uy*uy; | |
double cu[9]; | |
cu[0] = 0; | |
cu[1] = ux; | |
cu[2] = uy; | |
cu[3] = -ux; | |
cu[4] = -uy; | |
cu[5] = ux + uy; | |
cu[6] = -ux + uy; | |
cu[7] = -ux - uy; | |
cu[8] = ux - uy; | |
for (int q = 0; q < 9; ++q) { | |
f[q][i] = rho[i]*w[q]*(1 + 3*cu[q] + 4.5*cu[q]*cu[q] - 1.5*usqr); | |
} | |
}); | |
}); | |
queue.wait(); | |
} | |
void macros(sycl::queue &Q, | |
sycl::buffer<double,2> b_f, | |
sycl::buffer<double,1> b_rho, | |
sycl::buffer<double,2> b_vel) { | |
Q.submit([&](sycl::handler& h) { | |
sycl::accessor f(b_f,h,sycl::read_only); | |
sycl::accessor rho(b_rho,h,sycl::write_only); | |
sycl::accessor vel(b_vel,h,sycl::write_only); | |
h.parallel_for<class macro>(NX*NY,[=](sycl::id<1> i) { | |
const double rho_ = | |
((f[5][i] + f[7][i]) + (f[6][i] + f[8][i])) + | |
((f[1][i] + f[3][i]) + (f[2][i] + f[4][i])) + f[0][i]; | |
rho[i] = rho_; | |
const double invrho = 1.0/rho_; | |
vel[0][i] = invrho * (((f[5][i] - f[7][i]) + (f[8][i] - f[6][i])) + (f[1][i] - f[3][i])); | |
vel[1][i] = invrho * (((f[5][i] - f[7][i]) + (f[6][i] - f[8][i])) + (f[2][i] - f[4][i])); | |
}); | |
}); | |
} | |
void collide(sycl::queue &Q, | |
sycl::buffer<double,2> b_f, | |
sycl::buffer<double,1> b_rho, | |
sycl::buffer<double,2> b_vel, | |
double omega) { | |
Q.submit([&](sycl::handler &h) { | |
//sycl::accessor rho(b_rho,h,sycl::read_only); | |
//sycl::accessor vel(b_vel,h,sycl::read_only); | |
sycl::accessor f(b_f,h,sycl::read_write); | |
const double w[3] = {w0, ws, wd}; | |
const double omegabar = 1.0 - omega; | |
const double one_third = 1.0 / 3.0; | |
const double one_half = 1.0 / 2.0; | |
const double omega_w0 = 3.0 * omega * w[0]; | |
const double omega_ws = 3.0 * omega * w[1]; | |
const double omega_wd = 3.0 * omega * w[2]; | |
h.parallel_for<class collision>(NX*NY,[=](sycl::id<1> i) { | |
// --- constants --- | |
// --- common terms and direction 0 --- | |
const double rho = | |
((f[5][i] + f[7][i]) + (f[6][i] + f[8][i])) + | |
((f[1][i] + f[3][i]) + (f[2][i] + f[4][i])) + f[0][i]; | |
const double invrho = 1.0/rho; | |
const double velx = invrho * (((f[5][i] - f[7][i]) + (f[8][i] - f[6][i])) + (f[1][i] - f[3][i])); | |
const double vely = invrho * (((f[5][i] - f[7][i]) + (f[6][i] - f[8][i])) + (f[2][i] - f[4][i])); | |
const double velxx = velx*velx; | |
const double velyy = vely*vely; | |
const double indp = one_third - one_half * (velxx + velyy); | |
f[0][i] = omegabar * f[0][i] + omega_w0 * rho * indp; | |
// ------ | |
const double vel_trm_13 = indp + (3./2.)*velxx; | |
f[1][i] = omegabar * f[1][i] + omega_ws*rho*(vel_trm_13 + velx); | |
f[3][i] = omegabar * f[3][i] + omega_ws*rho*(vel_trm_13 - velx); | |
// ------ | |
const double vel_trm_24 = indp + (3./2.)*velyy; | |
f[2][i] = omegabar * f[2][i] + omega_ws * rho * (vel_trm_24 + vely); | |
f[4][i] = omegabar * f[4][i] + omega_ws * rho * (vel_trm_24 - vely); | |
// ------ | |
const double velxpy = velx + vely; | |
const double vel_trm_57 = indp + (3./2.) * velxpy * velxpy; | |
f[5][i] = omegabar * f[5][i] + omega_wd * rho * (vel_trm_57 + velxpy); | |
f[7][i] = omegabar * f[7][i] + omega_wd * rho * (vel_trm_57 - velxpy); | |
// ------ | |
const double velxmy = velx - vely; | |
const double vel_trm_68 = indp + (3./2.) * velxmy * velxmy; | |
f[6][i] = omegabar * f[6][i] + omega_wd * rho * (vel_trm_68 - velxmy); | |
f[8][i] = omegabar * f[8][i] + omega_wd * rho * (vel_trm_68 + velxmy); | |
}); | |
}); | |
} | |
void stream(sycl::queue &Q, | |
sycl::buffer<double,2> bfold, | |
sycl::buffer<double,2> bfnew) { | |
Q.submit([&](sycl::handler& h) { | |
sycl::accessor fold(bfold,h,sycl::read_only); | |
sycl::accessor fnew(bfnew,h,sycl::write_only); | |
h.parallel_for(sycl::range{NX,NY},[=](sycl::id<2> id) { | |
auto ix = id[0]; | |
auto iy = id[1]; | |
size_t xp1 = (ix + 1) % NX; | |
size_t xm1 = (ix + NX - 1) % NX; | |
size_t yp1 = (iy + 1) % NY; | |
size_t ym1 = (iy + NY - 1) % NY; | |
size_t curr = ix*NY + iy; | |
fnew[0][curr] = fold[0][curr]; /* central cell, no movement */ | |
#if 1 | |
fnew[1][curr] = fold[1][iy + xp1*NY]; /* east */ | |
fnew[2][curr] = fold[2][yp1 + ix*NY]; /* north */ | |
fnew[3][curr] = fold[3][iy + xm1*NY]; /* west */ | |
fnew[4][curr] = fold[4][ym1 + ix*NY]; /* south */ | |
fnew[5][curr] = fold[5][yp1 + xp1*NY]; /* north-east */ | |
fnew[6][curr] = fold[6][yp1 + xm1*NY]; /* north-west */ | |
fnew[7][curr] = fold[7][ym1 + xm1*NY]; /* south-west */ | |
fnew[8][curr] = fold[8][ym1 + xp1*NY]; /* south-east */ | |
#else | |
fnew[1][iy + xm1*NY] = fold[1][curr]; /* east */ | |
fnew[2][ym1 + ix*NY] = fold[2][curr]; /* north */ | |
fnew[3][iy + xp1*NY] = fold[3][curr]; /* west */ | |
fnew[4][yp1 + ix*NY] = fold[4][curr]; /* south */ | |
fnew[5][ym1 + ym1*NY] = fold[5][curr]; /* north-east */ | |
fnew[6][ym1 + xp1*NY] = fold[6][curr]; /* north-west */ | |
fnew[7][yp1 + xp1*NY] = fold[7][curr]; /* south-west */ | |
fnew[8][yp1 + xm1*NY] = fold[8][curr]; /* south-east */ | |
#endif | |
}); | |
}); | |
} | |
std::string filename(int n, std::string basename) { | |
char buffer[13]; | |
std::snprintf(buffer, sizeof(buffer), "%08d.vtk", n); | |
return basename.append(buffer); | |
} | |
int main(int argc, char const *argv[]) | |
{ | |
sycl::device my_dev{sycl::default_selector{}}; | |
std::cout << "Running tests on " << my_dev.get_info<sycl::info::device::name>() << ".\n"; | |
std::vector<double> fold(NX*NY*9); | |
std::vector<double> fnew(NX*NY*9); | |
std::vector<double> rho(NX*NY); | |
std::vector<double> vel(NX*NY*2); | |
// Initial density | |
std::fill(rho.begin(),rho.end(),1.0); | |
// Initial velocity | |
{ | |
double *vx = &vel[0]; | |
double *vy = &vel[NX*NY]; | |
for (int ix = 0; ix < NX; ++ix) { | |
for (int iy = 0; iy < NY; ++iy) { | |
const double x = (double) ix + 0.5; | |
const double y = (double) iy + 0.5; | |
auto vxy = layer({x,y}); | |
vx[ix*NY + iy] = vxy[0]; | |
vy[ix*NY + iy] = vxy[1]; | |
} | |
} | |
//writeVTK_rectilinear("begin.vtk",NX,NY,rho.data(),vx,vy); | |
} | |
// Catch asynchronous exceptions | |
auto exception_handler = [](sycl::exception_list exceptions) { | |
for (std::exception_ptr const &e : exceptions) { | |
try { | |
std::rethrow_exception(e); | |
} | |
catch (sycl::exception const &e) { | |
std::cout << "Caught asynchronous SYCL " | |
"exception during execution:\n" | |
<< e.what() << std::endl; | |
} | |
} | |
}; | |
auto start = std::chrono::steady_clock::now(); | |
{ /* kernel region */ | |
sycl::queue Q(my_dev, exception_handler); | |
sycl::buffer<double,2> b_fold{fold.data(),{9,NX*NY}}; | |
sycl::buffer<double,2> b_fnew{fnew.data(),{9,NX*NY}}; | |
sycl::buffer<double,1> b_rho{rho.data(),{NX*NY}}; | |
sycl::buffer<double,2> b_vel{vel.data(),{2,NX*NY}}; | |
set_equilibrium(Q,b_rho,b_vel,b_fold); | |
for (int step = 0; step < num_steps; ++step) { | |
if (step % 2 == 0) { | |
//macros(Q,b_fold,b_rho,b_vel); | |
collide(Q,b_fold,b_rho,b_vel,omega); | |
stream(Q,b_fold,b_fnew); | |
} else { | |
//macros(Q,b_fnew,b_rho,b_vel); | |
collide(Q,b_fnew,b_rho,b_vel,omega); | |
stream(Q,b_fnew,b_fold); | |
} | |
if (step % num_steps_io == 0 && num_steps_io > 0) { | |
if (step % 2 == 0) { | |
macros(Q,b_fold,b_rho,b_vel); | |
} else { | |
macros(Q,b_fnew,b_rho,b_vel); | |
} | |
sycl::host_accessor h_rho(b_rho); | |
sycl::host_accessor h_vel(b_vel); | |
double *hvx = h_vel.get_pointer(); | |
double *hvy = hvx + NX*NY; | |
auto file = filename(step,"lbm"); | |
writeVTK_rectilinear(file.c_str(),NX,NY,h_rho.get_pointer(),hvx,hvy); | |
} | |
} | |
} /* end kernel region */ | |
auto end = std::chrono::steady_clock::now(); | |
std::chrono::duration<double> elapsed_seconds = end - start; | |
std::cout << "MLUPS = " << double(NX)*double(NY)*double(num_steps)*1.e-6/elapsed_seconds.count() << '\n'; | |
#if 0 | |
{ | |
double *r = rho.data(); | |
double *vx = &vel[0]; | |
double *vy = &vel[NX*NY]; | |
writeVTK_rectilinear("end.vtk",NX,NY,r,vx,vy); | |
} | |
#endif | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CXX=icpx | |
CXXFLAGS=-Wall -g -O2 -march=native | |
.PHONY: all | |
all: example | |
example: example.cpp | |
$(CXX) -o $@ $(CXXFLAGS) -fsycl -fsycl-targets=spir64_x86_64 $< | |
.PHONY: clean | |
clean: | |
rm -f example |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment