Created
April 26, 2019 09:33
-
-
Save mmha/f118b41fc7147302638c27d232ff596f to your computer and use it in GitHub Desktop.
Updated Wiki SYCL example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <CL/sycl.hpp>

#include <cstdlib>
#include <iostream>
// Kernel name tags: each one is only forward-declared and used as the
// template argument that names one parallel_for kernel in main().
class init_a;      // kernel that fills matrix "a"
class init_b;      // kernel that fills matrix "b"
class matrix_add;  // kernel that computes c = a + b
// Bring the SYCL API names used unqualified in main() (queue, buffer,
// handler, range, access) into scope.
using namespace cl::sycl;
// Size of the matrices: every buffer and every kernel iteration space in
// main() is N×M (N along the first dimension, M along the second).
constexpr size_t N = 2000;
constexpr size_t M = 3000;
int main() { | |
// Create a queue to work on default device | |
queue q; | |
// Create some 2D buffers with N×M float values for our matrices | |
buffer<double, 2> a{{ N, M }}; | |
buffer<double, 2> b{{ N, M }}; | |
buffer<double, 2> c{{ N, M }}; | |
// Launch a first asynchronous kernel to initialize buffer "a" | |
q.submit([&](handler &cgh) { | |
// The kernel write "a", so get a write accessor on it | |
auto A = a.get_access<access::mode::write>(cgh); | |
// Enqueue parallel kernel on an N×M 2D iteration space | |
cgh.parallel_for<init_a>(range<2>{ N, M }, | |
[=] (auto index) { | |
A[index] = index[0]*2 + index[1]; | |
}); | |
}); | |
// Launch an asynchronous kernel to initialize buffer "b" | |
q.submit([&](handler &cgh) { | |
// The kernel write "b", so get a write accessor on it | |
auto B = b.get_access<access::mode::write>(cgh); | |
// Enqueue a parallel kernel on an N×M 2D iteration space | |
cgh.parallel_for<init_b>(range<2>{ N, M }, | |
[=] (auto index) { | |
B[index] = index[0]*2014 + index[1]*42; | |
}); | |
}); | |
// Launch an asynchronous kernel to compute matrix addition c = a + b | |
q.submit([&](handler &cgh) { | |
// In the kernel "a" and "b" are read, but "c" is written | |
// Since the kernel reads "a" and "b", the runtime will add implicitly | |
// a producer-consumer dependency to the previous kernels producing them. | |
auto A = a.get_access<access::mode::read>(cgh); | |
auto B = b.get_access<access::mode::read>(cgh); | |
auto C = c.get_access<access::mode::write>(cgh); | |
// Enqueue a parallel kernel on an N×M 2D iteration space | |
cgh.parallel_for<matrix_add>(range<2>{ N, M }, | |
[=] (auto index) { | |
C[index] = A[index] + B[index]; | |
}); | |
}); | |
/* Request an access to read "c" from the host-side. The SYCL runtime | |
will wait for "c" to be ready available on the host side before | |
returning the accessor. | |
This means that there is no communication happening in the loop nest below. */ | |
auto C = c.get_access<access::mode::read>(); | |
std::cout << std::endl << "Result:" << std::endl; | |
for (size_t i = 0; i < N; i++) | |
for (size_t j = 0; j < M; j++) | |
// Compare the result to the analytic value | |
if (C[i][j] != i*(2 + 2014) + j*(1 + 42)) { | |
std::cout << "Wrong value " << C[i][j] << " on element " | |
<< i << ' ' << j << std::endl; | |
exit(-1); | |
} | |
std::cout << "Good computation!" << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment