mmha · April 26, 2019 09:33
diff --git a/wiki_example.cpp b/wiki_example.cpp
 #include <CL/sycl.hpp>
 #include <iostream>

 // Forward declarations that serve only as kernel names: each one is passed
 // as the template argument of a parallel_for call in main().
 class init_a;
 class init_b;
 class matrix_add;

 // Lets main() refer to the SYCL names (queue, buffer, handler, range,
 // access, ...) without qualification.
 using namespace cl::sycl;

 // Size of the matrices: N is the extent of the first dimension and M the
 // extent of the second, for both the buffers and the kernel iteration spaces.
 constexpr size_t N = 2000;
 constexpr size_t M = 3000;

 /* Fill matrices "a" and "b" on a SYCL device, compute c = a + b there,
    then verify every element of "c" on the host.
    Returns 0 if all elements match the analytic value, -1 on the first
    mismatch.  */
 int main() {
   // Create a queue to work on default device
   queue q;
   // Create some 2D buffers with N×M double values for our matrices
   buffer<double, 2> a{{ N, M }};
   buffer<double, 2> b{{ N, M }};
   buffer<double, 2> c{{ N, M }};

   // Launch a first asynchronous kernel to initialize buffer "a"
   q.submit([&](handler &cgh) {
       // The kernel writes "a", so get a write accessor on it
       auto A = a.get_access<access::mode::write>(cgh);

       // Enqueue a parallel kernel on an N×M 2D iteration space
       cgh.parallel_for<init_a>(range<2>{ N, M },
                                [=] (auto index) {
                                  A[index] = index[0]*2 + index[1];
                                });
     });

   // Launch an asynchronous kernel to initialize buffer "b"
   q.submit([&](handler &cgh) {
       // The kernel writes "b", so get a write accessor on it
       auto B = b.get_access<access::mode::write>(cgh);

       // Enqueue a parallel kernel on an N×M 2D iteration space
       cgh.parallel_for<init_b>(range<2>{ N, M },
                                [=] (auto index) {
                                  B[index] = index[0]*2014 + index[1]*42;
                                });
     });

   // Launch an asynchronous kernel to compute matrix addition c = a + b
   q.submit([&](handler &cgh) {
       // In the kernel "a" and "b" are read, but "c" is written.
       // Since the kernel reads "a" and "b", the runtime will implicitly add
       // a producer-consumer dependency on the previous kernels producing them.
       auto A = a.get_access<access::mode::read>(cgh);
       auto B = b.get_access<access::mode::read>(cgh);
       auto C = c.get_access<access::mode::write>(cgh);

       // Enqueue a parallel kernel on an N×M 2D iteration space
       cgh.parallel_for<matrix_add>(range<2>{ N, M },
                                    [=] (auto index) {
                                      C[index] = A[index] + B[index];
                                    });
     });

   /* Request an access to read "c" from the host side. The SYCL runtime
      will wait for "c" to be available on the host side before
      returning the accessor.
      This means that there is no communication happening in the loop nest
      below.  */
   auto C = c.get_access<access::mode::read>();
   std::cout << std::endl << "Result:" << std::endl;
   for (size_t i = 0; i < N; i++) {
     for (size_t j = 0; j < M; j++) {
       // Analytic value: a[i][j] + b[i][j] = (2*i + j) + (2014*i + 42*j).
       // With the N and M of this file it is an integer far below 2^53, so
       // comparing doubles for exact equality is safe here.
       const double expected = static_cast<double>(i*(2 + 2014) + j*(1 + 42));
       if (C[i][j] != expected) {
         std::cout << "Wrong value " << C[i][j] << " on element "
                   << i << ' ' << j << std::endl;
         // Return rather than call exit(): same exit status, but the
         // accessor, buffers and queue are destroyed normally.
         return -1;
       }
     }
   }

   std::cout << "Good computation!" << std::endl;
   return 0;
 }
	#include <CL/sycl.hpp>
	#include <iostream>

	// Forward declarations that serve only as kernel names: each one is passed
	// as the template argument of a parallel_for call in main().
	class init_a;
	class init_b;
	class matrix_add;

	// Lets main() refer to the SYCL names (queue, buffer, handler, range,
	// access, ...) without qualification.
	using namespace cl::sycl;

	// Size of the matrices: N is the extent of the first dimension and M the
	// extent of the second, for both the buffers and the kernel iteration spaces.
	constexpr size_t N = 2000;
	constexpr size_t M = 3000;

	/* Fill matrices "a" and "b" on a SYCL device, compute c = a + b there,
	   then verify every element of "c" on the host.
	   Returns 0 if all elements match the analytic value, -1 on the first
	   mismatch.  */
	int main() {
	  // Create a queue to work on default device
	  queue q;
	  // Create some 2D buffers with N×M double values for our matrices
	  buffer<double, 2> a{{ N, M }};
	  buffer<double, 2> b{{ N, M }};
	  buffer<double, 2> c{{ N, M }};

	  // Launch a first asynchronous kernel to initialize buffer "a"
	  q.submit([&](handler &cgh) {
	      // The kernel writes "a", so get a write accessor on it
	      auto A = a.get_access<access::mode::write>(cgh);

	      // Enqueue a parallel kernel on an N×M 2D iteration space
	      cgh.parallel_for<init_a>(range<2>{ N, M },
	                               [=] (auto index) {
	                                 A[index] = index[0]*2 + index[1];
	                               });
	    });

	  // Launch an asynchronous kernel to initialize buffer "b"
	  q.submit([&](handler &cgh) {
	      // The kernel writes "b", so get a write accessor on it
	      auto B = b.get_access<access::mode::write>(cgh);

	      // Enqueue a parallel kernel on an N×M 2D iteration space
	      cgh.parallel_for<init_b>(range<2>{ N, M },
	                               [=] (auto index) {
	                                 // The '*' operators had been stripped
	                                 // from this expression; restored.
	                                 B[index] = index[0]*2014 + index[1]*42;
	                               });
	    });

	  // Launch an asynchronous kernel to compute matrix addition c = a + b
	  q.submit([&](handler &cgh) {
	      // In the kernel "a" and "b" are read, but "c" is written.
	      // Since the kernel reads "a" and "b", the runtime will implicitly add
	      // a producer-consumer dependency on the previous kernels producing them.
	      auto A = a.get_access<access::mode::read>(cgh);
	      auto B = b.get_access<access::mode::read>(cgh);
	      auto C = c.get_access<access::mode::write>(cgh);

	      // Enqueue a parallel kernel on an N×M 2D iteration space
	      cgh.parallel_for<matrix_add>(range<2>{ N, M },
	                                   [=] (auto index) {
	                                     C[index] = A[index] + B[index];
	                                   });
	    });

	  /* Request an access to read "c" from the host side. The SYCL runtime
	     will wait for "c" to be available on the host side before
	     returning the accessor.
	     This means that there is no communication happening in the loop nest
	     below.  */
	  auto C = c.get_access<access::mode::read>();
	  std::cout << std::endl << "Result:" << std::endl;
	  for (size_t i = 0; i < N; i++) {
	    for (size_t j = 0; j < M; j++) {
	      // Analytic value: a[i][j] + b[i][j] = (2*i + j) + (2014*i + 42*j).
	      // With the N and M of this file it is an integer far below 2^53, so
	      // comparing doubles for exact equality is safe here.
	      const double expected = static_cast<double>(i*(2 + 2014) + j*(1 + 42));
	      if (C[i][j] != expected) {
	        std::cout << "Wrong value " << C[i][j] << " on element "
	                  << i << ' ' << j << std::endl;
	        // Return rather than call exit(): same exit status, but the
	        // accessor, buffers and queue are destroyed normally.
	        return -1;
	      }
	    }
	  }

	  std::cout << "Good computation!" << std::endl;
	  return 0;
	}