Last active
September 13, 2021 14:55
-
-
Save itzmeanjan/6ee0ec55723f8df04b7260f65bc71ff6 to your computer and use it in GitHub Desktop.
Sequential + Parallel Root Finding using Bisection Method, powered by SYCL DPC++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <CL/sycl.hpp> | |
| #include <array> | |
| #include <chrono> | |
| #include <cmath> | |
| #include <iostream> | |
| using namespace sycl; | |
| constexpr uint N = 32; | |
| float C = powf(10.0, -5.0); | |
| float f(float x) { return x * x * x - 4.0 * x - 5.0; } | |
| float bisect(queue &Q, buffer<float, 1> &args, buffer<float, 1> &evals) { | |
| while (true) { | |
| Q.submit([&](handler &h) { | |
| accessor dacc_args{args, h, read_write}; | |
| accessor dacc_evals{evals, h, read_write}; | |
| h.parallel_for(nd_range<1>{{N}, {N}}, [=](nd_item<1> item) { | |
| int idx = item.get_global_id()[0]; | |
| float idx_f = static_cast<float>(idx); | |
| if (idx == 0) { | |
| dacc_evals[idx] = f(dacc_args[0]); | |
| } else if (idx == N - 1) { | |
| dacc_evals[idx] = f(dacc_args[1]); | |
| } else { | |
| dacc_evals[idx] = f(dacc_args[0] + idx_f * dacc_args[2]); | |
| } | |
| item.barrier(); | |
| if (idx != 0 && dacc_evals[idx - 1] * dacc_evals[idx] < 0.0) { | |
| float a = dacc_args[0] + (idx_f - 1.0) * dacc_args[2]; | |
| float b = a + dacc_args[2]; | |
| dacc_args[0] = a; | |
| dacc_args[1] = b; | |
| dacc_args[2] = (b - a) / float(N - 1); | |
| } | |
| }); | |
| }); | |
| host_accessor hacc_args{args, read_only}; | |
| float a = hacc_args[0]; | |
| float b = hacc_args[1]; | |
| if (std::abs(b - a) < C) { | |
| break; | |
| } | |
| } | |
| host_accessor hacc_args{args, read_only}; | |
| return hacc_args[0]; | |
| } | |
| int main(int argc, char **argv) { | |
| device D{default_selector{}}; | |
| queue Q{D}; | |
| std::cout << "running on: " << D.get_info<info::device::name>() << std::endl; | |
| float a = -10.0; | |
| float b = 10.0; | |
| float s = (b - a) / static_cast<float>(N - 1); | |
| std::array<float, 3> arg_arr = {a, b, s}; | |
| buffer<float, 1> args{arg_arr}; | |
| buffer<float, 1> evals{range<1>{N}}; | |
| Q.submit([&](handler &h) { | |
| accessor dacc{args, h, write_only, noinit}; | |
| h.single_task([=]() { | |
| dacc[0] = a; | |
| dacc[1] = b; | |
| dacc[2] = s; | |
| }); | |
| }); | |
| auto start = std::chrono::steady_clock::now(); | |
| float root = bisect(Q, args, evals); | |
| auto end = std::chrono::steady_clock::now(); | |
| std::cout << "root: " << root << ", in " | |
| << std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
| start) | |
| .count() | |
| << "ms" << std::endl; | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use std::time::Instant; | |
| // equation whose root being searched | |
| fn f(x: f32) -> f32 { | |
| x.powi(3) - 4f32 * x - 5f32 | |
| } | |
| // sequential bisection method implementation | |
| fn bisect(f: &dyn Fn(f32) -> f32, mut a: f32, mut b: f32, c: f32) -> f32 { | |
| while (b - a).abs() >= c { | |
| let m = (a + b) / 2f32; | |
| if f(a) * f(m) < 0f32 { | |
| b = m; | |
| } else { | |
| a = m; | |
| } | |
| } | |
| a | |
| } | |
| fn main() { | |
| // approximate with max error 10 ** -5 | |
| let c = 10f32.powi(-5); | |
| let start = Instant::now(); | |
| let a_ = bisect(&f1, -3f32, 3f32, c); | |
| println!("{}, in {:?}", a_, start.elapsed()); | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python3 | |
| from matplotlib import pyplot as plt | |
| import numpy as np | |
| f = lambda x: x ** 3 - 4 * x - 5 | |
| x = np.arange(-3, 3, 0.01) | |
| y = f(x) | |
| fig = plt.figure() | |
| ax = fig.add_subplot(1, 1, 1) | |
| ax.spines['left'].set_position('zero') | |
| ax.spines['bottom'].set_position('zero') | |
| ax.spines['top'].set_color('none') | |
| ax.spines['right'].set_color('none') | |
| ax.xaxis.set_ticks_position('bottom') | |
| ax.yaxis.set_ticks_position('left') | |
| plt.plot(x, y) | |
| plt.legend(['f(x) = x^3 - 4x - 5'], loc='upper left') | |
| plt.savefig('plot.png') |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Introduction
If you've not yet read https://itzmeanjan.in/pages/parallel-root-finding-using-sycl-dpcpp.html, you should probably consider doing so, given this gist accompanies that document.
Here I keep both sequential & parallel implementation of bisection method, used for finding root of non-linear equation of single variable. SIMD version of it is written in DPC++.
Usage
bisection.parallel.cppwith./a.out running on: Intel Xeon Processor (Skylake, IBRS) root: 2.45668, in 26msbisection.sequential.rs, you should copy/ paste it in cargo project'smain.rs.f(x) = x ** 3 - 4 * x - 5