-
-
Save ochafik/8766af321b3e98717b0d1d8fd1cc839d to your computer and use it in GitHub Desktop.
OpenSCAD parallel.h w/ TBB directly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once

#include <algorithm>
// #include <execution>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <vector>

// Third-party parallel back-ends are only needed (and only used) when
// ENABLE_TBB is defined, so keep them behind the same guard as the code.
#ifdef ENABLE_TBB
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_for.h>
#include <oneapi/tbb/parallel_for_each.h>
#include <oneapi/tbb/partitioner.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/execution_policy.h>
#endif
/**
 * @brief std::transform look-alike that may run in parallel.
 *
 * Applies `op` to every element of [begin1, end1) and stores each result at
 * the matching offset of the range starting at `out`.
 *
 * When ENABLE_TBB is defined, the OPENSCAD_NO_PARALLEL environment variable is
 * unset and the range holds more than one batch, the index range is handed to
 * oneapi::tbb::parallel_for and each sub-range is processed with
 * std::transform. In every other case a plain sequential std::transform runs.
 *
 * @param begin1     Start of the input range.
 * @param end1       End of the input range.
 * @param out        Start of the output range. The parallel path offsets both
 *                   `begin1` and `out` with `operator+`, so both iterators
 *                   must support that there.
 * @param op         Unary operation applied to each element. On the parallel
 *                   path it is called from inside tbb::parallel_for, so it may
 *                   be invoked concurrently.
 * @param batch_size Grain size passed to tbb::blocked_range. A value of 0 is
 *                   treated as 1, because blocked_range needs a positive grain.
 */
template <class InputIterator, class OutputIterator, class Operation>
void parallelizable_transform(
  const InputIterator begin1, const InputIterator end1,
  OutputIterator out,
  const Operation &op,
  size_t batch_size = 1)
{
#ifdef ENABLE_TBB
  if (!std::getenv("OPENSCAD_NO_PARALLEL")) {
    // blocked_range requires a strictly positive grain size.
    const size_t grain = batch_size == 0 ? 1 : batch_size;
    // Convert once so the comparison below is unsigned vs. unsigned.
    const size_t length = static_cast<size_t>(std::distance(begin1, end1));

    // Ranges that fit into a single batch gain nothing from task scheduling;
    // they fall through to the sequential call at the bottom.
    if (length > grain) {
      // NOTE(review): function-local static, hence shared by every call of
      // this template instantiation (including nested or concurrent ones).
      // Confirm that oneTBB allows an affinity_partitioner to be shared that way.
      static oneapi::tbb::affinity_partitioner ap;
      oneapi::tbb::parallel_for(
        oneapi::tbb::blocked_range<size_t>(0, length, grain),
        [&](const auto &range) {
          std::transform(begin1 + range.begin(), begin1 + range.end(), out + range.begin(), op);
        },
        ap);
      return;
    }
  }
#endif // ENABLE_TBB
  std::transform(begin1, end1, out, op);
}
/**
 * @brief Applies a binary operation to every pair of the cross product
 *        cont1 x cont2, optionally in parallel.
 *
 * Results are written through `out` in row-major order: all pairs for the
 * first element of `cont1` come first, then all pairs for the second, and so on.
 *
 * When ENABLE_TBB is defined and the OPENSCAD_NO_PARALLEL environment variable
 * is unset, the pairs are first collected as references into a temporary
 * vector and then processed by parallelizable_transform, so this function uses
 * the same parallel back-end as that helper. Otherwise a plain nested loop runs.
 *
 * @param cont1 First operand container. The parallel path calls `size()` on it.
 * @param cont2 Second operand container. The parallel path calls `size()` on it.
 * @param out   Output iterator; must address cont1.size() * cont2.size()
 *              writable elements. The parallel path forwards it to
 *              parallelizable_transform, which offsets it with `operator+`.
 * @param op    Binary operation called as op(element_of_cont1, element_of_cont2).
 *              On the parallel path it may be invoked concurrently.
 */
template <class Container1, class Container2, class OutputIterator, class Operation>
void parallelizable_cross_product_transform(
  const Container1 &cont1,
  const Container2 &cont2,
  OutputIterator out,
  const Operation &op)
{
#ifdef ENABLE_TBB
  if (!std::getenv("OPENSCAD_NO_PARALLEL")) {
    // Holds whatever dereferencing the container iterators yields (normally
    // references), so building the work list does not copy the elements.
    struct ReferencePair {
      decltype(*cont1.begin()) first;
      decltype(*cont2.begin()) second;
      ReferencePair(decltype(*cont1.begin()) first, decltype(*cont2.begin()) second) : first(first), second(second) {}
    };

    // Flatten the cross product into one linear range that can be split.
    std::vector<ReferencePair> pairs;
    pairs.reserve(cont1.size() * cont2.size());
    for (const auto &v1 : cont1) {
      for (const auto &v2 : cont2) {
        pairs.emplace_back(v1, v2);
      }
    }

    parallelizable_transform(pairs.begin(), pairs.end(), out, [&](const auto &pair) {
      return op(pair.first, pair.second);
    });
    return;
  }
#endif // ENABLE_TBB
  // Sequential fallback: same visiting order as the flattened list above.
  for (const auto &v1 : cont1) {
    for (const auto &v2 : cont2) {
      *(out++) = op(v1, v2);
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment