Created
September 28, 2021 13:49
-
-
Save malfet/47e9a88b3e97f6ddabd8f85544c063d5 to your computer and use it in GitHub Desktop.
clang-13 crash using omp critical pragma
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Reduced reproducer for a clang-13 crash triggered by `#pragma omp critical`.
// Build with: c++ -std=c++14 -O2 -fPIC -Xpreprocessor -fopenmp mup-smaller.cpp
#include <omp.h>

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <exception>
#include <stdexcept>
#include <string>
// Branch hint: converts `expr` to bool and tells the optimizer the result is
// expected to be false. Compilers without __builtin_expect get a plain
// boolean conversion, so the macro stays usable inside any condition.
#if defined(__GNUC__) || defined(__clang__)
#define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0))
#else
#define C10_UNLIKELY(expr) (static_cast<bool>(expr))
#endif
namespace at { | |
// Ceiling division: for x >= 0 and y > 0 returns the smallest integer that is
// >= x / y. The caller must pass y != 0 and keep x + y - 1 within int64_t.
inline int64_t divup(int64_t x, int64_t y) {
  return (x + y - 1) / y;
}
// Declared only; the definition is not part of this file.
// Invoked by internal::lazy_init_num_threads() the first time that function
// runs on a given thread.
void init_num_threads();

// Declared only; the definition is not part of this file.
// Read by internal::ThreadIdGuard to remember the id it has to restore.
int get_thread_num();
namespace internal { | |
// Declared only; the definition is not part of this file.
// ThreadIdGuard calls it to install a thread id and, later, to put the
// previous one back.
void set_thread_num(int);
class ThreadIdGuard { | |
public: | |
ThreadIdGuard(int new_id): | |
old_id_(at::get_thread_num()) { | |
set_thread_num(new_id); | |
} | |
~ThreadIdGuard() { | |
set_thread_num(old_id_); | |
} | |
private: | |
int old_id_; | |
}; | |
inline void lazy_init_num_threads() { | |
thread_local bool init = false; | |
if (C10_UNLIKELY(!init)) { | |
at::init_num_threads(); | |
init = true; | |
} | |
} | |
template <typename F> | |
inline void invoke_parallel(int64_t begin, int64_t end, int64_t grain_size, const F& f) { | |
std::atomic_flag err_flag = ATOMIC_FLAG_INIT; | |
std::exception_ptr eptr; | |
#pragma omp parallel | |
{ | |
// choose number of tasks based on grain size and number of threads | |
// can't use num_threads clause due to bugs in GOMP's thread pool (See #32008) | |
int64_t num_threads = omp_get_num_threads(); | |
if (grain_size > 0) { | |
num_threads = std::min(num_threads, divup((end - begin), grain_size)); | |
} | |
int64_t tid = omp_get_thread_num(); | |
int64_t chunk_size = divup((end - begin), num_threads); | |
int64_t begin_tid = begin + tid * chunk_size; | |
if (begin_tid < end) { | |
try { | |
internal::ThreadIdGuard tid_guard(tid); | |
f(begin_tid, std::min(end, chunk_size + begin_tid)); | |
} catch (...) { | |
if (!err_flag.test_and_set()) { | |
eptr = std::current_exception(); | |
} | |
} | |
} | |
} | |
if (eptr) { | |
std::rethrow_exception(eptr); | |
} | |
} | |
} // namespace internal | |
template <class F> | |
inline void parallel_for( | |
const int64_t begin, | |
const int64_t end, | |
const int64_t grain_size, | |
const F& f) { | |
if (begin >= end) { | |
return; | |
} | |
at::internal::lazy_init_num_threads(); | |
const auto numiter = end - begin; | |
const bool use_parallel = ( | |
numiter > grain_size && numiter > 1 && | |
omp_get_max_threads() > 1 && !omp_in_parallel()); | |
if (!use_parallel) { | |
internal::ThreadIdGuard tid_guard(0); | |
f(begin, end); | |
return; | |
} | |
internal::invoke_parallel(begin, end, grain_size, f); | |
} | |
namespace native { | |
void cpu_max_unpool( | |
float* input_data, | |
int64_t* indices_data , | |
float* output_data, | |
int64_t numel, | |
int64_t channels, | |
int64_t output_depth, | |
int64_t output_height, | |
int64_t output_width) { | |
int64_t output_image_size = numel / channels; | |
bool has_error = false; | |
int64_t error_index = 0; | |
// parallel on dim N, C, D, H, W: [channels, input_image_size] | |
at::parallel_for(0, numel, 0, [&](int64_t begin, int64_t end) { | |
for (int64_t i = begin; i < end; i++) { | |
float* output_ptr = output_data; | |
int64_t maxp = indices_data[i]; | |
if (maxp < 0 || maxp >= output_image_size) { | |
#pragma omp critical | |
{ | |
has_error = true; | |
error_index = maxp; | |
} | |
} else { | |
output_ptr[maxp] = input_data[i]; | |
} | |
} | |
}); | |
} | |
}} // at::native |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment