Last active
November 1, 2022 18:40
-
-
Save michelerenzullo/76638be18769f2fbf80b96155e6bfea0 to your computer and use it in GitHub Desktop.
Cache-friendly deinterleave RGB image - Comparison between standard and deinterleave for blocks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// DEINTERLEAVE RGB Image | |
// Splits a packed 3-channel buffer (c0,c1,c2,c0,c1,c2,...) into three planes.
//
//   T                  sample type of the interleaved source
//   U                  sample type of the planar destination
//   interleaved_BGR    source buffer, read at indices [0, 3 * nsize)
//   deinterleaved_BGR  three destination pointers; plane k receives the
//                      samples found at source index 3 * i + k and is
//                      written at indices [0, nsize)
//   nsize              number of pixels (samples per plane)
//
// Pixels are processed in blocks of at most
// 262144 / (3 * max(sizeof(T), sizeof(U))) pixels so the data touched by one
// block stays small; the blocks are independent and are distributed by the
// OpenMP pragma when OpenMP is enabled.
//
// Conversion rule: when T is not an integral type and U is, 0.5 is added
// before the truncating conversion (round-half-up for non-negative samples;
// no clamping is performed). Every other T/U combination is a plain
// static_cast, so integer samples are never routed through float.
template<typename T, typename U>
inline void deinterleave_BGR(const T* const interleaved_BGR, U** const deinterleaved_BGR, const uint32_t nsize) {
    constexpr int64_t block = static_cast<int64_t>(262144 / (3 * std::max(sizeof(T), sizeof(U))));
    // A zero block size would make the outer loop never advance.
    static_assert(block > 0, "sample type is too large for the 256 KiB block budget");

    // Only float -> integer needs the rounding offset; applying a float
    // offset unconditionally would silently lose precision for wide integers.
    const auto convert = [](const T sample) -> U {
        if constexpr (std::is_integral_v<U> && !std::is_integral_v<T>)
            return static_cast<U>(sample + 0.5f);
        else
            return static_cast<U>(sample);
    };

    // 64-bit signed arithmetic: no signed/unsigned mixing with nsize, no
    // overflow in `first * 3`, and a signed index keeps the loop in the
    // canonical form accepted by older OpenMP implementations.
    const int64_t total = static_cast<int64_t>(nsize);
    const int64_t num_blocks = (total + block - 1) / block;

#pragma omp parallel for
    for (int64_t blk = 0; blk < num_blocks; ++blk)
    {
        const int64_t first = blk * block;
        // The last block may be shorter than `block`.
        const int64_t count = std::min<int64_t>(block, total - first);

        const T* const src = interleaved_BGR + first * 3;
        U* const B = deinterleaved_BGR[0] + first;
        U* const G = deinterleaved_BGR[1] + first;
        U* const R = deinterleaved_BGR[2] + first;

        for (int64_t i = 0; i < count; ++i)
        {
            B[i] = convert(src[i * 3 + 0]);
            G[i] = convert(src[i * 3 + 1]);
            R[i] = convert(src[i * 3 + 2]);
        }
    }
}
// Packs three planar buffers into one interleaved 3-channel buffer
// (c0,c1,c2,c0,c1,c2,...).
//
//   T                  sample type of the interleaved destination
//   U                  sample type of the planar sources
//   deinterleaved_BGR  three source pointers; plane k is read at indices
//                      [0, nsize) and supplies destination index 3 * i + k
//   interleaved_BGR    destination buffer, written at indices [0, 3 * nsize)
//   nsize              number of pixels (samples per plane)
//
// Pixels are processed in blocks of at most
// 262144 / (3 * max(sizeof(T), sizeof(U))) pixels so the data touched by one
// block stays small; the blocks are independent and are distributed by the
// OpenMP pragma when OpenMP is enabled.
//
// Conversion rule: when U is not an integral type and T is, 0.5 is added
// before the truncating conversion (round-half-up for non-negative samples;
// no clamping is performed). Every other T/U combination is a plain
// static_cast, so integer samples are never routed through float.
template<typename T, typename U>
void interleave_BGR(const U** const deinterleaved_BGR, T* const interleaved_BGR, const uint32_t nsize) {
    constexpr int64_t block = static_cast<int64_t>(262144 / (3 * std::max(sizeof(T), sizeof(U))));
    // A zero block size would make the outer loop never advance.
    static_assert(block > 0, "sample type is too large for the 256 KiB block budget");

    // Only float -> integer needs the rounding offset; applying a float
    // offset unconditionally would silently lose precision for wide integers.
    const auto convert = [](const U sample) -> T {
        if constexpr (std::is_integral_v<T> && !std::is_integral_v<U>)
            return static_cast<T>(sample + 0.5f);
        else
            return static_cast<T>(sample);
    };

    // 64-bit signed arithmetic: no signed/unsigned mixing with nsize, no
    // overflow in `first * 3`, and a signed index keeps the loop in the
    // canonical form accepted by older OpenMP implementations.
    const int64_t total = static_cast<int64_t>(nsize);
    const int64_t num_blocks = (total + block - 1) / block;

#pragma omp parallel for
    for (int64_t blk = 0; blk < num_blocks; ++blk)
    {
        const int64_t first = blk * block;
        // The last block may be shorter than `block`.
        const int64_t count = std::min<int64_t>(block, total - first);

        const U* const B = deinterleaved_BGR[0] + first;
        const U* const G = deinterleaved_BGR[1] + first;
        const U* const R = deinterleaved_BGR[2] + first;
        T* const dst = interleaved_BGR + first * 3;

        for (int64_t i = 0; i < count; ++i)
        {
            dst[i * 3 + 0] = convert(B[i]);
            dst[i * 3 + 1] = convert(G[i]);
            dst[i * 3 + 2] = convert(R[i]);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Looks good.
Please note that I have already performed some implicit channel splits in my code to date. One can read the data in one format, do the processing as required, and then write it back in a different format. I have tended to keep the data in a single array: instead of three pointers, each initialised at offset 0 of its own array, I use three pointers into the same array, initialised at 0, the green offset, and the blue offset.