Last active
September 28, 2022 00:34
-
-
Save michelerenzullo/e6c012b41fc0ab278a523815f14c842e to your computer and use it in GitHub Desktop.
Fast Box Blur + reflected padding without memory waste repo: https://github.com/michelerenzullo/FastBoxBlur
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// @brief Horizontal box blur of an interleaved image using mirrored borders
///        (edge pixel not repeated), without allocating a padded copy.
///
/// Each output sample is the mean of the 2*r+1 input samples centred on it
/// within the same row and channel, where r = (ksize - 1) / 2 clamped to
/// [0, w - 1]. Samples that fall outside the row are read by mirroring around
/// the first / last pixel: index -k maps to k, index (w-1)+k maps to (w-1)-k.
///
/// @tparam T      Sample type. Integral types accumulate in uint32_t and the
///                result is rounded by adding 0.5 before truncation; other
///                types accumulate in float.
///                NOTE(review): the unsigned accumulator presumably expects
///                non-negative integral samples — confirm against callers.
/// @tparam C      Number of interleaved channels per pixel.
/// @param in      Source buffer, indexed as in[(row * w + col) * C + ch].
/// @param out     Destination buffer with the same layout. It must not alias
///                `in`: the sliding window keeps reading input samples at
///                positions whose output has already been written.
/// @param w       Row width in pixels.
/// @param h       Number of rows.
/// @param ksize   Kernel size; values below 1 behave like 1 (plain copy).
template<typename T, int C>
void horizontal_blur_kernel_reflect(const T* in, T* out, const int w, const int h, const int ksize)
{
    // Nothing to do for an empty image (also avoids a negative radius below).
    if (w <= 0 || h <= 0)
        return;

    // change the local variable types depending on the template type for faster calculations
    using calc_type = std::conditional_t<std::is_integral_v<T>, uint32_t, float>;
    constexpr float rounding = std::is_integral_v<T> ? 0.5f : 0.f;

    // The radius can never exceed w - 1, otherwise a mirrored index would
    // leave the row again; a non-positive kernel size degrades to r = 0.
    int r = (ksize - 1) / 2;
    r = std::min(r, w - 1);
    if (r < 0)
        r = 0;

    const float iarr = 1.f / (r + r + 1);
    const int last = w - 1;

    // First column whose window no longer touches the left padding AND may
    // touch the right padding. Using max() keeps the right-edge loop from
    // re-processing columns already handled by the left-edge loop when the
    // kernel is wider than the row (w <= 2 * r + 1).
    const int right_begin = (w - r - 1 > r + 1) ? (w - r - 1) : (r + 1);

    #pragma omp parallel for
    for (int i = 0; i < h; i++)
    {
        // Row-relative views: every index below is a column in [0, w).
        const T* src = in + i * w * C;
        T* dst = out + i * w * C;

        calc_type acc[C] = {};

        // Slide the window one step: add the entering column, drop the leaving
        // one, and store the mean at column p.
        auto slide = [&](const int p, const int add, const int sub)
        {
            for (int ch = 0; ch < C; ++ch)
            {
                acc[ch] += src[add * C + ch] - src[sub * C + ch];
                dst[p * C + ch] = static_cast<T>(acc[ch] * iarr + rounding);
            }
        };

        // for ksize = 7, and r = 3, and array length = 11
        // array is           [ a b c d e f g h i j k ]
        // emulated array is  [d c b _ a b c d e f g h i j k _ j i h]
        // emulating the left pad: the initial accumulation is
        // (d + c + b + a + b + c + d) --> 2 * (a + b + c + d) - a
        for (int ch = 0; ch < C; ++ch)
        {
            for (int j = 0; j <= r; ++j)
                acc[ch] += 2 * src[j * C + ch];
            acc[ch] -= src[ch]; // remove extra pivot value
            dst[ch] = static_cast<T>(acc[ch] * iarr + rounding);
        }

        // Left edge: the leaving sample is in the mirrored left padding
        // (p - 1 - r < 0 maps to r + 1 - p). For wide kernels the entering
        // sample may already be in the mirrored right padding; since
        // r <= last, p + r <= 2 * last and the mirrored index stays >= 0.
        for (int p = 1; p <= r; ++p)
        {
            const int entering = p + r;
            slide(p, entering <= last ? entering : 2 * last - entering, r + 1 - p);
        }

        // Interior: the whole window lies inside the row. Empty when
        // w <= 2 * r + 2.
        for (int p = r + 1; p < w - r - 1; ++p)
            slide(p, p + r, p - r - 1);

        // Right edge: p + r >= last here, so the entering sample is always
        // taken through the mirror (which is the identity at p + r == last);
        // the leaving sample p - r - 1 >= 0 is a real column.
        for (int p = right_begin; p < w; ++p)
            slide(p, 2 * last - (p + r), p - r - 1);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment