Skip to content

Instantly share code, notes, and snippets.

@lydonchandra
Created October 6, 2020 11:37
Show Gist options
  • Save lydonchandra/220eb2aeeae8a9542290a249349ba9a8 to your computer and use it in GitHub Desktop.
Save lydonchandra/220eb2aeeae8a9542290a249349ba9a8 to your computer and use it in GitHub Desktop.
BilinearInterpolationSSE3.cpp
// https://fastcpp.blogspot.com/2011/06/bilinear-pixel-interpolation-using-sse.html
/// Bilinearly samples `img` at the fractional position (x, y).
///
/// The 2x2 neighbourhood whose top-left pixel is at (int(x), int(y)) is loaded
/// with two 64-bit loads, transposed into per-channel groups, multiplied by
/// four 8.8 fixed-point weights and summed per channel.
///
/// Only SSE2/SSSE3 intrinsics are used (`_mm_hadd_epi32` is the SSSE3 one);
/// no SSE4.1 instruction is required.
///
/// @param img  Source image; `img->width` is used as the row stride (in pixels)
///             and `img->data` as the base pixel pointer.
/// @param x    Horizontal sample coordinate in pixels.
/// @param y    Vertical sample coordinate in pixels.
/// @return     The blended pixel, taken from the low 32 bits of the result vector.
///
/// @note No bounds checking is done: 8 bytes are read at `p0` and at
///       `p0 + stride`, so the caller must guarantee that the whole 2x2
///       neighbourhood is readable.
/// @note NOTE(review): assumes Pixel is 4 bytes with 8-bit channels (two pixels
///       per 64-bit load) - confirm against the Pixel definition.
inline Pixel GetPixelSSE3(const Image<Pixel>* img, float x, float y)
{
    const int stride = img->width;
    // Pointer to the top-left pixel (coordinates are truncated toward zero).
    const Pixel* p0 = img->data + static_cast<int>(x) + static_cast<int>(y) * stride;

    const __m128i zero = _mm_setzero_si128();

    // Load the data (2 pixels per 64-bit load, one load per row).
    const __m128i p12 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(p0));
    const __m128i p34 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(p0 + stride));

    // Presumably the four bilinear weights w1..w4 as floats; CalcWeights is
    // defined elsewhere - verify its lane order against the madd below.
    __m128 weight = CalcWeights(x, y);

    // Convert RGBA RGBA RGBA RGBA to RRRR GGGG BBBB AAAA (AoS to SoA).
    const __m128i p1234      = _mm_unpacklo_epi8(p12, p34);       // R1 R3 G1 G3 ... | R2 R4 G2 G4 ...
    const __m128i p34xx      = _mm_unpackhi_epi64(p1234, zero);   // upper half moved down
    const __m128i p1234_8bit = _mm_unpacklo_epi8(p1234, p34xx);   // R1 R2 R3 R4 G1 G2 G3 G4 ...

    // Zero-extend each channel byte to 16 bit so it can feed _mm_madd_epi16.
    const __m128i pRG = _mm_unpacklo_epi8(p1234_8bit, zero);
    const __m128i pBA = _mm_unpackhi_epi8(p1234_8bit, zero);

    // Convert weights to 8.8 fixed point (CONST_256 is defined elsewhere;
    // presumably a vector of 256.0f).
    weight = _mm_mul_ps(weight, CONST_256);
    __m128i weighti = _mm_cvtps_epi32(weight);       // w4 w3 w2 w1
    weighti = _mm_packs_epi32(weighti, weighti);     // 32 -> 2x16 bit: w1 w2 w3 w4 w1 w2 w3 w4

    // outRG = [w1*R1 + w2*R2 | w3*R3 + w4*R4 | w1*G1 + w2*G2 | w3*G3 + w4*G4]
    const __m128i outRG = _mm_madd_epi16(pRG, weighti);
    // outBA = [w1*B1 + w2*B2 | w3*B3 + w4*B4 | w1*A1 + w2*A2 | w3*A3 + w4*A4]
    const __m128i outBA = _mm_madd_epi16(pBA, weighti);

    // Horizontal add produces the four channel sums (32 bit each): R G B A.
    __m128i out = _mm_hadd_epi32(outRG, outBA);

    // Divide by 256. The shift is arithmetic so a negative sum stays negative
    // and is clamped to 0 by the unsigned pack below.
    out = _mm_srai_epi32(out, 8);

    // Narrow 32 -> 16 -> 8 bit with saturation. _mm_packs_epi32 (SSE2) is used
    // instead of the SSE4.1-only _mm_packus_epi32; the final unsigned pack
    // clamps every channel to [0, 255].
    out = _mm_packs_epi32(out, zero);
    out = _mm_packus_epi16(out, zero);

    // The four channel bytes now sit in the low 32 bits.
    return _mm_cvtsi128_si32(out);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment