Skip to content

Instantly share code, notes, and snippets.

@danking
Last active December 15, 2015 21:09
Show Gist options
  • Select an option

  • Save danking/5323403 to your computer and use it in GitHub Desktop.

Select an option

Save danking/5323403 to your computer and use it in GitHub Desktop.
SAD (sum of absolute differences) on arbitrary-size matrices using the machine instruction
#include<xmmintrin.h>
#include<stdio.h>
#include<string.h>
/*
 * Sum of absolute differences (SAD) between two w-by-h byte matrices.
 *
 * Both matrices are read row-major with no padding between rows, so row y
 * starts at byte offset w*y. Every byte is treated as an unsigned value in
 * 0..255, regardless of whether plain char is signed on this platform.
 *
 * pattern, source: each must provide at least w*h readable bytes; no
 *                  particular alignment is required.
 * w, h:            row width in bytes and number of rows. If either is
 *                  not positive the result is 0.
 *
 * Returns the sum over every position of |pattern[i] - source[i]|.
 */
int inner_sad(char* pattern, char* source, int w, int h) {
    if (w <= 0 || h <= 0)
        return 0;

    /* Work on unsigned bytes: with a signed char a value such as 200 would
     * be read as -56 and the absolute difference would come out wrong. */
    const unsigned char *pat = (const unsigned char *) pattern;
    const unsigned char *src = (const unsigned char *) source;
    const size_t width = (size_t) w;
    const size_t height = (size_t) h;
    const size_t words = width / 8;   /* full 8-byte groups per row */
    int sad = 0;

    for (size_t y = 0; y < height; ++y) {
        const unsigned char *prow = pat + width * y;
        const unsigned char *srow = src + width * y;

        for (size_t word = 0; word < words; ++word) {
            __m64 p;
            __m64 s;
            /* Rows start at arbitrary byte offsets (e.g. w == 9), so copy
             * the 8 bytes instead of dereferencing a misaligned __m64*. */
            memcpy(&p, prow + word * 8, sizeof p);
            memcpy(&s, srow + word * 8, sizeof s);
            __m64 res = _mm_sad_pu8(p, s);
            /* The instruction leaves the sum (at most 8 * 255 = 2040) in
             * the low 16 bits of the result. */
            sad += _mm_cvtsi64_si32(res) & 0xffff;
        }

        /* If a row has 20 bytes, 16 are handled above and the remaining
         * four are handled one at a time here. */
        for (size_t extra = words * 8; extra < width; ++extra) {
            int diff = (int) prow[extra] - (int) srow[extra];
            sad += diff < 0 ? -diff : diff;
        }
    }

    /* Leave the MMX state clean so later x87 floating-point code works. */
    _mm_empty();
    return sad;
}
/*
 * Demo driver: computes the SAD of two 9x2 byte matrices and prints it.
 */
int main(void) {
    unsigned char a[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x09,
                         0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x01};
    unsigned char b[] = {0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                         0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x03};
    /* row 0: 6 + 3 + 9 = 18, row 1: 6 + 2 = 8, total 26 */
    /* inner_sad takes char*, so convert the unsigned byte arrays explicitly. */
    int sad = inner_sad((char *) a, (char *) b, 9, 2);
    printf("%d\n", sad);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment