Skip to content

Instantly share code, notes, and snippets.

@danking
Created April 5, 2013 23:30
Show Gist options
  • Select an option

  • Save danking/5323483 to your computer and use it in GitHub Desktop.

Select an option

Save danking/5323483 to your computer and use it in GitHub Desktop.
Run SAD (sum of absolute differences) on arbitrary-size matrices
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xmmintrin.h>
/* Dimensions of the buffer main() allocates and passes to runsad as img1. */
#define WIDTH1 64
#define HEIGHT1 64
/* Dimensions of the buffer main() allocates and passes to runsad as img2. */
#define WIDTH2 1280
#define HEIGHT2 960
/* Scores placements of img1 (bWidth1 x bHeight1) inside img2
 * (bWidth2 x bHeight2) using inner_sad. Returns a heap-allocated int array;
 * the score for top-left offset (x, y) is stored at index y * bWidth2 + x. */
int * runsad(void* img1, int bWidth1, int bHeight1,
void* img2, int bWidth2, int bHeight2);
/* Sums the absolute byte differences between pattern and source, each read
 * as h consecutive rows of w bytes. */
int inner_sad(char* pattern, char* source, int w, int h);
/*
 * Emit one header line for the result table: a leading pad standing in for
 * the row-number column, followed by every column index from 0 up to
 * (HEIGHT2 - HEIGHT1) - 1, each right-aligned in a three-character field.
 */
void print_columns_header() {
    const int column_count = HEIGHT2 - HEIGHT1;

    /* pad where the row numbers appear on the data lines */
    printf(" ");

    for (int column = 0; column != column_count && column < column_count; ++column) {
        printf("%3d", column);
    }

    printf("\n");
}
/*
 * Demo driver: builds a WIDTH1 x HEIGHT1 pattern and a WIDTH2 x HEIGHT2
 * source image, runs runsad over them and prints the scores as a table
 * (one line per x offset, one hexadecimal column per y offset).
 *
 * Returns 0 on success and EXIT_FAILURE if any allocation fails.
 */
int main(void) {
    const size_t pattern_bytes = (size_t) WIDTH1 * HEIGHT1;
    const size_t source_bytes = (size_t) WIDTH2 * HEIGHT2;
    unsigned char *a = malloc(pattern_bytes);
    unsigned char *b = malloc(source_bytes);
    int *res = NULL;
    size_t i;
    int x;

    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory allocating images\n");
        free(a);
        free(b);
        return EXIT_FAILURE;
    }

    /* malloc'd memory is indeterminate; fill both images with deterministic
     * placeholder data so the scores are well defined and reproducible.
     * Replace these loops with real pixel data as needed. */
    for (i = 0; i < pattern_bytes; ++i) {
        a[i] = (unsigned char) (i * 31u);
    }
    for (i = 0; i < source_bytes; ++i) {
        b[i] = (unsigned char) (i * 17u);
    }

    res = runsad((void*)a, WIDTH1, HEIGHT1,
                 (void*)b, WIDTH2, HEIGHT2);
    if (res == NULL) {
        fprintf(stderr, "runsad failed\n");
        free(a);
        free(b);
        return EXIT_FAILURE;
    }

    /* The bounds deliberately match print_columns_header(), which lists
     * HEIGHT2 - HEIGHT1 columns. */
    for (x = 0; x < (WIDTH2 - WIDTH1); ++x) {
        int y;

        /* repeat the header periodically so long tables stay readable */
        if (x % 200 == 0) {
            print_columns_header();
        }
        printf("%5d ", x);
        for (y = 0; y < (HEIGHT2 - HEIGHT1); ++y) {
            /* runsad stores the score for offset (x, y) at y * width2 + x */
            int i_res = y * WIDTH2 + x;
            printf("%3x", (unsigned) res[i_res]);
        }
        printf("\n");
    }

    free(res);
    free(b);
    free(a);
    return 0;
}
/*
 * Sum of absolute differences (SAD) between two byte blocks.
 *
 * pattern, source: blocks of h rows, each row w bytes long and stored back
 *                  to back (row stride == w) in both buffers. Bytes are
 *                  treated as unsigned values 0..255.
 * w, h:            block width and height in bytes/rows.
 *
 * Returns the sum over all w*h positions of |pattern[i] - source[i]|, or 0
 * when w or h is not positive.
 */
int inner_sad(char* pattern, char* source, int w, int h) {
    /* Pixels are unsigned; going through plain char would make values
     * above 127 negative and corrupt the scalar differences below. */
    const unsigned char *pat = (const unsigned char *) pattern;
    const unsigned char *src = (const unsigned char *) source;
    int words = w / 8;
    int sad = 0;
    int y;

    if (w <= 0 || h <= 0) {
        return 0;
    }

    for (y = 0; y < h; ++y) {
        const unsigned char *prow = pat + (size_t) w * (size_t) y;
        const unsigned char *srow = src + (size_t) w * (size_t) y;
        int word;
        int extra;

        for (word = 0; word < words; ++word) {
            __m64 p;
            __m64 s;

            /* memcpy instead of dereferencing a cast pointer: the rows are
             * not guaranteed to be 8-byte aligned. */
            memcpy(&p, prow + 8 * word, sizeof p);
            memcpy(&s, srow + 8 * word, sizeof s);

            /* the max diff of a byte is 255 (2^8 - 1), there are 8 (2^3)
             * bytes, so the total max diff is 8 * 255 or < 2^11; it sits in
             * the low 16 bits of the result with the rest zeroed */
            sad += _mm_cvtsi64_si32(_mm_sad_pu8(p, s));
        }

        /* if we have 20 pixels, we'll do 16 the quick way and four this way */
        for (extra = words * 8; extra < w; ++extra) {
            int diff = (int) prow[extra] - (int) srow[extra];
            sad += (diff < 0) ? -diff : diff;
        }
    }

    /* Leave MMX state so later x87 floating-point code works correctly. */
    _mm_empty();
    return sad;
}
/*
 * Slide the pattern image over the source image and compute the sum of
 * absolute differences (SAD) at every position where the pattern fits.
 *
 * img1, width1, height1: pattern image, width1 * height1 bytes, rows stored
 *                        back to back.
 * img2, width2, height2: source image, width2 * height2 bytes, rows stored
 *                        back to back.
 *
 * Returns a heap-allocated array the caller must free(). The score for the
 * pattern placed with its top-left corner at source offset (tlx, tly) is
 * stored at index tly * width2 + tlx, i.e. the result uses the source row
 * stride; entries that correspond to no valid placement are 0. The array
 * holds width2 * (height2 - height1 + 1) ints.
 *
 * Returns NULL if an image pointer is NULL, a pattern dimension is not
 * positive, the pattern is larger than the source, or allocation fails.
 */
int * runsad(void* img1, int width1, int height1,
             void* img2, int width2, int height2) {
    /* Convert once up front: arithmetic on void* is not standard C. */
    char *pattern = (char *) img1;
    char *source = (char *) img2;
    size_t stride;
    size_t out_cols;
    size_t out_rows;
    size_t tlx;
    size_t tly;
    int *sad;

    if (pattern == NULL || source == NULL
        || width1 <= 0 || height1 <= 0
        || width2 < width1 || height2 < height1) {
        return NULL;
    }

    stride = (size_t) width2;
    out_cols = (size_t) (width2 - width1) + 1;
    out_rows = (size_t) (height2 - height1) + 1;

    /* Guard the element-count multiplication against size_t overflow. */
    if (out_rows > ((size_t) -1) / sizeof *sad / stride) {
        return NULL;
    }

    /* calloc so every slot, including the unused right-hand columns, starts
     * at a defined value. */
    sad = calloc(out_rows * stride, sizeof *sad);
    if (sad == NULL) {
        return NULL;
    }

    /* (tlx, tly) is the top-left coordinate of the pattern within the source */
    for (tly = 0; tly < out_rows; ++tly) {
        for (tlx = 0; tlx < out_cols; ++tlx) {
            int total = 0;
            int row;

            /* inner_sad assumes both buffers share one row stride, but the
             * source rows are width2 bytes apart while pattern rows are
             * width1 bytes apart. Feeding it one row at a time (h == 1)
             * sidesteps that assumption. */
            for (row = 0; row < height1; ++row) {
                total += inner_sad(pattern + (size_t) row * (size_t) width1,
                                   source + (tly + (size_t) row) * stride + tlx,
                                   width1,
                                   1);
            }
            sad[tly * stride + tlx] = total;
        }
    }
    return sad;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment