Created
April 5, 2013 23:30
-
-
Save danking/5323483 to your computer and use it in GitHub Desktop.
Run SAD (sum of absolute differences) on arbitrary-size matrices
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xmmintrin.h>
/* Size, in bytes, of the first image main() hands to runsad() (the pattern). */
#define WIDTH1 64
#define HEIGHT1 64

/* Size, in bytes, of the second image main() hands to runsad() (the source). */
#define WIDTH2 1280
#define HEIGHT2 960

/*
 * Compare the pattern img1 against the source img2 at every top-left
 * offset (tlx, tly) where the pattern fits.  The returned heap array holds
 * the result for offset (tlx, tly) at index tly * width2 + tlx.
 */
int *runsad(void *img1, int width1, int height1,
            void *img2, int width2, int height2);

/*
 * Sum of absolute byte differences between two buffers of h rows of
 * w bytes each.
 */
int inner_sad(char *pattern, char *source, int w, int h);
| void print_columns_header() { | |
| /* print the space for the row numbers */ | |
| printf(" "); | |
| int y; | |
| for (y = 0; y < (HEIGHT2 - HEIGHT1); ++y) { | |
| printf("%3d", y); | |
| } | |
| printf("\n"); | |
| } | |
| int main() { | |
| unsigned char * a = malloc(WIDTH1 * HEIGHT1); | |
| unsigned char * b = malloc(WIDTH2 * HEIGHT2); | |
| int * res = runsad((void*)a, WIDTH1, HEIGHT1, | |
| (void*)b, WIDTH2, HEIGHT2); | |
| int x; | |
| for (x = 0; x < (WIDTH2 - WIDTH1); ++x) { | |
| if (x % 200 == 0) | |
| print_columns_header(); | |
| printf("%5d ", x); | |
| int y; | |
| for (y = 0; y < (HEIGHT2 - HEIGHT1); ++y) { | |
| int i = y * WIDTH2 + x; | |
| printf("%3x", res[i]); | |
| } | |
| printf("\n"); | |
| } | |
| return 0; | |
| } | |
/*
 * Sum of absolute differences (SAD) between two byte buffers.
 *
 * Both buffers are read as h rows of w bytes each, tightly packed: row y
 * starts at offset w * y in BOTH pattern and source.  Bytes are treated
 * as unsigned values 0..255.
 *
 * pattern, source: the buffers to compare (not modified).
 * w:               bytes per row.
 * h:               number of rows.
 *
 * Returns the total of |pattern[i] - source[i]| over all w * h bytes;
 * 0 when w or h is not positive.
 */
int inner_sad(char* pattern, char* source, int w, int h) {
    /* Read pixels as unsigned bytes: plain char may be signed, which
     * would give wrong differences for values >= 128. */
    const unsigned char *pat = (const unsigned char *) pattern;
    const unsigned char *src = (const unsigned char *) source;
    const int words = w / 8;
    int sad = 0;

    for (int y = 0; y < h; ++y) {
        const unsigned char *prow = pat + (size_t)w * (size_t)y;
        const unsigned char *srow = src + (size_t)w * (size_t)y;

        /* Eight bytes at a time.  The rows are not necessarily 8-byte
         * aligned, so load through memcpy instead of casting to __m64*. */
        for (int word = 0; word < words; ++word) {
            __m64 p8;
            __m64 s8;
            memcpy(&p8, prow + 8 * word, sizeof p8);
            memcpy(&s8, srow + 8 * word, sizeof s8);
            /* The max diff of a byte is 255 and there are 8 bytes, so the
             * sum is at most 2040; it sits in the low 16 bits of the
             * result with every other bit zero. */
            sad += _mm_cvtsi64_si32(_mm_sad_pu8(p8, s8));
        }

        /* If we have 20 pixels, 16 are done the quick way and the last
         * four here. */
        for (int extra = words * 8; extra < w; ++extra) {
            int diff = (int) prow[extra] - (int) srow[extra];
            sad += (diff < 0) ? -diff : diff;
        }
    }

    /* Leave MMX state so that later x87 floating-point code is safe. */
    _mm_empty();
    return sad;
}
/*
 * Slide the pattern img1 over the source img2 and compute, for every
 * top-left offset (tlx, tly) at which the pattern lies entirely inside the
 * source, the sum of absolute differences of the overlapping bytes.
 *
 * img1: pattern, width1 * height1 bytes, rows tightly packed.
 * img2: source,  width2 * height2 bytes, rows tightly packed.
 *
 * Returns a heap array owned by the caller (release with free()), or NULL
 * if an image pointer is NULL, a pattern dimension is not positive, the
 * pattern is larger than the source, or allocation fails.  The result for
 * offset (tlx, tly) is stored at index tly * width2 + tlx, i.e. result
 * rows use the source width as their stride; entries with
 * tlx > width2 - width1 are unused and left at zero.
 */
int * runsad(void* img1, int width1, int height1,
             void* img2, int width2, int height2) {
    if (img1 == NULL || img2 == NULL) {
        return NULL;
    }
    if (width1 <= 0 || height1 <= 0 || width1 > width2 || height1 > height2) {
        return NULL;
    }

    /* number of valid offsets in each direction */
    const int cols = width2 - width1 + 1;
    const int rows = height2 - height1 + 1;

    /* One result row per tly, width2 ints wide to match the indexing
     * below; calloc zero-fills and checks the size multiplication. */
    int *sad = calloc((size_t)rows, (size_t)width2 * sizeof *sad);
    if (sad == NULL) {
        return NULL;
    }

    char *pattern = img1;
    char *source = img2;

    /* tly/tlx: the top-left coordinate of the pattern within the source */
    for (int tly = 0; tly < rows; ++tly) {
        for (int tlx = 0; tlx < cols; ++tlx) {
            size_t tl_index = (size_t)tly * (size_t)width2 + (size_t)tlx;
            int total = 0;

            /* inner_sad() assumes both buffers share one row stride, but
             * the pattern advances by width1 per row and the source by
             * width2.  Feed it one row at a time (h == 1) so each row is
             * located with its own stride. */
            for (int row = 0; row < height1; ++row) {
                total += inner_sad(pattern + (size_t)row * (size_t)width1,
                                   source + tl_index
                                          + (size_t)row * (size_t)width2,
                                   width1,
                                   1);
            }
            sad[tl_index] = total;
        }
    }
    return sad;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment