/*
 * Example program that times non-SIMD vs SIMD (NEON) RGB de-interleaving
 * on the ARM64 architecture.
 *
 * Source: GitHub gist vhxs/14526f782dc80f34158b79dd7cec738e,
 * created March 13, 2022 19:52.
 */
#include <arm_neon.h>

#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
void rgb_deinterleave_c(uint8_t *r, uint8_t *g, uint8_t *b, const uint8_t *rgb, int len_color) {
    /*
     * Split the interleaved buffer "rgb" (R0 G0 B0 R1 G1 B1 ...) into the
     * separate planes "r", "g" and "b", one byte at a time.
     *
     * "len_color" is the number of pixels: "rgb" is read for 3 * len_color
     * bytes and each output plane is written for len_color bytes.
     * A zero or negative "len_color" copies nothing.
     */
    if (len_color <= 0) {
        return;
    }

    /* Index with size_t so that 3 * i cannot overflow a signed int. */
    const size_t count = (size_t) len_color;
    for (size_t i = 0; i < count; i++) {
        const uint8_t *pixel = rgb + 3 * i;
        r[i] = pixel[0];
        g[i] = pixel[1];
        b[i] = pixel[2];
    }
}
void rgb_deinterleave_neon(uint8_t *r, uint8_t *g, uint8_t *b, const uint8_t *rgb, int len_color) {
    /*
     * Split the interleaved buffer "rgb" (R0 G0 B0 R1 G1 B1 ...) into the
     * separate planes "r", "g" and "b", 16 pixels per iteration.
     *
     * "len_color" is the number of pixels: "rgb" is read for 3 * len_color
     * bytes and each output plane is written for len_color bytes.
     * A zero or negative "len_color" copies nothing.
     *
     * Pixels that do not fill a whole 16-lane vector are finished by a
     * scalar loop, so every one of the len_color pixels is written.
     */
    if (len_color <= 0) {
        return;
    }

    const size_t count = (size_t) len_color;
    size_t done = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    const size_t lanes = 16;
    for (; done + lanes <= count; done += lanes) {
        /* vld3q_u8 loads 48 bytes and de-interleaves them with stride 3. */
        const uint8x16x3_t pixels = vld3q_u8(rgb + 3 * done);
        vst1q_u8(r + done, pixels.val[0]);
        vst1q_u8(g + done, pixels.val[1]);
        vst1q_u8(b + done, pixels.val[2]);
    }
#endif

    /* Scalar tail (and the whole range when NEON is not compiled in). */
    for (; done < count; done++) {
        r[done] = rgb[3 * done];
        g[done] = rgb[3 * done + 1];
        b[done] = rgb[3 * done + 2];
    }
}
uint8_t **create_arrays(unsigned int num_arrays, unsigned int size_of_array, bool set_rand) {
    /*
     * Allocate "num_arrays" byte arrays of "size_of_array" elements each and
     * return the table of pointers to them.
     *
     * Every array starts zero-filled; when "set_rand" is true each byte is
     * then overwritten with the low 8 bits of successive rand() results.
     *
     * Returns NULL if any allocation fails (nothing is leaked in that case).
     * The caller owns the result and releases both the rows and the table.
     */

    /* calloc(0, ...) may return NULL; ask for one slot so NULL always means failure. */
    uint8_t **arrays = calloc(num_arrays != 0 ? num_arrays : 1, sizeof *arrays);
    if (arrays == NULL) {
        return NULL;
    }

    for (unsigned int row = 0; row < num_arrays; row++) {
        arrays[row] = calloc(size_of_array, sizeof *arrays[row]);
        /* A NULL row is only an error when bytes were actually requested. */
        if (arrays[row] == NULL && size_of_array != 0) {
            while (row > 0) {
                free(arrays[--row]);
            }
            free(arrays);
            return NULL;
        }
        if (set_rand) {
            for (unsigned int col = 0; col < size_of_array; col++) {
                arrays[row][col] = (uint8_t) rand();
            }
        }
    }
    return arrays;
}
void free_arrays(uint8_t **arrays, unsigned int num_arrays) {
    /*
     * Release the first "num_arrays" rows of "arrays" and then the pointer
     * table itself. A NULL "arrays" is accepted and ignored, and NULL rows
     * are harmless because free(NULL) is a no-op.
     */
    if (arrays == NULL) {
        return;
    }
    for (unsigned int row = 0; row < num_arrays; row++) {
        free(arrays[row]);
    }
    free(arrays);
}
/* Parse "text" as a decimal count in [0, limit]; store it in *out and return true on success. */
static bool parse_count(const char *text, unsigned long limit, unsigned int *out) {
    /* strtoul() accepts "-5" and negates it into a huge value, so reject signs up front. */
    if (strchr(text, '-') != NULL) {
        return false;
    }

    errno = 0;
    char *end = NULL;
    const unsigned long value = strtoul(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value > limit) {
        return false;
    }
    *out = (unsigned int) value;
    return true;
}

/* Release a table from create_arrays(), skipping tables that were never allocated. */
static void release_arrays(uint8_t **arrays, unsigned int num_arrays) {
    if (arrays != NULL) {
        free_arrays(arrays, num_arrays);
    }
}

/*
 * Allocate fresh input/output buffers, time one de-interleave pass over all
 * of them with either the scalar or the NEON routine, print "<label>: <seconds>"
 * and free the buffers. Returns false if the buffers could not be allocated.
 */
static bool run_benchmark(const char *label, bool use_neon,
                          unsigned int num_arrays, unsigned int size_of_array) {
    bool ok = false;
    uint8_t **rgb = create_arrays(num_arrays, 3 * size_of_array, true);
    uint8_t **r = create_arrays(num_arrays, size_of_array, false);
    uint8_t **g = create_arrays(num_arrays, size_of_array, false);
    uint8_t **b = create_arrays(num_arrays, size_of_array, false);

    /* With zero arrays a NULL table is not an error: there is nothing to index. */
    if (num_arrays != 0 && (rgb == NULL || r == NULL || g == NULL || b == NULL)) {
        fprintf(stderr, "%s: out of memory\n", label);
        goto cleanup;
    }

    /* Only the conversion loop is timed; allocation and random fill are excluded. */
    const int len_color = (int) size_of_array;
    const clock_t start = clock();
    if (use_neon) {
        for (unsigned int i = 0; i < num_arrays; i++) {
            rgb_deinterleave_neon(r[i], g[i], b[i], rgb[i], len_color);
        }
    } else {
        for (unsigned int i = 0; i < num_arrays; i++) {
            rgb_deinterleave_c(r[i], g[i], b[i], rgb[i], len_color);
        }
    }
    const double seconds = (double) (clock() - start) / CLOCKS_PER_SEC;

    printf("%s: %f\n", label, seconds);
    ok = true;

cleanup:
    release_arrays(rgb, num_arrays);
    release_arrays(r, num_arrays);
    release_arrays(g, num_arrays);
    release_arrays(b, num_arrays);
    return ok;
}

int main(int argc, char *argv[]) {
    /*
     * Usage: <program> <num_arrays> <size_of_array>
     *
     * Times the scalar and the NEON RGB de-interleave over "num_arrays"
     * buffers of "size_of_array" pixels each and prints both durations in
     * seconds. Returns 0 on success and EXIT_FAILURE on bad arguments or
     * allocation failure.
     */
    if (argc < 3) {
        fprintf(stderr, "usage: %s <num_arrays> <size_of_array>\n",
                argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    unsigned int num_arrays = 0;
    unsigned int size_of_array = 0;
    /*
     * size_of_array is capped at INT_MAX / 3 so that the 3x-sized interleaved
     * buffer length and the int "len_color" argument can both represent it.
     */
    if (!parse_count(argv[1], INT_MAX, &num_arrays) ||
        !parse_count(argv[2], INT_MAX / 3, &size_of_array)) {
        fprintf(stderr, "error: arguments must be non-negative integers "
                        "(num_arrays <= %d, size_of_array <= %d)\n",
                INT_MAX, INT_MAX / 3);
        return EXIT_FAILURE;
    }

    /* without SIMD */
    if (!run_benchmark("Without SIMD", false, num_arrays, size_of_array)) {
        return EXIT_FAILURE;
    }

    /* with SIMD */
    if (!run_benchmark("With SIMD", true, num_arrays, size_of_array)) {
        return EXIT_FAILURE;
    }

    return 0;
}
/*
 * End of the gist source. The web-page footer that followed (GitHub sign-up /
 * sign-in prompts for commenting) is not part of the program.
 */