Created
November 3, 2024 17:14
-
-
Save raindev/712f07b03dde2b2f9a17190eb9215f28 to your computer and use it in GitHub Desktop.
Static memory alignment benchmark on M3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# i += 17
$ hyperfine -w2 ./main # aligned(1)
Benchmark 1: ./main
Time (mean ± σ): 52.4 ms ± 3.3 ms [User: 16.9 ms, System: 31.7 ms]
Range (min … max): 48.5 ms … 65.0 ms 47 runs
$ hyperfine -w2 ./main # aligned(4), ~+10%
Benchmark 1: ./main
Time (mean ± σ): 57.6 ms ± 3.0 ms [User: 18.8 ms, System: 34.5 ms]
Range (min … max): 53.8 ms … 66.4 ms 43 runs
# i += 1
$ hyperfine -w2 ./main # aligned(1)
Benchmark 1: ./main
Time (mean ± σ): 63.5 ms ± 1.4 ms [User: 32.9 ms, System: 27.3 ms]
Range (min … max): 61.6 ms … 70.1 ms 41 runs
$ hyperfine -w2 ./main # aligned(4), ~+11%
Benchmark 1: ./main
Time (mean ± σ): 70.7 ms ± 1.9 ms [User: 34.5 ms, System: 32.3 ms]
Range (min … max): 68.3 ms … 76.1 ms 40 runs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h>
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>

#include <mach/mach_time.h>
/* Number of records in the benchmark array (7 bytes each, ~700 MB total). */
#define COUNT 100000000

/*
 * Benchmark record: 4 + 1 + 2 = 7 bytes, no padding (packed).
 *
 * aligned(1) keeps the array stride at 7 bytes, so across the array the
 * 4-byte field `a` lands on every possible byte offset and, for some
 * elements, straddles a cache-line boundary. Switching to aligned(4) pads
 * the record to 8 bytes and keeps `a` 4-byte aligned; that is the variant
 * the benchmark is meant to be compared against.
 *
 * Zero-initialized file-scope storage, so nothing is allocated at run time.
 */
struct __attribute__((packed, aligned(1))) {
    uint32_t a;
    uint8_t b;
    uint16_t c;
} test_mem[COUNT];
static inline uint64_t read_cntvct(void) { | |
uint64_t cnt; | |
asm volatile("mrs %0, cntvct_el0" : "=r"(cnt)); | |
return cnt; | |
} | |
int main() { | |
uint64_t tsc1 = mach_absolute_time(); | |
// do memory stuff here | |
long sum = 0; | |
for (int i = 0; i < COUNT; i += 17) { | |
test_mem[i].a = i % (2<<16); | |
test_mem[i].b = i % (2<<16) + 1; | |
sum += test_mem[i].a - test_mem[i].b; | |
} | |
uint64_t tsc2 = mach_absolute_time(); | |
printf("tsc diff: %lld\n", tsc2 - tsc1); | |
printf("sum: %lu\n", sum); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment