Skip to content

Instantly share code, notes, and snippets.

@raindev
Last active November 3, 2024 17:07
Show Gist options
  • Save raindev/232f8b782134fe9215eaa600f8527d27 to your computer and use it in GitHub Desktop.
Save raindev/232f8b782134fe9215eaa600f8527d27 to your computer and use it in GitHub Desktop.
Static memory alignment benchmark on Pi 4 (Arm 64)
#include <stdint.h>
#include <stdio.h>
#include <stdalign.h>
#include <assert.h>
#include <sys/mman.h>
//#include <x86intrin.h>
/* Number of elements in the benchmark array. */
#define COUNT 4096

/*
 * Benchmark working set.
 *
 * The struct is packed, so no padding is inserted between or after the
 * members: each element occupies 4 + 1 + 2 = 7 bytes and consecutive
 * elements are laid out back to back.
 *
 * Layout variants to compare against (edit the declaration to switch):
 *   - drop the packed attribute to get the compiler's default layout;
 *   - additionally put alignas(64) in front of test_mem[COUNT] to request
 *     64-byte alignment for the array.
 */
struct {
    uint32_t a;
    uint8_t  b;
    uint16_t c;
} __attribute__((packed)) test_mem[COUNT];
/*
 * Read the AArch64 virtual counter register CNTVCT_EL0.
 *
 * Returns the raw 64-bit counter value. The counter advances at the
 * frequency reported by CNTFRQ_EL0, not at the CPU clock, so differences
 * are timer ticks rather than CPU cycles.
 */
static inline uint64_t read_cntvct(void) {
    uint64_t cnt;
    /*
     * - "isb" keeps the counter read from being executed speculatively
     *   ahead of the instructions that precede it in program order.
     * - The "memory" clobber stops the compiler from moving the measured
     *   loads/stores across this statement.
     * - __asm__ (rather than asm) also builds under strict -std=c11.
     */
    __asm__ volatile("isb\n\t"
                     "mrs %0, cntvct_el0"
                     : "=r"(cnt)
                     :
                     : "memory");
    return cnt;
}
//static inline uint64_t read_cycle_counter(void) {
// uint64_t value;
// asm volatile("mrs %0, pmccntr_el0" : "=r"(value));
// return value;
//}
/*
 * Benchmark entry point.
 *
 * Locks the process memory, then measures (in virtual-counter ticks) one
 * pass over test_mem that writes members a and b of every element and
 * accumulates their difference. Prints the tick delta, the accumulated
 * sum and the size of test_mem in bytes.
 *
 * Returns 0 on success, 1 if the memory could not be locked.
 */
int main(void) {
    /*
     * Lock current and future pages into RAM. Report failure explicitly
     * instead of via assert(), so the check survives -DNDEBUG builds.
     */
    if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
        perror("mlockall");
        return 1;
    }

    uint64_t tsc1 = read_cntvct();

    /* --- measured region --- */
    long sum = 0;
    for (int i = 0; i < COUNT; i++) {
        test_mem[i].a = (uint32_t)(i % (2 << 16));
        /* b is 8 bits wide; the narrowing is intentional and now explicit. */
        test_mem[i].b = (uint8_t)(i % (2 << 16) + 1);
        /* Widen before subtracting so a < b does not wrap around in uint32_t. */
        sum += (long)test_mem[i].a - (long)test_mem[i].b;
    }
    /* --- end of measured region --- */

    uint64_t tsc2 = read_cntvct();

    printf("tsc diff: %llu\n", (unsigned long long)(tsc2 - tsc1));
    /* Printing sum keeps the accumulation from being discarded as dead code. */
    printf("sum: %ld\n", sum);
    printf("size: %zu\n", sizeof(test_mem));
    return 0;
}
$ sudo perf stat -r10 ./main
tsc diff: 589
sum: 28672
tsc diff: 737
sum: 28672
tsc diff: 620
sum: 28672
tsc diff: 575
sum: 28672
tsc diff: 593
sum: 28672
tsc diff: 591
sum: 28672
tsc diff: 565
sum: 28672
tsc diff: 572
sum: 28672
tsc diff: 526
sum: 28672
tsc diff: 587
sum: 28672
Performance counter stats for './main' (10 runs):
3.33 msec task-clock # 0.631 CPUs utilized ( +- 3.00% )
0 context-switches # 0.000 /sec
0 cpu-migrations # 0.000 /sec
37 page-faults # 11.124 K/sec ( +- 0.70% )
4613519 cycles # 1.387 GHz ( +- 0.60% )
4474784 instructions # 0.97 insn per cycle ( +- 0.32% )
<not supported> branches
14479 branch-misses ( +- 1.20% )
0.005268 +- 0.000134 seconds time elapsed ( +- 2.54% )
# perf report --stdio # repeated multiple times
# Overhead Command Shared Object Symbol
# ........ ....... ................. ...............................
#
20.55% main [kernel.kallsyms] [k] el0_da
20.19% main [kernel.kallsyms] [k] _raw_spin_unlock
17.52% main [kernel.kallsyms] [k] arch_local_irq_restore
16.29% main [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore
14.47% main [kernel.kallsyms] [k] locks_remove_posix
10.19% main [kernel.kallsyms] [k] strnlen_user
0.77% main [kernel.kallsyms] [k] vma_expand
0.03% perf-ex [kernel.kallsyms] [k] perf_event_enable_on_exec
# Overhead Command Shared Object Symbol
# ........ ....... ................. .............................
#
39.82% main [kernel.kallsyms] [k] arch_local_irq_restore
25.53% main [kernel.kallsyms] [k] handle_softirqs
23.23% main [kernel.kallsyms] [k] percpu_counter_add_batch
10.62% main [kernel.kallsyms] [k] strnlen_user
0.77% main [kernel.kallsyms] [k] mas_find
0.03% perf-ex [kernel.kallsyms] [k] perf_event_enable_on_exec
# Overhead Command Shared Object Symbol
# ........ ....... ..................... .............................
#
31.19% main ld-linux-aarch64.so.1 [.] 0x0000000000008a20
27.40% main [kernel.kallsyms] [k] __pi_clear_page
25.11% main [kernel.kallsyms] [k] release_pages
15.19% main [kernel.kallsyms] [k] kfree
1.06% main [kernel.kallsyms] [k] mas_wr_walk
0.05% perf-ex [kernel.kallsyms] [k] perf_event_enable_on_exec
# Overhead Command Shared Object Symbol
# ........ ....... ................. .............................
#
22.85% main [kernel.kallsyms] [k] release_pages
22.85% main [kernel.kallsyms] [k] vma_interval_tree_remove
19.47% main [kernel.kallsyms] [k] _raw_spin_unlock
18.07% main libc.so.6 [.] write
16.05% main [kernel.kallsyms] [k] __rcu_read_unlock
0.68% main [kernel.kallsyms] [k] get_random_u64
0.03% perf-ex [kernel.kallsyms] [k] perf_event_enable_on_exec
# Overhead Command Shared Object Symbol
# ........ ....... ..................... .............................
#
25.70% main ld-linux-aarch64.so.1 [.] 0x00000000000111d8
23.23% main [kernel.kallsyms] [k] next_uptodate_folio
21.42% main [kernel.kallsyms] [k] __pi_clear_page
18.60% main [kernel.kallsyms] [k] arch_local_irq_restore
10.27% main [kernel.kallsyms] [k] strnlen_user
0.74% main [kernel.kallsyms] [k] get_random_u16
0.03% perf-ex [kernel.kallsyms] [k] perf_event_enable_on_exec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment