Skip to content

Instantly share code, notes, and snippets.

@zommiommy
Last active May 19, 2021 19:38
Show Gist options
  • Save zommiommy/614364c3bf0d4d5d236b5815b46bbd13 to your computer and use it in GitHub Desktop.
Save zommiommy/614364c3bf0d4d5d236b5815b46bbd13 to your computer and use it in GitHub Desktop.
Experiments on the consistency of rdtsc and rdtscp
build:
nasm -f elf64 -o measure.o ./measure.asm
ld measure.o -o measure.out -lc --dynamic-linker /lib/ld-2.33.so
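; build with:
; nasm -f elf64 -o measure.o ./measure.asm
; ld measure.o -o measure.out -lc --dynamic-linker /lib/ld-2.33.so
; (the --dynamic-linker path depends on the glibc installed on the build
; machine; point it at your system's loader, e.g. /lib64/ld-linux-x86-64.so.2)
; run with:
; ./measure.out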
; how many measurements to do for each experiment
%define n_iters 10000000
; where the data for the measruements are stored
section .bss
buffer: resq n_iters
; Pretty print stuff
section .data
fmt_d: db "%d", 10, 0
fmt_s: db "%s", 10, 0
rdtsc_msg: db "[rdtsc]", 0
rdtsc_cpuid_msg: db "[rdtsc_cpuid]", 0
cpuid_rdtsc_msg: db "[cpuid_rdtsc]", 0
cpuid_rdtsc_cpuid_msg: db "[cpuid_rdtsc_cpuid]", 0
rdtscp_msg: db "[rdtscp]", 0
rdtscp_cpuid_msg: db "[rdtscp_cpuid]", 0
cpuid_rdtscp_msg: db "[cpuid_rdtscp]", 0
cpuid_rdtscp_cpuid_msg: db "[cpuid_rdtscp_cpuid]", 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Entrypoint
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .text
extern printf
global _start
_start:
; measure
call measure_rdtsc
call measure_cpuid_rdtsc
call measure_rdtsc_cpuid
call measure_cpuid_rdtsc_cpuid
call measure_rdtscp
call measure_cpuid_rdtscp
call measure_rdtscp_cpuid
call measure_cpuid_rdtscp_cpuid
; exit(0)
xor rdi, rdi
mov rax, 0x3c
syscall
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Print everything, it's equivalent to
;; for (int i = 0; i < n_iters; i++) {
;; printf("%d", buffer[i]);
;; }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
print_all:
; r12 = number of iters
mov r12, n_iters
inc r12
while_print:
; while r12 != 0
dec r12
test r12, r12
jz end_print
; printf("%d", buffer[r12])
mov rsi, [buffer + 8 * r12]
mov rdi, fmt_d
mov rax, 0
call printf
jmp while_print
end_print:
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Benchmark
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_rdtsc:
mov rsi, rdtsc_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_rdtsc:
; while r12 != 0
dec r12
test r12, r12
jz end_rdtsc
; START
rdtsc
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtsc
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_rdtsc
end_rdtsc:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_cpuid_rdtsc:
mov rsi, cpuid_rdtsc_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_cpuid_rdtsc:
; while r12 != 0
dec r12
test r12, r12
jz end_cpuid_rdtsc
; START
cpuid
rdtsc
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtsc
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_cpuid_rdtsc
end_cpuid_rdtsc:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_rdtsc_cpuid:
mov rsi, rdtsc_cpuid_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_rdtsc_cpuid:
; while r12 != 0
dec r12
test r12, r12
jz end_rdtsc_cpuid
; START
rdtsc
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtsc
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
cpuid
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_rdtsc_cpuid
end_rdtsc_cpuid:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_cpuid_rdtsc_cpuid:
mov rsi, cpuid_rdtsc_cpuid_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_cpuid_rdtsc_cpuid:
; while r12 != 0
dec r12
test r12, r12
jz end_cpuid_rdtsc_cpuid
; START
cpuid
rdtsc
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtsc
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
cpuid
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_cpuid_rdtsc_cpuid
end_cpuid_rdtsc_cpuid:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_rdtscp:
mov rsi, rdtscp_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_rdtscp:
; while r12 != 0
dec r12
test r12, r12
jz end_rdtscp
; START
rdtscp
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtscp
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_rdtscp
end_rdtscp:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_rdtscp_cpuid:
mov rsi, rdtscp_cpuid_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_rdtscp_cpuid:
; while r12 != 0
dec r12
test r12, r12
jz end_rdtscp_cpuid
; START
rdtscp
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtscp
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
cpuid
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_rdtscp_cpuid
end_rdtscp_cpuid:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_cpuid_rdtscp:
mov rsi, cpuid_rdtscp_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_cpuid_rdtscp:
; while r12 != 0
dec r12
test r12, r12
jz end_cpuid_rdtscp
; START
cpuid
rdtscp
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtscp
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_cpuid_rdtscp
end_cpuid_rdtscp:
call print_all
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
measure_cpuid_rdtscp_cpuid:
mov rsi, cpuid_rdtscp_cpuid_msg
mov rdi, fmt_s
mov rax, 0
call printf
; r12 = number of iters
mov r12, n_iters
inc r12
while_cpuid_rdtscp_cpuid:
; while r12 != 0
dec r12
test r12, r12
jz end_cpuid_rdtscp_cpuid
; START
cpuid
rdtscp
shl rdx, 32
or rax, rdx
; save the start to a temp register (this should be free)
mov r11, rax
; function to benchmark
; END
rdtscp
shl rdx, 32
or rax, rdx
sub rax, r11
mov r10, rax
cpuid
;; buffer[r12] = END - START
mov [buffer + 8 * r12], r10
jmp while_cpuid_rdtscp_cpuid
end_cpuid_rdtscp_cpuid:
call print_all
ret
from collections import Counter
with open("measurements.log", "r") as f:
data = f.read()
result = {}
key = ""
for line in data.split("\n"):
if line.startswith("["):
key = line[1:-1]
result[key] = []
continue
if line == "":
continue
result[key].append(int(line))
for key, vals in result.items():
print("<tr>\n<td markdown='span'>{}</td>\n<td markdown='span'>{}</td>\n</tr>".format(key, {
k: v
for k,v in Counter(result[key]).items()
if v > 100
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment