Created
November 29, 2014 14:10
-
-
Save nkurz/d64f5b4ded4e19e17aae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Calculate cycles spent on overhead of function calls
// See http://cs.coloradocollege.edu/~bylvisaker/CallReturn/
// Build: gcc -g -std=gnu99 -O3 -Wall -Wextra call-return.c -o call-return
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Loop count used by main() when no count is given on the command line.
#define DEFAULT_LOOP_COUNT (1000 * 1000)
// Set `cycles` (a 64-bit lvalue) to the current rdtsc value; meant to be
// used at the start of a timed region.
// The instruction order is cpuid, then rdtsc, then two moves that copy
// edx (high 32 bits) and eax (low 32 bits) into compiler-chosen registers.
// The cpuid is presumably there as a serializing barrier so that earlier
// instructions are not counted -- confirm against Intel's benchmarking notes.
#define RDTSC_START(cycles)                                         \
    do {                                                            \
        register unsigned tsc_hi, tsc_lo;                           \
        __asm__ __volatile__("cpuid\n\t"                            \
                             "rdtsc\n\t"                            \
                             "mov %%edx, %0\n\t"                    \
                             "mov %%eax, %1\n\t"                    \
                             : "=r" (tsc_hi), "=r" (tsc_lo)         \
                             : /* no inputs */                      \
                             : "%rax", "%rbx", "%rcx", "%rdx");     \
        (cycles) = (uint64_t)tsc_lo | ((uint64_t)tsc_hi << 32);     \
    } while (0)
// Set `cycles` (a 64-bit lvalue) to the current time-stamp counter value;
// meant to be used at the end of a timed region.
// The instruction order is rdtscp, then two moves that copy edx (high 32
// bits) and eax (low 32 bits) out, and only then cpuid.
// The trailing cpuid is presumably a barrier that keeps later instructions
// from running before the counter is read -- confirm against Intel's notes.
#define RDTSC_FINAL(cycles)                                         \
    do {                                                            \
        register unsigned tsc_hi, tsc_lo;                           \
        __asm__ __volatile__("rdtscp\n\t"                           \
                             "mov %%edx, %0\n\t"                    \
                             "mov %%eax, %1\n\t"                    \
                             "cpuid\n\t"                            \
                             : "=r" (tsc_hi), "=r" (tsc_lo)         \
                             : /* no inputs */                      \
                             : "%rax", "%rbx", "%rcx", "%rdx");     \
        (cycles) = (uint64_t)tsc_lo | ((uint64_t)tsc_hi << 32);     \
    } while (0)
// Function attribute asking the compiler not to inline the function it
// decorates, so each fN below stays a separate callable function.
#define COMPILER_NO_INLINE __attribute__((noinline))

// f0: leaf of the call tree. Returns its argument plus one.
COMPILER_NO_INLINE uint64_t f0(uint64_t x) {
    /* The details of this calculation are not important */
    const uint64_t bumped = x + 1;
    return bumped;
}
// f1: feeds x through f0() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f1(uint64_t x) {
    const uint64_t after_first = f0(x);
    const uint64_t after_second = f0(after_first);
    return after_second + 1;
}
// f2: feeds x through f1() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f2(uint64_t x) {
    const uint64_t after_first = f1(x);
    const uint64_t after_second = f1(after_first);
    return after_second + 1;
}
// f3: feeds x through f2() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f3(uint64_t x) {
    const uint64_t after_first = f2(x);
    const uint64_t after_second = f2(after_first);
    return after_second + 1;
}
// f4: feeds x through f3() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f4(uint64_t x) {
    const uint64_t after_first = f3(x);
    const uint64_t after_second = f3(after_first);
    return after_second + 1;
}
// f5: feeds x through f4() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f5(uint64_t x) {
    const uint64_t after_first = f4(x);
    const uint64_t after_second = f4(after_first);
    return after_second + 1;
}
// f6: feeds x through f5() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f6(uint64_t x) {
    const uint64_t after_first = f5(x);
    const uint64_t after_second = f5(after_first);
    return after_second + 1;
}
// f7: feeds x through f6() twice in sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f7(uint64_t x) {
    const uint64_t after_first = f6(x);
    const uint64_t after_second = f6(after_first);
    return after_second + 1;
}
// f8: root of the call tree used by main(). Feeds x through f7() twice in
// sequence, then adds one more.
COMPILER_NO_INLINE uint64_t f8(uint64_t x) {
    const uint64_t after_first = f7(x);
    const uint64_t after_second = f7(after_first);
    return after_second + 1;
}
int main(int argc, char** argv) { | |
if (argc > 2) goto die_usage; | |
uint64_t loop_count = DEFAULT_LOOP_COUNT; | |
if (argc > 1) loop_count = atoi(argv[1]); | |
if (! loop_count) goto die_usage; | |
printf("Calling f8() %ld times\n", loop_count); | |
uint64_t calls_per_iteration = f8(0); // prewarm instruction cache | |
uint64_t expected_val = calls_per_iteration * loop_count; | |
uint64_t cycles_start, cycles_final; | |
RDTSC_START(cycles_start); | |
uint64_t val = 0; | |
while (loop_count--) { | |
val = f8(val); // f8() calls f0() 2^8 times | |
} | |
RDTSC_FINAL(cycles_final); | |
uint64_t cycles_spent = cycles_final - cycles_start; | |
float cycles_per_call = cycles_spent / (float) val; | |
printf("%ld calls to f0() took %ld cycles (%.2f cycles per call)\n", | |
val, cycles_spent, cycles_per_call); | |
if (val == expected_val) return 0; // comparison to prevent optimization code removal | |
else printf("Warning --- expected %ld calls but got %ld\n", expected_val, val); | |
return 1; | |
die_usage: | |
printf("Usage: %s [count] (default count %d)\n", argv[0], DEFAULT_LOOP_COUNT); | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment