Skip to content

Instantly share code, notes, and snippets.

@lilyball
Last active June 30, 2016 04:24
Show Gist options
  • Save lilyball/4614652b281505645da3c4698a72c9bd to your computer and use it in GitHub Desktop.
Save lilyball/4614652b281505645da3c4698a72c9bd to your computer and use it in GitHub Desktop.
import Darwin.Mach.mach_time
/// Benchmarks a function and prints the results to stdout.
///
/// `setup` produces a fresh input before each timed run and is excluded from
/// the timing. `f` is invoked through `blackBox` to discourage the optimizer
/// from deleting the call or hoisting it out of the loop. The printed value
/// is a trimmed mean (highest and lowest runs discarded) in a human-readable
/// unit.
///
/// - Parameter label: Name printed alongside the result.
/// - Parameter setup: Produces the input for each run; not timed.
/// - Parameter f: The function under test.
func benchmark<T,R>(label: String, @noescape setup: () -> T, @noescape f: T -> R) {
    // Get a ballpark time estimate from one untimed run to pick a strategy.
    let estimateInput = setup()
    let estimateStart = get_current_ns()
    let _ = blackBox(f, args: estimateInput)
    let estimateEnd = get_current_ns()
    let estimate = estimateEnd - estimateStart
    var average_ns: UInt64
    if estimate >= 100_000_000 {
        // It took longer than 100ms; just measure every invocation.
        var times: [UInt64] = []
        for _ in 0..<5 {
            let input = setup()
            let start = get_current_ns()
            let _ = blackBox(f, args: input)
            let end = get_current_ns()
            times.append(end - start)
        }
        times.sortInPlace(<)
        // Throw away the lowest and highest and average the others.
        average_ns = times[1...3].reduce(0, combine: +) / 3
    } else {
        // We're going to use getpid() to avoid hoisting calculations out of
        // the loop. Run it once now to avoid any first-run overhead.
        let _ = getpid()
        // TODO: be more intelligent about this
        // For now, lets just estimate how many runs will make for 100ms based
        // on the first-run timing.
        //let estimateCount = max(min(100_000_000 / estimate, 10_000), 100) // put some cap on it
        let estimateCount: UInt64 = 100_000
        // Split this into 10 runs and throw away the top/bottom 2
        let perRunCount = estimateCount / 10
        // Validate the count once, up front, before it sizes the inner loop
        // or appears as a divisor. (Previously this assert ran inside the
        // measurement loop — 10 times, and only after first use.)
        assert(perRunCount > 0, "invalid perRunCount: \(perRunCount)")
        var times: [UInt64] = []
        for _ in 0..<10 {
            let input = setup()
            let start = get_current_ns()
            for _ in 0..<perRunCount {
                let _ = blackBox(f, args: input)
                let _ = getpid() // fast syscall that should help defeat optimizations
            }
            let end = get_current_ns()
            times.append((end - start) / perRunCount)
        }
        times.sortInPlace(<)
        average_ns = times[2...7].reduce(0, combine: +) / 6
    }
    // Format with the largest unit that keeps the value >= 1.
    var timestr: String
    if average_ns >= 1_000_000_000 {
        timestr = "\(Double(average_ns) / 1_000_000_000) s"
    } else if average_ns >= 1_000_000 {
        timestr = "\(Double(average_ns) / 1_000_000) ms"
    } else if average_ns >= 1_000 {
        timestr = "\(Double(average_ns) / 1_000) µs"
    } else {
        timestr = "\(average_ns) ns"
    }
    print("\(label): average: \(timestr)")
}
/// Benchmarks a function that needs no per-run setup and prints the results
/// to stdout. Forwards to the two-closure overload with an empty setup step.
func benchmark<R>(label: String, @noescape f: () -> R) {
    let noSetup: () -> Void = {}
    benchmark(label, setup: noSetup, f: f)
}
/// Returns the current absolute time in nanoseconds.
///
/// Converts `mach_absolute_time()` ticks to nanoseconds using the Mach
/// timebase, which is queried once and cached in a static property; the
/// process exits if the timebase query fails.
func get_current_ns() -> UInt64 {
    struct Timebase {
        static let info: mach_timebase_info = {
            var tb = mach_timebase_info(numer: 0, denom: 0)
            let status = mach_timebase_info(&tb)
            guard status == 0 else {
                print("Error in mach_timebase_info: \(status)")
                exit(1)
            }
            return tb
        }()
    }
    let ticks = mach_absolute_time()
    return ticks * UInt64(Timebase.info.numer) / UInt64(Timebase.info.denom)
}
/// Runs `f` on `args` while disabling inlining of this call.
///
/// This attempts to defeat some compiler optimizations that may hurt the
/// profiling — for example, eliminating a computation whose result is unused,
/// or hoisting a computation out of a loop. The `@inline(never)` attribute is
/// best-effort only; it probably doesn't accomplish much.
@inline(never) func blackBox<T,R>(@noescape f: T -> R, args: T) -> R {
    let result = f(args)
    return result
}
/// Runs a no-argument `f` while disabling inlining of this call; see the
/// generic overload for why.
@inline(never) func blackBox<R>(@noescape f: () -> R) -> R {
    let result = f()
    return result
}
import Foundation
// Each case below wraps the same getpid() call in a different
// synchronization primitive, so the printed averages are directly
// comparable; the "no sync" case is the baseline.

// Baseline: the bare syscall, no locking.
benchmark("no sync") {
    return getpid()
}
// Userspace spin lock.
var spin = OS_SPINLOCK_INIT
benchmark("spinlock") { () -> pid_t in
    OSSpinLockLock(&spin)
    defer {
        OSSpinLockUnlock(&spin)
    }
    return getpid()
}
// GCD semaphore with an initial count of 1, used as a mutex.
let binarySemaphore = dispatch_semaphore_create(1)
benchmark("semaphore") { () -> pid_t in
    dispatch_semaphore_wait(binarySemaphore, DISPATCH_TIME_FOREVER)
    defer {
        dispatch_semaphore_signal(binarySemaphore)
    }
    return getpid()
}
// Foundation's NSLock.
let foundationLock = NSLock()
benchmark("NSLock") { () -> pid_t in
    foundationLock.lock()
    defer {
        foundationLock.unlock()
    }
    return getpid()
}
// Raw POSIX mutex with default attributes.
var posixMutex = pthread_mutex_t()
pthread_mutex_init(&posixMutex, nil)
benchmark("mutex") { () -> pid_t in
    pthread_mutex_lock(&posixMutex)
    defer {
        pthread_mutex_unlock(&posixMutex)
    }
    return getpid()
}
// Objective-C @synchronized, called via its runtime entry points.
let syncToken = NSObject()
benchmark("synchronized") { () -> pid_t in
    objc_sync_enter(syncToken)
    defer {
        objc_sync_exit(syncToken)
    }
    return getpid()
}
// A synchronous hop through a serial dispatch queue.
let serialQueue = dispatch_queue_create("test queue", DISPATCH_QUEUE_SERIAL)
benchmark("queue") { () -> pid_t in
    var result = pid_t()
    dispatch_sync(serialQueue) {
        result = getpid()
    }
    return result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment