Last active
June 30, 2016 04:24
-
-
Save lilyball/4614652b281505645da3c4698a72c9bd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Darwin.Mach.mach_time | |
/// Times `f` and prints the average duration of a single call to stdout.
///
/// One timed call is made first to choose a strategy. If that call took
/// 100ms or more, `f` is timed individually five times and the middle three
/// timings are averaged. Otherwise `f` is timed in ten batches of 10,000
/// calls each and the middle six per-call timings are averaged.
///
/// - parameter label: Text printed in front of the result.
/// - parameter setup: Produces the input handed to `f`. It is called once
///   for the initial probe and once per timed run or batch, always before
///   the clock is started.
/// - parameter f: The function under test. Its result is discarded.
func benchmark<T,R>(label: String, @noescape setup: () -> T, @noescape f: T -> R) {
    // Probe: a single timed call that selects the measurement strategy.
    let probeInput = setup()
    let probeStart = get_current_ns()
    _ = blackBox(f, args: probeInput)
    let probeEnd = get_current_ns()
    let probeDuration = probeEnd - probeStart

    let nsPerCall: UInt64
    if probeDuration < 100_000_000 {
        // Fast function: time it in batches. getpid() is called inside the
        // batch loop to discourage hoisting work out of it; call it once up
        // front so any first-call cost is paid outside the timed region.
        _ = getpid()

        // TODO: derive the call count from the probe duration instead of
        // using a fixed number. An earlier attempt was
        //   max(min(100_000_000 / estimate, 10_000), 100)
        let totalCalls: UInt64 = 100_000
        // Ten batches; the two fastest and two slowest are discarded below.
        let callsPerBatch = totalCalls / 10

        var samples = [UInt64]()
        for _ in 0..<10 {
            let batchInput = setup()
            let began = get_current_ns()
            for _ in 0..<callsPerBatch {
                _ = blackBox(f, args: batchInput)
                _ = getpid() // fast syscall that should help defeat optimizations
            }
            let finished = get_current_ns()
            assert(callsPerBatch > 0, "invalid perRunCount: \(callsPerBatch)")
            samples.append((finished - began) / callsPerBatch)
        }
        samples.sortInPlace()
        nsPerCall = samples[2...7].reduce(0, combine: { $0 + $1 }) / 6
    } else {
        // Slow function (at least 100ms per call): time each call on its own.
        var samples = [UInt64]()
        for _ in 0..<5 {
            let runInput = setup()
            let began = get_current_ns()
            _ = blackBox(f, args: runInput)
            let finished = get_current_ns()
            samples.append(finished - began)
        }
        samples.sortInPlace()
        // Drop the fastest and the slowest run, average the remaining three.
        nsPerCall = samples[1...3].reduce(0, combine: { $0 + $1 }) / 3
    }

    // Pick the largest unit that keeps the printed value at or above 1.
    let formatted: String
    if nsPerCall < 1_000 {
        formatted = "\(nsPerCall) ns"
    } else if nsPerCall < 1_000_000 {
        formatted = "\(Double(nsPerCall) / 1_000) µs"
    } else if nsPerCall < 1_000_000_000 {
        formatted = "\(Double(nsPerCall) / 1_000_000) ms"
    } else {
        formatted = "\(Double(nsPerCall) / 1_000_000_000) s"
    }
    print("\(label): average: \(formatted)")
}
/// Benchmarks a function that takes no input and prints the result to stdout.
///
/// Forwards to the setup-taking `benchmark` overload with a setup closure
/// that does nothing.
///
/// - parameter label: Text printed in front of the result.
/// - parameter f: The function under test.
func benchmark<R>(label: String, @noescape f: () -> R) {
    benchmark(label, setup: { () -> Void in return }, f: f)
}
/// Returns the current `mach_absolute_time()` reading converted to
/// nanoseconds using the Mach timebase.
///
/// The timebase is fetched on first use and stored in a static constant.
/// If `mach_timebase_info` reports an error, a message is printed and the
/// process exits with status 1.
///
/// - returns: The tick count scaled by `numer / denom`, rounded down.
func get_current_ns() -> UInt64 {
    struct Static {
        static let timebase: mach_timebase_info = {
            var timebase = mach_timebase_info(numer: 0, denom: 0)
            let err = mach_timebase_info(&timebase)
            if err != 0 {
                print("Error in mach_timebase_info: \(err)")
                exit(1)
            }
            return timebase
        }()
    }
    let numer = UInt64(Static.timebase.numer)
    let denom = UInt64(Static.timebase.denom)
    let ticks = mach_absolute_time()
    // Scale the quotient and the remainder separately. This yields exactly
    // floor(ticks * numer / denom) but never forms the full `ticks * numer`
    // product, which can exceed UInt64 (and trap) when `numer` is large.
    // The remainder term is safe: it is below denom * numer, and both
    // factors are 32-bit values.
    return (ticks / denom) * numer + (ticks % denom) * numer / denom
}
/// Calls `f` with `args` from a function that is never inlined.
///
/// This is an attempt to defeat compiler optimizations that may distort
/// profiling. For example, if the result of a computation is unused, the
/// compiler may eliminate the computation, or it may hoist computations out
/// of a loop. Routing the call through this function tries to prevent some
/// of that, though to be honest it probably doesn't do much.
///
/// - parameter f: The function to call.
/// - parameter args: The value passed to `f`.
/// - returns: Whatever `f` returns.
@inline(never) func blackBox<T,R>(@noescape f: T -> R, args: T) -> R {
    let result = f(args)
    return result
}
/// Calls the argument-less function `f` from a function that is never
/// inlined, and returns its result.
///
/// - parameter f: The function to call.
/// - returns: Whatever `f` returns.
@inline(never) func blackBox<R>(@noescape f: () -> R) -> R {
    let result = f()
    return result
}
import Foundation | |
// Baseline: the bare getpid() call with no synchronization around it.
benchmark("no sync") { () -> pid_t in
    return getpid()
}

// OSSpinLock held around the getpid() call.
var spin = OS_SPINLOCK_INIT
benchmark("spinlock") { () -> pid_t in
    OSSpinLockLock(&spin)
    defer { OSSpinLockUnlock(&spin) }
    return getpid()
}

// Dispatch semaphore created with an initial value of 1.
let semaphore = dispatch_semaphore_create(1)
benchmark("semaphore") { () -> pid_t in
    dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER)
    defer { dispatch_semaphore_signal(semaphore) }
    return getpid()
}

// Foundation's NSLock.
let lock = NSLock()
benchmark("NSLock") { () -> pid_t in
    lock.lock()
    defer { lock.unlock() }
    return getpid()
}
// pthread mutex initialized with default attributes (nil attribute pointer).
var mutex = pthread_mutex_t()
// Check the initialization status rather than ignoring it, so a failed
// init is reported instead of benchmarking an unusable mutex.
let mutexInitStatus = pthread_mutex_init(&mutex, nil)
if mutexInitStatus != 0 {
    print("Error in pthread_mutex_init: \(mutexInitStatus)")
    exit(1)
}
benchmark("mutex") { () -> pid_t in
    pthread_mutex_lock(&mutex)
    defer {
        pthread_mutex_unlock(&mutex)
    }
    return getpid()
}
// The benchmark is done with the mutex; release it.
let _ = pthread_mutex_destroy(&mutex)
// objc_sync_enter / objc_sync_exit pair on a plain NSObject.
let syncTarget = NSObject()
benchmark("synchronized") { () -> pid_t in
    objc_sync_enter(syncTarget)
    defer { objc_sync_exit(syncTarget) }
    return getpid()
}

// Serial dispatch queue; the getpid() call runs inside a dispatch_sync block
// and its result is carried out through a captured local.
let serialQueue = dispatch_queue_create("test queue", DISPATCH_QUEUE_SERIAL)
benchmark("queue") { () -> pid_t in
    var result: pid_t = 0
    dispatch_sync(serialQueue) {
        result = getpid()
    }
    return result
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment