Skip to content

Instantly share code, notes, and snippets.

@lilyball
Last active June 30, 2016 04:24
Show Gist options
  • Save lilyball/4614652b281505645da3c4698a72c9bd to your computer and use it in GitHub Desktop.
Save lilyball/4614652b281505645da3c4698a72c9bd to your computer and use it in GitHub Desktop.
import Darwin.Mach.mach_time
/// Benchmarks a function and prints the results to stdout.
///
/// `setup` produces a fresh input before each timed run and is excluded from
/// the timing. `f` is invoked through `blackBox` to discourage the optimizer
/// from deleting the call or hoisting it out of the loop. The printed value
/// is a trimmed mean (highest and lowest runs discarded) in a human-readable
/// unit.
///
/// - Parameter label: Name printed alongside the result.
/// - Parameter setup: Produces the input for each run; not timed.
/// - Parameter f: The function under test.
func benchmark<T,R>(label: String, @noescape setup: () -> T, @noescape f: T -> R) {
    // Get a ballpark time estimate from one untimed run to pick a strategy.
    let estimateInput = setup()
    let estimateStart = get_current_ns()
    let _ = blackBox(f, args: estimateInput)
    let estimateEnd = get_current_ns()
    let estimate = estimateEnd - estimateStart
    var average_ns: UInt64
    if estimate >= 100_000_000 {
        // It took longer than 100ms; just measure every invocation.
        var times: [UInt64] = []
        for _ in 0..<5 {
            let input = setup()
            let start = get_current_ns()
            let _ = blackBox(f, args: input)
            let end = get_current_ns()
            times.append(end - start)
        }
        times.sortInPlace(<)
        // Throw away the lowest and highest and average the others.
        average_ns = times[1...3].reduce(0, combine: +) / 3
    } else {
        // We're going to use getpid() to avoid hoisting calculations out of
        // the loop. Run it once now to avoid any first-run overhead.
        let _ = getpid()
        // TODO: be more intelligent about this
        // For now, lets just estimate how many runs will make for 100ms based
        // on the first-run timing.
        //let estimateCount = max(min(100_000_000 / estimate, 10_000), 100) // put some cap on it
        let estimateCount: UInt64 = 100_000
        // Split this into 10 runs and throw away the top/bottom 2
        let perRunCount = estimateCount / 10
        // Validate the count once, up front, before it sizes the inner loop
        // or appears as a divisor. (Previously this assert ran inside the
        // measurement loop — 10 times, and only after first use.)
        assert(perRunCount > 0, "invalid perRunCount: \(perRunCount)")
        var times: [UInt64] = []
        for _ in 0..<10 {
            let input = setup()
            let start = get_current_ns()
            for _ in 0..<perRunCount {
                let _ = blackBox(f, args: input)
                let _ = getpid() // fast syscall that should help defeat optimizations
            }
            let end = get_current_ns()
            times.append((end - start) / perRunCount)
        }
        times.sortInPlace(<)
        average_ns = times[2...7].reduce(0, combine: +) / 6
    }
    // Format with the largest unit that keeps the value >= 1.
    var timestr: String
    if average_ns >= 1_000_000_000 {
        timestr = "\(Double(average_ns) / 1_000_000_000) s"
    } else if average_ns >= 1_000_000 {
        timestr = "\(Double(average_ns) / 1_000_000) ms"
    } else if average_ns >= 1_000 {
        timestr = "\(Double(average_ns) / 1_000) µs"
    } else {
        timestr = "\(average_ns) ns"
    }
    print("\(label): average: \(timestr)")
}
/// Benchmarks a function that needs no per-run setup and prints the results
/// to stdout. Forwards to the two-closure overload with an empty setup step.
func benchmark<R>(label: String, @noescape f: () -> R) {
    let noSetup: () -> Void = {}
    benchmark(label, setup: noSetup, f: f)
}
/// Returns the current absolute time in nanoseconds.
///
/// Converts `mach_absolute_time()` ticks to nanoseconds using the Mach
/// timebase, which is queried once and cached in a static property; the
/// process exits if the timebase query fails.
func get_current_ns() -> UInt64 {
    struct Timebase {
        static let info: mach_timebase_info = {
            var tb = mach_timebase_info(numer: 0, denom: 0)
            let status = mach_timebase_info(&tb)
            guard status == 0 else {
                print("Error in mach_timebase_info: \(status)")
                exit(1)
            }
            return tb
        }()
    }
    let ticks = mach_absolute_time()
    return ticks * UInt64(Timebase.info.numer) / UInt64(Timebase.info.denom)
}
/// Runs `f` on `args` while disabling inlining of this call.
///
/// This attempts to defeat some compiler optimizations that may hurt the
/// profiling — for example, eliminating a computation whose result is unused,
/// or hoisting a computation out of a loop. The `@inline(never)` attribute is
/// best-effort only; it probably doesn't accomplish much.
@inline(never) func blackBox<T,R>(@noescape f: T -> R, args: T) -> R {
    let result = f(args)
    return result
}
/// Runs a no-argument `f` while disabling inlining of this call; see the
/// generic overload for why.
@inline(never) func blackBox<R>(@noescape f: () -> R) -> R {
    let result = f()
    return result
}
import Foundation
// Each case below wraps the same getpid() call in a different
// synchronization primitive, so the printed averages are directly
// comparable; the "no sync" case is the baseline.

// Baseline: the bare syscall, no locking.
benchmark("no sync") {
    return getpid()
}
// Userspace spin lock.
var spin = OS_SPINLOCK_INIT
benchmark("spinlock") { () -> pid_t in
    OSSpinLockLock(&spin)
    defer {
        OSSpinLockUnlock(&spin)
    }
    return getpid()
}
// GCD semaphore with an initial count of 1, used as a mutex.
let binarySemaphore = dispatch_semaphore_create(1)
benchmark("semaphore") { () -> pid_t in
    dispatch_semaphore_wait(binarySemaphore, DISPATCH_TIME_FOREVER)
    defer {
        dispatch_semaphore_signal(binarySemaphore)
    }
    return getpid()
}
// Foundation's NSLock.
let foundationLock = NSLock()
benchmark("NSLock") { () -> pid_t in
    foundationLock.lock()
    defer {
        foundationLock.unlock()
    }
    return getpid()
}
// Raw POSIX mutex with default attributes.
var posixMutex = pthread_mutex_t()
pthread_mutex_init(&posixMutex, nil)
benchmark("mutex") { () -> pid_t in
    pthread_mutex_lock(&posixMutex)
    defer {
        pthread_mutex_unlock(&posixMutex)
    }
    return getpid()
}
// Objective-C @synchronized, called via its runtime entry points.
let syncToken = NSObject()
benchmark("synchronized") { () -> pid_t in
    objc_sync_enter(syncToken)
    defer {
        objc_sync_exit(syncToken)
    }
    return getpid()
}
// A synchronous hop through a serial dispatch queue.
let serialQueue = dispatch_queue_create("test queue", DISPATCH_QUEUE_SERIAL)
benchmark("queue") { () -> pid_t in
    var result = pid_t()
    dispatch_sync(serialQueue) {
        result = getpid()
    }
    return result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment