|
package cbounce |
|
|
|
import ( |
|
"sync" |
|
"testing" |
|
"sync/atomic" |
|
) |
|
|
|
// sink receives the final counter of every benchmark goroutine so the
// compiler cannot treat the measured loops as dead code. It is only ever
// updated through sync/atomic.
var sink uint64
|
|
|
// Cache line bouncing via false sharing:
//   - False sharing occurs when threads on different processors modify
//     variables that reside on the same cache line.
//   - Every such write invalidates the cache line on the other processors and
//     forces them to fetch it again, which hurts performance.
//
// See https://software.intel.com/en-us/articles/avoiding-and-identifying-false-sharing-among-threads
|
|
|
// Finding out your cache line size (example output shown):
//   - macOS: $ sysctl hw.cachelinesize          -> 64
//   - Linux: $ getconf LEVEL1_DCACHE_LINESIZE   -> 64
|
|
|
// CacheBounce holds one mutex per benchmark goroutine in a plain slice.
// The mutexes are stored back to back with no padding, so neighbouring
// elements can end up on the same cache line and suffer from false sharing.
type CacheBounce struct {
	m []sync.Mutex // tightly packed, unpadded mutexes
}
|
|
|
var cb CacheBounce |
|
|
|
func benchmarkCachelineBouncing(b *testing.B, numThreads int) { |
|
cb = CacheBounce{m: make([]sync.Mutex, numThreads)} |
|
wg := sync.WaitGroup{} |
|
|
|
b.ResetTimer() |
|
for i := 0; i < numThreads; i++ { |
|
wg.Add(1) |
|
go func(i int, c uint64) { |
|
for j := 0; j < b.N; j++ { |
|
cb.m[i].Lock() |
|
c++ |
|
cb.m[i].Unlock() |
|
} |
|
atomic.AddUint64(&sink, c) // To make sure the loops aren't being optimized out |
|
wg.Done() |
|
}(i, 0) |
|
} |
|
wg.Wait() |
|
} |
|
|
|
type NoCacheBounce struct { |
|
m []PaddedMutex |
|
} |
|
|
|
// PaddedMutex is a sync.Mutex followed by enough padding to make the struct
// 64 bytes long, the cache line size this file assumes. In a []PaddedMutex
// the mutexes therefore sit 64 bytes apart, so on a CPU with 64-byte cache
// lines no two of them share a line.
type PaddedMutex struct {
	sync.Mutex           // 8 bytes
	_          [7]uint64 // + 7 * 8 bytes of padding
} // = 64 bytes
|
|
|
var ncb NoCacheBounce |
|
|
|
func benchNoCacheLineBouncing(b *testing.B, numThreads int) { |
|
ncb = NoCacheBounce{m: make([]PaddedMutex, numThreads)} |
|
wg := sync.WaitGroup{} |
|
|
|
b.ResetTimer() |
|
for i := 0; i < numThreads; i++ { |
|
wg.Add(1) |
|
go func(i int, c uint64) { |
|
for j := 0; j < b.N; j++ { |
|
ncb.m[i].Lock() |
|
c++ |
|
ncb.m[i].Unlock() |
|
} |
|
atomic.AddUint64(&sink, c) // To make sure the loops aren't being optimized out |
|
wg.Done() |
|
}(i, 0) |
|
} |
|
|
|
wg.Wait() |
|
} |
|
|
|
func Benchmark1ThreadNoCacheLineBouncing(b *testing.B) { |
|
benchNoCacheLineBouncing(b, 1) |
|
} |
|
|
|
func Benchmark1ThreadCacheLineBouncing(b *testing.B) { |
|
benchmarkCachelineBouncing(b, 1) |
|
} |
|
|
|
func Benchmark2ThreadsNoCacheLineBouncing(b *testing.B) { |
|
benchNoCacheLineBouncing(b, 2) |
|
} |
|
|
|
func Benchmark2ThreadsCacheLineBouncing(b *testing.B) { |
|
benchmarkCachelineBouncing(b, 2) |
|
} |
|
|
|
func Benchmark4ThreadsNoCacheLineBouncing(b *testing.B) { |
|
benchNoCacheLineBouncing(b, 4) |
|
} |
|
|
|
func Benchmark4ThreadsCacheLineBouncing(b *testing.B) { |
|
benchmarkCachelineBouncing(b, 4) |
|
} |
|
|
|
func Benchmark8ThreadsNoCacheLineBouncing(b *testing.B) { |
|
benchNoCacheLineBouncing(b, 8) |
|
} |
|
|
|
func Benchmark8ThreadsCacheLineBouncing(b *testing.B) { |
|
benchmarkCachelineBouncing(b, 8) |
|
} |