Created
April 15, 2012 22:20
-
-
Save awreece/2395102 to your computer and use it in GitHub Desktop.
Check Malloc for False Sharing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
areece@areece-laptop:~/coding/gomalloc$ ./malloccheck --checkcache=true --procs=1 | |
Malloc check ran with 1 procs for 1000000 iters of size 16 | |
Cache line shared 0 (0.00%) times | |
Cache was reused 993536 (99.35%) times | |
areece@areece-laptop:~/coding/gomalloc$ ./malloccheck --checkcache=true --procs=2 | |
Malloc check ran with 2 procs for 1000000 iters of size 16 | |
Cache line shared 128186 (12.82%) times | |
Cache was reused 859014 (85.90%) times | |
areece@areece-laptop:~/coding/gomalloc$ ./malloccheck --checkcache=true --procs=3 | |
Malloc check ran with 3 procs for 1000000 iters of size 16 | |
Cache line shared 157340 (15.73%) times | |
Cache was reused 830371 (83.04%) times | |
areece@areece-laptop:~/coding/gomalloc$ ./malloccheck --checkcache=true --procs=4 | |
Malloc check ran with 4 procs for 1000000 iters of size 16 | |
Cache line shared 177130 (17.71%) times | |
Cache was reused 810390 (81.04%) times |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for i in {1..4}; do | |
./malloccheck --checkcache=false --procs=$i --prof=out.prof; | |
go tool pprof --text malloccheck out.prof; | |
done | |
Total: 20 samples | |
8 40.0% 40.0% 20 100.0% main.do_thread | |
3 15.0% 55.0% 3 15.0% runtime.MCache_Free | |
2 10.0% 65.0% 12 60.0% runtime.makeslice | |
2 10.0% 75.0% 9 45.0% runtime.mallocgc | |
2 10.0% 85.0% 5 25.0% sweep | |
1 5.0% 90.0% 1 5.0% cachestats | |
1 5.0% 95.0% 10 50.0% makeslice1 | |
1 5.0% 100.0% 1 5.0% runtime.MCache_Alloc | |
0 0.0% 100.0% 6 30.0% runtime.gc | |
0 0.0% 100.0% 20 100.0% schedunlock | |
Total: 23 samples | |
6 26.1% 26.1% 22 95.7% main.do_thread | |
4 17.4% 43.5% 13 56.5% runtime.mallocgc | |
3 13.0% 56.5% 3 13.0% scanblock | |
2 8.7% 65.2% 16 69.6% runtime.makeslice | |
1 4.3% 69.6% 14 60.9% makeslice1 | |
1 4.3% 73.9% 2 8.7% runtime.MCache_Alloc | |
1 4.3% 78.3% 1 4.3% runtime.SizeToClass | |
1 4.3% 82.6% 1 4.3% runtime.casp | |
1 4.3% 87.0% 1 4.3% runtime.lock | |
1 4.3% 91.3% 2 8.7% runtime.markallocated | |
1 4.3% 95.7% 1 4.3% runtime.markspan | |
1 4.3% 100.0% 1 4.3% runtime.purgecachedstats | |
0 0.0% 100.0% 1 4.3% MCentral_Free | |
0 0.0% 100.0% 1 4.3% MCentral_Grow | |
0 0.0% 100.0% 1 4.3% ReleaseN | |
0 0.0% 100.0% 3 13.0% mark | |
0 0.0% 100.0% 1 4.3% runtime.MCache_Free | |
0 0.0% 100.0% 1 4.3% runtime.MCentral_AllocList | |
0 0.0% 100.0% 1 4.3% runtime.MCentral_FreeList | |
0 0.0% 100.0% 1 4.3% runtime.MHeap_Free | |
0 0.0% 100.0% 4 17.4% runtime.gc | |
0 0.0% 100.0% 22 95.7% schedunlock | |
0 0.0% 100.0% 1 4.3% sweep | |
Total: 23 samples | |
7 30.4% 30.4% 14 60.9% runtime.mallocgc | |
4 17.4% 47.8% 20 87.0% main.do_thread | |
2 8.7% 56.5% 16 69.6% runtime.makeslice | |
2 8.7% 65.2% 3 13.0% runtime.markallocated | |
1 4.3% 69.6% 2 8.7% MCentral_Free | |
1 4.3% 73.9% 14 60.9% makeslice1 | |
1 4.3% 78.3% 3 13.0% runtime.MCache_Alloc | |
1 4.3% 82.6% 1 4.3% runtime.casp | |
1 4.3% 87.0% 1 4.3% runtime.markspan | |
1 4.3% 91.3% 1 4.3% runtime.memclr | |
1 4.3% 95.7% 1 4.3% runtime.unmarkspan | |
1 4.3% 100.0% 3 13.0% sweep | |
0 0.0% 100.0% 1 4.3% MCentral_Grow | |
0 0.0% 100.0% 2 8.7% ReleaseN | |
0 0.0% 100.0% 2 8.7% nextgandunlock | |
0 0.0% 100.0% 2 8.7% runtime.MCache_Free | |
0 0.0% 100.0% 1 4.3% runtime.MCentral_AllocList | |
0 0.0% 100.0% 2 8.7% runtime.MCentral_FreeList | |
0 0.0% 100.0% 1 4.3% runtime.clone | |
0 0.0% 100.0% 1 4.3% runtime.gc | |
0 0.0% 100.0% 2 8.7% runtime.gchelper | |
0 0.0% 100.0% 1 4.3% runtime.mcall | |
0 0.0% 100.0% 1 4.3% runtime.mstart | |
0 0.0% 100.0% 2 8.7% schedule | |
0 0.0% 100.0% 20 87.0% schedunlock | |
Total: 26 samples | |
6 23.1% 23.1% 22 84.6% main.do_thread | |
4 15.4% 38.5% 7 26.9% runtime.MCache_Free | |
4 15.4% 53.8% 4 15.4% runtime.casp | |
3 11.5% 65.4% 14 53.8% runtime.mallocgc | |
2 7.7% 73.1% 3 11.5% MCentral_Free | |
2 7.7% 80.8% 8 30.8% sweep | |
1 3.8% 84.6% 15 57.7% makeslice1 | |
1 3.8% 88.5% 1 3.8% runtime.SizeToClass | |
1 3.8% 92.3% 16 61.5% runtime.makeslice | |
1 3.8% 96.2% 5 19.2% runtime.markallocated | |
1 3.8% 100.0% 1 3.8% runtime.xchg | |
0 0.0% 100.0% 3 11.5% ReleaseN | |
0 0.0% 100.0% 3 11.5% nextgandunlock | |
0 0.0% 100.0% 3 11.5% runtime.MCentral_FreeList | |
0 0.0% 100.0% 3 11.5% runtime.clone | |
0 0.0% 100.0% 5 19.2% runtime.gc | |
0 0.0% 100.0% 3 11.5% runtime.gchelper | |
0 0.0% 100.0% 1 3.8% runtime.lock | |
0 0.0% 100.0% 3 11.5% runtime.mstart | |
0 0.0% 100.0% 3 11.5% schedule | |
0 0.0% 100.0% 22 84.6% schedunlock |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// malloc test for false sharing | |
package main | |
import ( | |
"flag" | |
"fmt" | |
"log" | |
"os" | |
"runtime" | |
"runtime/pprof" | |
"sync" | |
"unsafe" | |
) | |
// Command-line configuration. Each variable is bound to a flag in init.
var (
	allocSize  int    // Size in bytes of every allocation.
	checkCache bool   // Whether to run the cache simulator.
	logOut     string // Path of the log file; empty disables logging.
	niters     int    // Total number of iterations across all threads.
	nprocs     int    // Number of threads to run concurrently.
	profOut    string // Path of the pprof output file; empty disables profiling.
)

// emitlog is true iff logOut != "".
var emitlog bool

// Cache simulator state.
var (
	cLock     sync.Mutex             // Lock for the cache simulator.
	cache     = make(map[uintptr]int) // Cache line -> tid that touched it last.
	reacquire int                    // Times a line was touched again by the same tid.
	move      int                    // Times a line changed hands between tids.
)

// cacheWidthBytes is the number of low address bits dropped to obtain a cache
// line index, i.e. log2 of the line size: 6 bits means 64-byte lines.
const cacheWidthBytes uint = 6
func init() { | |
flag.IntVar(&nprocs, "procs", 1, "Value to set GOMAXPROCS") | |
flag.IntVar(&allocSize, "size", 16, "Size of block to allocate") | |
flag.IntVar(&niters, "iters", 1000000, "Number of repetitions to make") | |
flag.StringVar(&profOut, "prof", "", "Emit profiling data to file") | |
flag.StringVar(&logOut, "log", "", "Emit log messages to file") | |
flag.BoolVar(&checkCache, "checkCache", true, "Check for cache sharing") | |
} | |
// Update the cache to know thread tid got pointer ptr. | |
func updateCache(tid int, ptr uintptr) { | |
// Round to cache line, and make no longer valid ptr so gc will free. | |
cacheLine := uintptr(ptr >> cacheWidthBytes) | |
// Don't defer Unlock because we want to avoid holding lock while | |
// printing log messages below. | |
cLock.Lock() | |
t, p := cache[cacheLine] | |
cache[cacheLine] = tid | |
if p { | |
if t == tid { | |
reacquire++ | |
} else { | |
move++ | |
} | |
} | |
cLock.Unlock() | |
if emitlog { | |
if !p { | |
log.Printf("%d] got %#x", tid, ptr) | |
} else if t != tid { | |
log.Printf("%d] got %#x (previously %d)", tid, ptr, t) | |
} else { | |
log.Printf("%d] regot %#x", tid, ptr) | |
} | |
} | |
} | |
// ptr2uintptr returns the address of the first byte of ptr as an integer.
//
// An empty or nil slice has no first element to take the address of, so 0 is
// returned instead of panicking with an index-out-of-range error. The result
// is a plain integer: it does not keep the slice's backing array alive.
func ptr2uintptr(ptr []byte) uintptr {
	if len(ptr) == 0 {
		return 0
	}
	return uintptr(unsafe.Pointer(&ptr[0]))
}
// Run worker thread. Thread locks an os thread then makes a bunch of memory | |
// allocations and writes, hopefully to generate sharing of allocated blocks. | |
func do_thread(tid int, niters int, wg *sync.WaitGroup) { | |
runtime.LockOSThread() | |
defer runtime.UnlockOSThread() | |
for iter := 0; iter < niters; iter++ { | |
b := make([]byte, allocSize) | |
if checkCache { | |
updateCache(tid, ptr2uintptr(b)) | |
} | |
// Write a bunch to b. | |
for i, _ := range b { | |
b[i] = byte(i) | |
} | |
// Intentionally drop reference to b. | |
b = nil | |
} | |
wg.Done() | |
} | |
func printCacheStats() { | |
fmt.Printf("Malloc check ran with %d procs for %d iters of size %d\n", | |
nprocs, niters, allocSize) | |
fmt.Printf("\tCache line shared %d (%.2f%%) times\n", | |
move, float32(move)/float32(niters)*100) | |
fmt.Printf("\tCache was reused %d (%.2f%%) times\n", | |
reacquire, float32(reacquire)/float32(niters)*100) | |
} | |
func main() { | |
flag.Parse() | |
if profOut != "" { | |
if f, err := os.Create(profOut); err != nil { | |
log.Fatal(err) | |
} else { | |
pprof.StartCPUProfile(f) | |
defer pprof.StopCPUProfile() | |
} | |
} | |
if logOut != "" { | |
if f, err := os.Create(logOut); err != nil { | |
log.Fatal(err) | |
} else { | |
emitlog = true | |
log.SetOutput(f) | |
log.SetFlags(log.Lmicroseconds) | |
} | |
} | |
runtime.GOMAXPROCS(nprocs) | |
runtime.GC() // clean up garbage from init | |
var wg sync.WaitGroup | |
for i := 0; i < nprocs; i++ { | |
wg.Add(1) | |
go do_thread(i, niters/nprocs, &wg) | |
} | |
wg.Wait() | |
if checkCache { | |
printCacheStats() | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for i in {1..4}; do | |
perf stat -e cache-misses -e migrations ./malloccheck --procs=$i --checkcache=false; | |
done | |
Performance counter stats for './malloccheck --procs=1 --checkcache=false': | |
13,281 cache-misses | |
8 migrations | |
0.214455000 seconds time elapsed | |
Performance counter stats for './malloccheck --procs=2 --checkcache=false': | |
15,124 cache-misses | |
41 migrations | |
0.175444727 seconds time elapsed | |
Performance counter stats for './malloccheck --procs=3 --checkcache=false': | |
18,581 cache-misses | |
70 migrations | |
0.164377330 seconds time elapsed | |
Performance counter stats for './malloccheck --procs=4 --checkcache=false': | |
19,848 cache-misses | |
74 migrations | |
0.159023927 seconds time elapsed |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment