-
-
Save vessenes/ce7d1254ad92c6d2a996 to your computer and use it in GitHub Desktop.
package main | |
import ( | |
"errors" | |
"flag" | |
"log" | |
"os" | |
"runtime" | |
"strconv" | |
"sync" | |
"time" | |
"github.com/spaolacci/murmur3" | |
) | |
type Record struct { | |
Key [35]byte | |
Index int32 | |
} | |
type MyHash struct { | |
Table []Record | |
Size uint32 | |
} | |
func (hash *MyHash) Init(size int) { | |
hash.Size = uint32(size) | |
hash.Table = make([]Record, hash.Size) | |
} | |
func (hash *MyHash) Keys() [][35]byte { | |
var keys [][35]byte | |
for i := 0; i < len(hash.Table); i++ { | |
if hash.Table[i].Key != [35]byte{} { | |
keys = append(keys, hash.Table[i].Key) | |
} | |
} | |
return keys | |
} | |
func (hash *MyHash) Get(key [35]byte) int32 { | |
offset := murmur3.Sum32(key[:]) % hash.Size | |
nullkey := [35]byte{} | |
for i := uint32(0); ; i++ { | |
if hash.Table[(offset+i)%hash.Size].Key == key { | |
return hash.Table[(offset+i)%hash.Size].Index | |
} else if hash.Table[(offset+i)%hash.Size].Key == nullkey { | |
return 0 | |
} | |
} | |
} | |
func (hash *MyHash) Set(key [35]byte, value int32) (int32, error) { | |
seen_zero := 0 | |
offset := murmur3.Sum32(key[:]) % hash.Size | |
nullkey := [35]byte{} | |
tries := 0 | |
for i := uint32(0); ; i++ { | |
if (offset+i)%hash.Size == 0 { | |
seen_zero++ | |
} | |
if seen_zero > 1 { | |
return int32(0), errors.New("Hash table full. We looked and looked, couldn't find a spot.") | |
} | |
if hash.Table[(offset+i)%hash.Size].Key == key { | |
hash.Table[(offset+i)%hash.Size].Index = value | |
return hash.Table[(offset+i)%hash.Size].Index, nil | |
} else if hash.Table[(offset+i)%hash.Size].Key == nullkey { | |
trySet: | |
hash.Table[(offset+i)%hash.Size].Index = value | |
hash.Table[(offset+i)%hash.Size].Key = key | |
if key == hash.Table[(offset+i)%hash.Size].Key && value == hash.Table[(offset+i)%hash.Size].Index { | |
// everything is awesome, we're done | |
return hash.Table[(offset+i)%hash.Size].Index, nil | |
} else if key != hash.Table[(offset+i)%hash.Size].Key { | |
// someone clobbered our key. Be polite and try again. | |
log.Println("Someone clobbered our key. We're going to try again. Try", tries, "nesting..for", string(key[:]), value) | |
return hash.Set(key, value) | |
} else if value != hash.Table[(offset+i)%hash.Size].Index && key == hash.Table[(offset+i)%hash.Size].Key { | |
// this is our key. Fuck 'em. | |
log.Println("Someone clobbered our value. This is OUR key. writing again for the ", tries, "time") | |
tries += 1 | |
goto trySet | |
} | |
hash.Table[(offset+i)%hash.Size].Index = value | |
hash.Table[(offset+i)%hash.Size].Key = key | |
return hash.Table[(offset+i)%hash.Size].Index, nil | |
} | |
} | |
} | |
var ( | |
pressure = flag.Int("pressure", 5, "Amount of write pressure to apply.") | |
tablesize = flag.Int("size", 3e7, "Hash Table Size") | |
numkeys = flag.Int("keys", 2.5e7, "Number of Keys. Go larger than tablesize to check full table behaviour.") | |
maxprocs = flag.Int("maxprocs", runtime.NumCPU()*6, "Max threads. Defaults to 6 * core") | |
) | |
func main() { | |
flag.Parse() | |
log.Println("Commencing test with pressure", *pressure, "table size", *tablesize, "keys", *numkeys) | |
very_start := time.Now() | |
runtime.GOMAXPROCS(*maxprocs) | |
myhash := MyHash{} | |
myhash.Init(*tablesize) | |
numthreads := runtime.NumCPU() * 6 | |
numsets := *numkeys / numthreads | |
var wg sync.WaitGroup | |
start := time.Now() | |
for threadnum := 0; threadnum < numthreads; threadnum++ { | |
wg.Add(1) | |
//log.Println("goroutine for", threadnum, "with numsets", numsets) | |
go func(i int) { | |
var mykey [35]byte | |
for counter := (numsets * i); counter < numsets*(i+1+*pressure); counter++ { | |
mykey = [35]byte{} | |
copy(mykey[:], "heyy"+strconv.Itoa(counter)) | |
_, err := myhash.Set(mykey, int32(counter)) | |
if err != nil { | |
log.Println("Error setting:", err) | |
os.Exit(1) | |
} | |
} | |
wg.Done() | |
}(threadnum) | |
} | |
wg.Wait() | |
log.Println("Done filling hashtable with ", numsets*numthreads, "items. Took", time.Since(start), "to fill") | |
var mykey [35]byte | |
for i := int32(*tablesize / 2); i < int32(*tablesize/2)+5; i++ { | |
mykey = [35]byte{} | |
start = time.Now() | |
copy(mykey[:], "heyy"+strconv.Itoa(int(i))) | |
_ = myhash.Get(mykey) | |
log.Println("Lookup took", time.Since(start)) | |
} | |
start = time.Now() | |
log.Println("Commencing Garbage Collection.") | |
runtime.GC() | |
log.Println("Done. Took", time.Since(start)) | |
start = time.Now() | |
log.Println("Commencing Second Garbage Collection.") | |
runtime.GC() | |
log.Println("Done. Took", time.Since(start)) | |
log.Println("All done with test hash containing", len(myhash.Table), ". Total runtime", time.Since(very_start)) | |
} |
The reason it's not thread safe is that writing an int32 isn't guaranteed atomic by go. But, for this use case, we don't mind a garbage value being written. It would be far slower to lock and unlock. Instead, if a garbage int32 is written half by one thread and half by the other, the checks further down the function will sort it out; providing the key was fully written by one thread or the other, one or the other will claim the slot and rewrite.
If the threads clobbered the [35]byte key by writing simultaneously, they will both move on to another slot leaving some garbage in the table.
Whether this is a good compromise in exchange for huge speedups, I leave to you. But note, I do say in big letters not to use it at the top. :)
On further thinking, I believe it's necessary to use a Compare and Swap on key setting to ensure that only one thread has the slot. In go, that means you'll probably be using numeric keys. You could use pointers, but for a large table you will end up with significant GC worries as the number of pointers grows.
This is absolutely not goroutine-safe!