Skip to content

Instantly share code, notes, and snippets.

@vderyagin
Last active December 11, 2015 17:18
Show Gist options
  • Save vderyagin/4633308 to your computer and use it in GitHub Desktop.
Save vderyagin/4633308 to your computer and use it in GitHub Desktop.
package hash_table
import "fmt"
type bucket struct {
head *entry
}
type entry struct {
key string
value interface{}
next *entry
}
// Inserts entry in hash.
// Returns true if value with the given key did not exist before
// insertion, false otherwise.
func (b *bucket) set(key string, value interface{}) bool {
for e := b.head; e != nil; e = e.next {
if e.key == key {
e.value = value
return false
}
}
b.head = &entry{
key: key,
value: value,
next: b.head,
}
return true
}
func (b *bucket) get(key string) (interface{}, error) {
if b.isEmpty() {
return nil, keyNotFoundError(key)
}
for e := b.head; e != nil; e = e.next {
if e.key == key {
return e.value, nil
}
}
return nil, keyNotFoundError(key)
}
func (b *bucket) remove(key string) error {
if b.isEmpty() {
return keyNotFoundError(key)
}
if first := b.head; first.key == key {
b.head = first.next
return nil
}
prev := b.head
for e := b.head; e != nil; e = e.next {
if e.key == key {
next := e.next
prev.next = next
}
prev = e
}
return keyNotFoundError(key)
}
func (b *bucket) isEmpty() bool {
return b == nil || b.head == nil
}
func (b *bucket) hasKey(key string) bool {
for e := b.head; e != nil; e = e.next {
if e.key == key {
return true
}
}
return false
}
func (b *bucket) size() int {
if b.isEmpty() {
return 0
}
n := 0
for e := b.head; e != nil; e = e.next {
n++
}
return n
}
func (b *bucket) forEach(f func(string, interface{})) {
if b == nil {
return
}
for e := b.head; e != nil; e = e.next {
f(e.key, e.value)
}
}
func keyNotFoundError(key string) error {
return fmt.Errorf("key '%s' not found", key)
}
package hash_table
import "testing"
func TestBucketIsEmpty(t *testing.T) {
var bb *bucket
if !bb.isEmpty() {
t.Error("non-initialized bucket should be empty")
}
b := new(bucket)
if !b.isEmpty() {
t.Error("newly created bucket should be empty")
}
b.set("foo", 9)
if b.isEmpty() {
t.Error("bucket with items is not considered empty")
}
}
func TestBucketGet(t *testing.T) {
bt := new(bucket)
bt.set("a", 1)
bt.set("b", 2)
bt.set("c", 3)
a, errA := bt.get("a")
b, errB := bt.get("b")
c, errC := bt.get("c")
if errA != nil || errB != nil || errC != nil {
t.Error("querying existing entries should not return errors")
}
if a != 1 || b != 2 || c != 3 {
t.Error("quirying by key should return right values")
}
x, err := bt.get("abc")
if err == nil {
t.Error("should return error when querying by non-existant key")
}
if x != nil {
t.Error("should return nil as result of invalid query")
}
}
func TestBucketRepeatedSet(t *testing.T) {
bt := new(bucket)
aNew := bt.set("a", 1)
bNew := bt.set("b", 2)
cNew := bt.set("c", 3)
if !(aNew && bNew && cNew) {
t.Error("all elements were newly created")
}
if size := bt.size(); size != 3 {
t.Errorf("%d != %d", size, 3)
}
aNew = bt.set("a", 123)
bNew = bt.set("b", 456)
cNew = bt.set("c", 789)
if aNew || bNew || cNew {
t.Error("none of elements were newly created")
}
if size := bt.size(); size != 3 {
t.Error("size should not have changed")
t.Errorf("%d != %d", size, 3)
}
a, errA := bt.get("a")
b, errB := bt.get("b")
c, errC := bt.get("c")
if errA != nil || errB != nil || errC != nil {
t.Error("querying existing entries should not return errors")
}
if a != 123 || b != 456 || c != 789 {
t.Error("quirying by key should return right values")
}
}
func TestBucketRemove(t *testing.T) {
bt := new(bucket)
err := bt.remove("foo")
if err == nil {
t.Error("should return error when deleting non-existing key")
}
bt.set("a", 1)
bt.set("b", 2)
bt.set("c", 3)
bt.set("d", 4)
bt.set("e", 5)
if bt.remove("a"); bt.hasKey("a") {
t.Error("failure to remove last element in the bucket")
}
if bt.remove("e"); bt.hasKey("e") {
t.Error("failure to remove first element in the bucket")
}
if bt.remove("c"); bt.hasKey("c") {
t.Error("failure to remove middle element in the bucket")
}
}
func TestBucketForEach(t *testing.T) {
b := new(bucket)
b.set("a", 1)
b.set("b", 2)
b.set("c", 3)
b.set("d", 4)
b.set("e", 5)
iteratedOver := map[string]bool{
"a": false,
"b": false,
"c": false,
"d": false,
"e": false,
}
b.forEach(func(key string, _ interface{}) {
iteratedOver[key] = true
})
for _, val := range iteratedOver {
if !val {
t.Error("should iterate over every key")
}
}
}
package hash_table
import (
"bytes"
"fmt"
)
const (
initial_size = 1000
optimal_load = 0.75
load_threshold_max = 1.25
load_threshold_min = 0.25
)
type hashTable struct {
buckets []*bucket
size int
}
func New() *hashTable {
ht := new(hashTable)
ht.init()
return ht
}
func (t *hashTable) init() {
t.buckets = make([]*bucket, initial_size)
t.size = 0
}
func (t *hashTable) Set(key string, value interface{}) {
b := t.bucketFor(key)
newEntry := b.set(key, value)
if newEntry {
t.size++
t.maybeRehash()
}
}
func (t *hashTable) Get(key string) (interface{}, error) {
return t.bucketFor(key).get(key)
}
func (t *hashTable) Remove(key string) error {
err := t.bucketFor(key).remove(key)
if err == nil {
t.size--
t.maybeRehash()
}
return err
}
func (t *hashTable) HasKey(key string) bool {
return t.bucketFor(key).hasKey(key)
}
func (t *hashTable) Size() int {
return t.size
}
func (t *hashTable) IsEmpty() bool {
return t.size == 0
}
func (t *hashTable) ForEach(f func(string, interface{})) {
for _, bucket := range t.buckets {
bucket.forEach(f)
}
}
func (t *hashTable) Clear() {
t.init()
}
func (t *hashTable) String() string {
var s bytes.Buffer
s.WriteString("{ ")
t.ForEach(func(key string, value interface{}) {
fmt.Fprintf(&s, "%#v: %v ", key, value)
})
s.WriteString("}")
return s.String()
}
func (t *hashTable) load() float32 {
return float32(t.size) / float32(len(t.buckets))
}
func (t *hashTable) needsRehashing() bool {
load := t.load()
// Too small:
if load > load_threshold_max {
return true
}
// Too big:
neededBuckets := int(float32(t.size) / optimal_load)
if load < load_threshold_min && neededBuckets > 1000 {
return true
}
return false
}
func (t *hashTable) maybeRehash() {
if t.needsRehashing() {
t.rehash()
}
}
func (t *hashTable) rehash() {
oldBuckets := t.buckets
t.buckets = make([]*bucket, int(float32(t.Size())/optimal_load))
for _, bucket := range oldBuckets {
bucket.forEach(func(key string, value interface{}) {
t.bucketFor(key).set(key, value)
})
}
}
// Find (or initialize) and return bucket approptiate for storing
// given key
func (t *hashTable) bucketFor(key string) *bucket {
idx := stringHash(key) % len(t.buckets)
if t.buckets[idx] == nil {
t.buckets[idx] = new(bucket)
}
return t.buckets[idx]
}
package hash_table
import "testing"
func TestNew(t *testing.T) {
ht := New()
if !ht.IsEmpty() {
t.Error("newly created map should be empty")
}
}
func TestGet(t *testing.T) {
ht := New()
ht.Set("foo", 1)
ht.Set("bar", 2)
foo, err := ht.Get("foo")
if err != nil {
t.Error(err.Error())
}
if foo := foo.(int); foo != 1 {
t.Errorf("%s != %s", foo, 2)
}
bar, err := ht.Get("bar")
if err != nil {
t.Error(err.Error())
}
if bar := bar.(int); bar != 2 {
t.Errorf("%s != %s", bar, 2)
}
}
func TestSet(t *testing.T) {
ht := New()
ht.Set("a", 1)
ht.Set("a", 3)
ht.Set("a", 42)
a, err := ht.Get("a")
if err != nil {
t.Error(err.Error())
}
if a := a.(int); a != 42 {
t.Errorf("%s != %s", a, 42)
}
foo, err := ht.Get("foo")
if err == nil {
t.Error("should return error when querying non-existant key")
}
if foo != nil {
t.Error("should return nil when querying non-existant key")
}
}
func TestRemove(t *testing.T) {
ht := New()
ht.Set("a", 1)
ht.Set("b", 1)
ht.Set("c", 1)
err := ht.Remove("a")
if err != nil {
t.Error("should not return error when removing existing item")
}
if s := ht.Size(); s != 2 {
t.Error("hash table size should decrease after removing item")
}
err = ht.Remove("a")
if err == nil {
t.Error("should return error when removing non-existant item")
}
if s := ht.Size(); s != 2 {
t.Error("hash table size should not change when removing non-existant item")
}
}
func TestSize(t *testing.T) {
ht := New()
if size := ht.Size(); size != 0 {
t.Error("new hash table should have size of zero")
}
ht.Set("a", 1)
if size := ht.Size(); size != 1 {
t.Errorf("%d != %d", size, 1)
}
ht.Set("b", 1)
if size := ht.Size(); size != 2 {
t.Errorf("%d != %d", size, 2)
}
ht.Set("c", 1)
if size := ht.Size(); size != 3 {
t.Errorf("%d != %d", size, 3)
}
ht.Set("b", 1)
if size := ht.Size(); size != 3 {
t.Errorf("%d != %d", size, 3)
}
ht.Remove("c")
if size := ht.Size(); size != 2 {
t.Errorf("%d != %d", size, 2)
}
ht.Remove("foo")
if size := ht.Size(); size != 2 {
t.Errorf("%d != %d", size, 2)
}
ht.rehash()
if size := ht.Size(); size != 2 {
t.Errorf("%d != %d", size, 2)
}
}
func TestHasKey(t *testing.T) {
ht := New()
if ht.HasKey("foo") {
t.Error("newly created hash table should not have any keys")
}
ht.Set("foo", 1)
if !ht.HasKey("foo") {
t.Error("should be true for existant keys")
}
ht.Remove("foo")
if ht.HasKey("foo") {
t.Error("should be false for removed keys")
}
}
func TestForEach(t *testing.T) {
ht := New()
iteratedOver := map[string]bool{
"bar": false,
"corge": false,
"fred": false,
"garply": false,
"grault": false,
"qux": false,
"waldo": false,
"xyzzy": false,
}
for key := range iteratedOver {
ht.Set(key, struct{}{})
}
ht.ForEach(func(key string, _ interface{}) {
iteratedOver[key] = true
})
for key, value := range iteratedOver {
if !value {
t.Errorf("did not iterate over '%s'", key)
}
}
}
func TestClear(t *testing.T) {
ht := New()
ht.Set("a", struct{}{})
ht.Set("b", struct{}{})
ht.Set("c", struct{}{})
ht.Clear()
if !ht.IsEmpty() {
t.Error("hash table should be cleared of content")
}
}
package hash_table
const (
small_prime = (2 << 6) - 1
big_prime = (2 << 30) - 1
)
func stringHash(s string) int {
var hash int64 = 0
for _, char := range s {
hash = (small_prime*hash + int64(char)) % big_prime
}
return int(hash)
}
package hash_table
import "testing"
func TestReturnsSameHashesForEqualStrings(t *testing.T) {
h1 := stringHash("pqa")
h2 := stringHash("pqa")
if h1 != h2 {
t.Error("hashes of equal strings should be equal")
}
}
func TestReturnsDifferentHashesForDifferentStrings(t *testing.T) {
h1 := stringHash("123")
h2 := stringHash("abc")
h3 := stringHash("sjflksfjk")
h4 := stringHash("'/.]/]'[/'/][]")
if h1 == h2 || h1 == h3 || h1 == h4 || h2 == h3 || h3 == h4 {
t.Error("hashes of different strings should not be equal")
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment