Skip to content

Instantly share code, notes, and snippets.

@ykdojo
Created October 29, 2020 03:18
Show Gist options
  • Save ykdojo/4f9741398c3653d3dc8b95ef52bb3fcf to your computer and use it in GitHub Desktop.
Save ykdojo/4f9741398c3653d3dc8b95ef52bb3fcf to your computer and use it in GitHub Desktop.
# Here is my Python implementation of the hash table data structure.
# And here's my video where I talk about it in depth: https://youtu.be/sfWyugl4JWA
class Hashtable:
# Assumption: table_length is a prime number (for example, 5, 701, or 30011)
def __init__(self, table_length):
self.table = [None] * table_length
## An internal search function.
# If it finds the given key in the table, it will return (True, index)
# If not, it will return (False, the index where it would be inserted)
# If the table is full, it will return (False, -1)
# If test_mode is true, it will also add a third return value that shows
# the number of elements that have been checked (in the case where
# the key is not found and the table is not full).
# Assumption: key is a string.
def _search(self, key, test_mode = False):
hash1 = hash(key)
m = len(self.table)
initial_i = hash1 % m # initial_i's range: [0, m - 1] (inclusive)
count = 1 # the number of elements that have been checked - only used when test_mode = True.
if not self.table[initial_i]:
if test_mode:
return (False, initial_i, count)
return (False, initial_i)
elif self.table[initial_i][0] == key:
return (True, initial_i)
## We have a collision!
# We're going to deal with it with double-hashing.
# First, create a new hashed value by slightly modifying the input key.
hash2 = hash(key + 'd')
# hash2 = hash1 # I tried this method and it worked as well as the above method.
# hash2 = 0 # This would be linear probing. It did NOT perform as well as the above method.
# Our constraint here: 1 <= c < m
# Note that hash2 % (m - 1) produces a number in the range [0, m - 2] inclusive
c = hash2 % (m - 1) + 1
i = (initial_i + c) % m
while i != initial_i:
count += 1
if not self.table[i]:
if test_mode:
return (False, i, count)
return (False, i)
elif self.table[i][0] == key:
return (True, i)
else:
i = (i + c) % m
return (False, -1) # The table is full.
## Inserts a key value pair. If the key exists, it updates the value.
# If it doesn't exit, it inserts it.
# If the table is full, it returns without doing anything.
# Assumption: key is a string.
# Returns: nothing.
def insert(self, key, value):
result = self._search(key)
if result[1] == -1:
return # The table is full.
# If the key already exists, update the value.
if result[0]:
i = result[1]
self.table[i][1] = value
return
# At this point, we'll know that the given key doesn't exist
# in the table yet, and that result[1] is the index
# where the new key-value pair should be inserted.
i = result[1]
self.table[i] = [key, value]
## Returns the value if the key is found.
# If not, it will return False.
# Assumption: key is a string.
def search(self, key):
result = self._search(key)
if result[1] == -1:
return False # The table is full.
if not result[0]:
return False
i = result[1]
return self.table[i][1]
## I haven't implemented this yet.
# To implement this one with open-addressing (double-hashing),
# you should replace the deleted entry with a dummy value instead of deleting it.
def delete(key):
pass
## The following is for testing the Hashtable class.
if __name__ == "__main__":
ht = Hashtable(5)
ht.insert('key1', 9)
ht.insert('key2', 2)
ht.insert('key3', 10)
ht.insert('key4', 100)
assert not ht.search('key5') # Since this key doesn't exist yet, it should return False.
ht.insert('key5', 10)
assert ht.search('key1') == 9
assert ht.search('key2') == 2
assert ht.search('key3') == 10
assert ht.search('key4') == 100
assert ht.search('key5') == 10
assert not ht.search('key6') # Since this key doesn't exist, it should return False.
# You should be able to update existing values, too.
ht.insert('key3', -1)
ht.insert('key5', 42)
assert ht.search('key3') == -1
assert ht.search('key5') == 42
## The following part is for checking the number of
# elements being checked for un unsuccessful search.
ht2 = Hashtable(30011)
for i in range(1, 20004): # We're going to fill in about two thirds of the table.
ht2.insert('key' + str(i), 1)
# Try searching for a bunch of new keys.
# Then, find the average number of elements that were checked.
total = 0
num_trials = 10**5
max_num = 0
for i in range(0, num_trials):
num_checked = ht2._search('new_key_' + str(i), test_mode=True)[2]
total += num_checked
if num_checked > max_num:
max_num = num_checked
average = total / num_trials
print('Average number of elements checked:', average)
print('Max number of elements checked:', max_num)
@ceferrari
Copy link

ceferrari commented Aug 6, 2022

Golang implementation, based on @CaptainLupa 's one:

package main

import "fmt"

type Person struct {
	name string
	age  int
}

type KeyValue struct {
	key   string
	value interface{}
}

type HashMap struct {
	size  int
	table []*KeyValue
}

func NewHashMap(size int) HashMap {
	return HashMap{
		size:  size,
		table: make([]*KeyValue, size),
	}
}

func (m *HashMap) findIndex(key string) (bool, int) {
	hash1 := hash(key)
	index1 := hash1 % m.size
	if m.table[index1] == nil {
		return false, index1
	}
	if m.table[index1].key == key {
		return true, index1
	}

	// we have a collision - let's start double hashing
	hash2 := hash(key + string('d'))
	stride := hash2%(m.size-1) + 1
	index2 := (index1 + stride) % m.size
	for index2 != index1 {
		if m.table[index2] == nil {
			return false, index2
		}
		if m.table[index2].key == key {
			return true, index2
		}
		index2 = (index2 + stride) % m.size
	}

	// table is full
	return false, -1
}

func (m *HashMap) Insert(key string, value interface{}) {
	found, index := m.findIndex(key)
	if index == -1 {
		fmt.Println("could not insert into the map because it is full")
		return
	}
	if found {
		m.table[index].value = value
	} else {
		m.table[index] = &KeyValue{key, value}
	}
}

func (m *HashMap) Search(key string) interface{} {
	found, index := m.findIndex(key)
	if found {
		return m.table[index].value
	}
	return nil
}

func hash(s string) int {
	hash := 5381
	for _, c := range s {
		hash = hash*33 + int(c)
	}
	return hash
}

func main() {
	// the size of the map must be a prime number
	hashMap := NewHashMap(7)

	p1 := Person{"Carlos", 36}
	p2 := Person{"Marcia", 39}
	p3 := Person{"Rebeca", 2}
	p4 := Person{"Isadora", 1}
	p5 := Person{"Silvio", 77}
	p6 := Person{"Marlene", 65}
	p7 := Person{"Belinha", 16}

	hashMap.Insert(p1.name, p1.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p2.name, p2.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p3.name, p3.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p4.name, p4.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p5.name, p5.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p6.name, p6.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p7.name, p7.age)
	fmt.Println(hashMap.table)

	fmt.Println(hashMap.Search(p1.name))
	fmt.Println(hashMap.Search(p2.name))
	fmt.Println(hashMap.Search(p3.name))
	fmt.Println(hashMap.Search(p4.name))
	fmt.Println(hashMap.Search(p5.name))
	fmt.Println(hashMap.Search(p6.name))
	fmt.Println(hashMap.Search(p7.name))

	hashMap.Insert(p1.name, 999)
	fmt.Println(hashMap.Search(p1.name))

	p8 := Person{"Renato", 34}
	hashMap.Insert(p8.name, p8.age)
	fmt.Println(hashMap.Search(p8.name))
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment