Created
October 29, 2020 03:18
-
-
Save ykdojo/4f9741398c3653d3dc8b95ef52bb3fcf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Here is my Python implementation of the hash table data structure. | |
# And here's my video where I talk about it in depth: https://youtu.be/sfWyugl4JWA | |
class Hashtable: | |
# Assumption: table_length is a prime number (for example, 5, 701, or 30011) | |
def __init__(self, table_length): | |
self.table = [None] * table_length | |
## An internal search function. | |
# If it finds the given key in the table, it will return (True, index) | |
# If not, it will return (False, the index where it would be inserted) | |
# If the table is full, it will return (False, -1) | |
# If test_mode is true, it will also add a third return value that shows | |
# the number of elements that have been checked (in the case where | |
# the key is not found and the table is not full). | |
# Assumption: key is a string. | |
def _search(self, key, test_mode = False): | |
hash1 = hash(key) | |
m = len(self.table) | |
initial_i = hash1 % m # initial_i's range: [0, m - 1] (inclusive) | |
count = 1 # the number of elements that have been checked - only used when test_mode = True. | |
if not self.table[initial_i]: | |
if test_mode: | |
return (False, initial_i, count) | |
return (False, initial_i) | |
elif self.table[initial_i][0] == key: | |
return (True, initial_i) | |
## We have a collision! | |
# We're going to deal with it with double-hashing. | |
# First, create a new hashed value by slightly modifying the input key. | |
hash2 = hash(key + 'd') | |
# hash2 = hash1 # I tried this method and it worked as well as the above method. | |
# hash2 = 0 # This would be linear probing. It did NOT perform as well as the above method. | |
# Our constraint here: 1 <= c < m | |
# Note that hash2 % (m - 1) produces a number in the range [0, m - 2] inclusive | |
c = hash2 % (m - 1) + 1 | |
i = (initial_i + c) % m | |
while i != initial_i: | |
count += 1 | |
if not self.table[i]: | |
if test_mode: | |
return (False, i, count) | |
return (False, i) | |
elif self.table[i][0] == key: | |
return (True, i) | |
else: | |
i = (i + c) % m | |
return (False, -1) # The table is full. | |
## Inserts a key value pair. If the key exists, it updates the value. | |
# If it doesn't exit, it inserts it. | |
# If the table is full, it returns without doing anything. | |
# Assumption: key is a string. | |
# Returns: nothing. | |
def insert(self, key, value): | |
result = self._search(key) | |
if result[1] == -1: | |
return # The table is full. | |
# If the key already exists, update the value. | |
if result[0]: | |
i = result[1] | |
self.table[i][1] = value | |
return | |
# At this point, we'll know that the given key doesn't exist | |
# in the table yet, and that result[1] is the index | |
# where the new key-value pair should be inserted. | |
i = result[1] | |
self.table[i] = [key, value] | |
## Returns the value if the key is found. | |
# If not, it will return False. | |
# Assumption: key is a string. | |
def search(self, key): | |
result = self._search(key) | |
if result[1] == -1: | |
return False # The table is full. | |
if not result[0]: | |
return False | |
i = result[1] | |
return self.table[i][1] | |
## I haven't implemented this yet. | |
# To implement this one with open-addressing (double-hashing), | |
# you should replace the deleted entry with a dummy value instead of deleting it. | |
def delete(key): | |
pass | |
## The following is for testing the Hashtable class. | |
if __name__ == "__main__": | |
ht = Hashtable(5) | |
ht.insert('key1', 9) | |
ht.insert('key2', 2) | |
ht.insert('key3', 10) | |
ht.insert('key4', 100) | |
assert not ht.search('key5') # Since this key doesn't exist yet, it should return False. | |
ht.insert('key5', 10) | |
assert ht.search('key1') == 9 | |
assert ht.search('key2') == 2 | |
assert ht.search('key3') == 10 | |
assert ht.search('key4') == 100 | |
assert ht.search('key5') == 10 | |
assert not ht.search('key6') # Since this key doesn't exist, it should return False. | |
# You should be able to update existing values, too. | |
ht.insert('key3', -1) | |
ht.insert('key5', 42) | |
assert ht.search('key3') == -1 | |
assert ht.search('key5') == 42 | |
## The following part is for checking the number of | |
# elements being checked for un unsuccessful search. | |
ht2 = Hashtable(30011) | |
for i in range(1, 20004): # We're going to fill in about two thirds of the table. | |
ht2.insert('key' + str(i), 1) | |
# Try searching for a bunch of new keys. | |
# Then, find the average number of elements that were checked. | |
total = 0 | |
num_trials = 10**5 | |
max_num = 0 | |
for i in range(0, num_trials): | |
num_checked = ht2._search('new_key_' + str(i), test_mode=True)[2] | |
total += num_checked | |
if num_checked > max_num: | |
max_num = num_checked | |
average = total / num_trials | |
print('Average number of elements checked:', average) | |
print('Max number of elements checked:', max_num) |
Golang implementation, based on @CaptainLupa 's one:
package main
import "fmt"
type Person struct {
name string
age int
}
type KeyValue struct {
key string
value interface{}
}
type HashMap struct {
size int
table []*KeyValue
}
func NewHashMap(size int) HashMap {
return HashMap{
size: size,
table: make([]*KeyValue, size),
}
}
func (m *HashMap) findIndex(key string) (bool, int) {
hash1 := hash(key)
index1 := hash1 % m.size
if m.table[index1] == nil {
return false, index1
}
if m.table[index1].key == key {
return true, index1
}
// we have a collision - let's start double hashing
hash2 := hash(key + string('d'))
stride := hash2%(m.size-1) + 1
index2 := (index1 + stride) % m.size
for index2 != index1 {
if m.table[index2] == nil {
return false, index2
}
if m.table[index2].key == key {
return true, index2
}
index2 = (index2 + stride) % m.size
}
// table is full
return false, -1
}
func (m *HashMap) Insert(key string, value interface{}) {
found, index := m.findIndex(key)
if index == -1 {
fmt.Println("could not insert into the map because it is full")
return
}
if found {
m.table[index].value = value
} else {
m.table[index] = &KeyValue{key, value}
}
}
func (m *HashMap) Search(key string) interface{} {
found, index := m.findIndex(key)
if found {
return m.table[index].value
}
return nil
}
func hash(s string) int {
hash := 5381
for _, c := range s {
hash = hash*33 + int(c)
}
return hash
}
func main() {
// the size of the map must be a prime number
hashMap := NewHashMap(7)
p1 := Person{"Carlos", 36}
p2 := Person{"Marcia", 39}
p3 := Person{"Rebeca", 2}
p4 := Person{"Isadora", 1}
p5 := Person{"Silvio", 77}
p6 := Person{"Marlene", 65}
p7 := Person{"Belinha", 16}
hashMap.Insert(p1.name, p1.age)
fmt.Println(hashMap.table)
hashMap.Insert(p2.name, p2.age)
fmt.Println(hashMap.table)
hashMap.Insert(p3.name, p3.age)
fmt.Println(hashMap.table)
hashMap.Insert(p4.name, p4.age)
fmt.Println(hashMap.table)
hashMap.Insert(p5.name, p5.age)
fmt.Println(hashMap.table)
hashMap.Insert(p6.name, p6.age)
fmt.Println(hashMap.table)
hashMap.Insert(p7.name, p7.age)
fmt.Println(hashMap.table)
fmt.Println(hashMap.Search(p1.name))
fmt.Println(hashMap.Search(p2.name))
fmt.Println(hashMap.Search(p3.name))
fmt.Println(hashMap.Search(p4.name))
fmt.Println(hashMap.Search(p5.name))
fmt.Println(hashMap.Search(p6.name))
fmt.Println(hashMap.Search(p7.name))
hashMap.Insert(p1.name, 999)
fmt.Println(hashMap.Search(p1.name))
p8 := Person{"Renato", 34}
hashMap.Insert(p8.name, p8.age)
fmt.Println(hashMap.Search(p8.name))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tried to make line by line breakdown of this for people who aren't getting it still(took me like 15 minutes to get it as well lol):