Skip to content

Instantly share code, notes, and snippets.

@ykdojo
Created October 29, 2020 03:18
Show Gist options
  • Save ykdojo/4f9741398c3653d3dc8b95ef52bb3fcf to your computer and use it in GitHub Desktop.
Save ykdojo/4f9741398c3653d3dc8b95ef52bb3fcf to your computer and use it in GitHub Desktop.
# Here is my Python implementation of the hash table data structure.
# And here's my video where I talk about it in depth: https://youtu.be/sfWyugl4JWA
class Hashtable:
# Assumption: table_length is a prime number (for example, 5, 701, or 30011)
def __init__(self, table_length):
self.table = [None] * table_length
## An internal search function.
# If it finds the given key in the table, it will return (True, index)
# If not, it will return (False, the index where it would be inserted)
# If the table is full, it will return (False, -1)
# If test_mode is true, it will also add a third return value that shows
# the number of elements that have been checked (in the case where
# the key is not found and the table is not full).
# Assumption: key is a string.
def _search(self, key, test_mode = False):
hash1 = hash(key)
m = len(self.table)
initial_i = hash1 % m # initial_i's range: [0, m - 1] (inclusive)
count = 1 # the number of elements that have been checked - only used when test_mode = True.
if not self.table[initial_i]:
if test_mode:
return (False, initial_i, count)
return (False, initial_i)
elif self.table[initial_i][0] == key:
return (True, initial_i)
## We have a collision!
# We're going to deal with it with double-hashing.
# First, create a new hashed value by slightly modifying the input key.
hash2 = hash(key + 'd')
# hash2 = hash1 # I tried this method and it worked as well as the above method.
# hash2 = 0 # This would be linear probing. It did NOT perform as well as the above method.
# Our constraint here: 1 <= c < m
# Note that hash2 % (m - 1) produces a number in the range [0, m - 2] inclusive
c = hash2 % (m - 1) + 1
i = (initial_i + c) % m
while i != initial_i:
count += 1
if not self.table[i]:
if test_mode:
return (False, i, count)
return (False, i)
elif self.table[i][0] == key:
return (True, i)
else:
i = (i + c) % m
return (False, -1) # The table is full.
## Inserts a key value pair. If the key exists, it updates the value.
# If it doesn't exit, it inserts it.
# If the table is full, it returns without doing anything.
# Assumption: key is a string.
# Returns: nothing.
def insert(self, key, value):
result = self._search(key)
if result[1] == -1:
return # The table is full.
# If the key already exists, update the value.
if result[0]:
i = result[1]
self.table[i][1] = value
return
# At this point, we'll know that the given key doesn't exist
# in the table yet, and that result[1] is the index
# where the new key-value pair should be inserted.
i = result[1]
self.table[i] = [key, value]
## Returns the value if the key is found.
# If not, it will return False.
# Assumption: key is a string.
def search(self, key):
result = self._search(key)
if result[1] == -1:
return False # The table is full.
if not result[0]:
return False
i = result[1]
return self.table[i][1]
## I haven't implemented this yet.
# To implement this one with open-addressing (double-hashing),
# you should replace the deleted entry with a dummy value instead of deleting it.
def delete(key):
pass
## The following is for testing the Hashtable class.
if __name__ == "__main__":
ht = Hashtable(5)
ht.insert('key1', 9)
ht.insert('key2', 2)
ht.insert('key3', 10)
ht.insert('key4', 100)
assert not ht.search('key5') # Since this key doesn't exist yet, it should return False.
ht.insert('key5', 10)
assert ht.search('key1') == 9
assert ht.search('key2') == 2
assert ht.search('key3') == 10
assert ht.search('key4') == 100
assert ht.search('key5') == 10
assert not ht.search('key6') # Since this key doesn't exist, it should return False.
# You should be able to update existing values, too.
ht.insert('key3', -1)
ht.insert('key5', 42)
assert ht.search('key3') == -1
assert ht.search('key5') == 42
## The following part is for checking the number of
# elements being checked for un unsuccessful search.
ht2 = Hashtable(30011)
for i in range(1, 20004): # We're going to fill in about two thirds of the table.
ht2.insert('key' + str(i), 1)
# Try searching for a bunch of new keys.
# Then, find the average number of elements that were checked.
total = 0
num_trials = 10**5
max_num = 0
for i in range(0, num_trials):
num_checked = ht2._search('new_key_' + str(i), test_mode=True)[2]
total += num_checked
if num_checked > max_num:
max_num = num_checked
average = total / num_trials
print('Average number of elements checked:', average)
print('Max number of elements checked:', max_num)
@zz-ml
Copy link

zz-ml commented Nov 16, 2020 via email

@shawn120
Copy link

Thannnnnnnnnks!

@afridi69
Copy link

afridi69 commented Jan 4, 2021

dojo not understand please quickly make another video.

@Jens-Eckert
Copy link

Jens-Eckert commented Oct 19, 2021

Tried to make line by line breakdown of this for people who aren't getting it still(took me like 15 minutes to get it as well lol):

# This simply lets us add a type hint of Any to function arguments to let python
# know that it can be literally anything.
from typing import Any


class HashMap:
    def __init__(self, size):
        # Create list of size size
        # i.e., size = 5, self.map = [None, None, None, None, None]
        self.map = [None] * size
        # store initial length of list
        self.size = len(self.map)

    def checkForKey(self, key: str):
        h1 = hash(key)  # create hash of key
        firstIndex = h1 % self.size  # Make hashed key fit inside self.map

        if not self.map[firstIndex]:  # if self.map[firstIndex] is None
            return (False, firstIndex)
            # Key does not exist and there is nothing there, so return False and firstIndex to insert()
        elif self.map[firstIndex][0] == key:
            return (True, firstIndex)
            # Both the key and the value are stored in self.map as [key, value],
            # so self.map[firstIndex][0] would grab index 0 of the [key, value] which is the key.
            # If the existing key is equal to the key we are checking for, we tell the insert function to
            # simply replace the value of that [key, value] pair.

        # Think of this next part as a big else statement

        # Since both of the above conditions were skipped, there must be a collision where we wanted to
        # place our [key, value] pair.
        # To deal with this, we use double hashing.
        h2 = hash(key + 'd')
        # We create a new hash that is slightly different than our first one

        # We create our 'c' variable as Dojo calls it, I will call it stride here because it will represent
        # our stride across the list as we search for a new home for our [key, value] pair.
        stride = h2 % (self.size - 1) + 1
        # Going through this with order of operations we get a stride between 1 and self.size - 1, Dojo
        # explained this well enough in his video

        # Our second index variable
        secondIndex = (firstIndex + stride) % self.size

        while secondIndex != firstIndex:  # Go through the entire table
            # Same conditions as the first if and elif statements
            if not self.map[secondIndex]:
                return (False, secondIndex)
            elif self.map[secondIndex][0] == key:
                return (True, secondIndex)
            else:
                secondIndex = (secondIndex + stride) % self.size

        return (False, -1)  # Table is full

    def insert(self, key: str, value: Any):
        # Run the search function we just wrote
        searchResult = self.checkForKey(key)

        # A Result of -1 means the table is full, so end the function here
        if searchResult[1] == -1:
            return

        # A result of True means that the key already exists and we should simply update the value
        # associated with it
        if searchResult[0]:
            index = searchResult[1]
            self.map[index][1] = value
            return

        # There is now only one option left, and that is to put the new [key, value] pair where the hashing
        # we did in the checkForKey() function tells us to
        index = searchResult[1]
        self.map[index] = [key, value]

    def get(self, key: str):
        # Basically the same process as the insert() function, except here we just return the value of the
        # [key, value] pair
        searchResult = self.checkForKey(key)

        # Table is full
        if searchResult[1] == -1:
            return

        # Key does not exist
        if not searchResult[0]:
            return False

        # Return value of [key, value]
        index = searchResult[1]
        return self.map[index][1]


map = HashMap(10)

map.insert('key1', 12)
print(map.get('key1'))
map.insert('key2', 66)
map.insert('key3', 5)
# Because we used Any when building the insert function, the value can vary from key to key.
map.insert('key4', 'elephant')

# Prints False because key5 does not exist in our map yet.
print(map.get('key5'))
map.insert('key5', 11)
# Prints 11 now
print(map.get('key5'))
print(map.get('key4'))
# Update key4
map.insert('key4', 'Yolo')
print(map.get('key4'))

@ceferrari
Copy link

ceferrari commented Aug 6, 2022

Golang implementation, based on @CaptainLupa 's one:

package main

import "fmt"

type Person struct {
	name string
	age  int
}

type KeyValue struct {
	key   string
	value interface{}
}

type HashMap struct {
	size  int
	table []*KeyValue
}

func NewHashMap(size int) HashMap {
	return HashMap{
		size:  size,
		table: make([]*KeyValue, size),
	}
}

func (m *HashMap) findIndex(key string) (bool, int) {
	hash1 := hash(key)
	index1 := hash1 % m.size
	if m.table[index1] == nil {
		return false, index1
	}
	if m.table[index1].key == key {
		return true, index1
	}

	// we have a collision - let's start double hashing
	hash2 := hash(key + string('d'))
	stride := hash2%(m.size-1) + 1
	index2 := (index1 + stride) % m.size
	for index2 != index1 {
		if m.table[index2] == nil {
			return false, index2
		}
		if m.table[index2].key == key {
			return true, index2
		}
		index2 = (index2 + stride) % m.size
	}

	// table is full
	return false, -1
}

func (m *HashMap) Insert(key string, value interface{}) {
	found, index := m.findIndex(key)
	if index == -1 {
		fmt.Println("could not insert into the map because it is full")
		return
	}
	if found {
		m.table[index].value = value
	} else {
		m.table[index] = &KeyValue{key, value}
	}
}

func (m *HashMap) Search(key string) interface{} {
	found, index := m.findIndex(key)
	if found {
		return m.table[index].value
	}
	return nil
}

func hash(s string) int {
	hash := 5381
	for _, c := range s {
		hash = hash*33 + int(c)
	}
	return hash
}

func main() {
	// the size of the map must be a prime number
	hashMap := NewHashMap(7)

	p1 := Person{"Carlos", 36}
	p2 := Person{"Marcia", 39}
	p3 := Person{"Rebeca", 2}
	p4 := Person{"Isadora", 1}
	p5 := Person{"Silvio", 77}
	p6 := Person{"Marlene", 65}
	p7 := Person{"Belinha", 16}

	hashMap.Insert(p1.name, p1.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p2.name, p2.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p3.name, p3.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p4.name, p4.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p5.name, p5.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p6.name, p6.age)
	fmt.Println(hashMap.table)
	hashMap.Insert(p7.name, p7.age)
	fmt.Println(hashMap.table)

	fmt.Println(hashMap.Search(p1.name))
	fmt.Println(hashMap.Search(p2.name))
	fmt.Println(hashMap.Search(p3.name))
	fmt.Println(hashMap.Search(p4.name))
	fmt.Println(hashMap.Search(p5.name))
	fmt.Println(hashMap.Search(p6.name))
	fmt.Println(hashMap.Search(p7.name))

	hashMap.Insert(p1.name, 999)
	fmt.Println(hashMap.Search(p1.name))

	p8 := Person{"Renato", 34}
	hashMap.Insert(p8.name, p8.age)
	fmt.Println(hashMap.Search(p8.name))
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment