Created
October 29, 2020 03:18
-
-
Save ykdojo/4f9741398c3653d3dc8b95ef52bb3fcf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Here is my Python implementation of the hash table data structure. | |
# And here's my video where I talk about it in depth: https://youtu.be/sfWyugl4JWA | |
class Hashtable: | |
# Assumption: table_length is a prime number (for example, 5, 701, or 30011) | |
def __init__(self, table_length): | |
self.table = [None] * table_length | |
## An internal search function. | |
# If it finds the given key in the table, it will return (True, index) | |
# If not, it will return (False, the index where it would be inserted) | |
# If the table is full, it will return (False, -1) | |
# If test_mode is true, it will also add a third return value that shows | |
# the number of elements that have been checked (in the case where | |
# the key is not found and the table is not full). | |
# Assumption: key is a string. | |
def _search(self, key, test_mode = False): | |
hash1 = hash(key) | |
m = len(self.table) | |
initial_i = hash1 % m # initial_i's range: [0, m - 1] (inclusive) | |
count = 1 # the number of elements that have been checked - only used when test_mode = True. | |
if not self.table[initial_i]: | |
if test_mode: | |
return (False, initial_i, count) | |
return (False, initial_i) | |
elif self.table[initial_i][0] == key: | |
return (True, initial_i) | |
## We have a collision! | |
# We're going to deal with it with double-hashing. | |
# First, create a new hashed value by slightly modifying the input key. | |
hash2 = hash(key + 'd') | |
# hash2 = hash1 # I tried this method and it worked as well as the above method. | |
# hash2 = 0 # This would be linear probing. It did NOT perform as well as the above method. | |
# Our constraint here: 1 <= c < m | |
# Note that hash2 % (m - 1) produces a number in the range [0, m - 2] inclusive | |
c = hash2 % (m - 1) + 1 | |
i = (initial_i + c) % m | |
while i != initial_i: | |
count += 1 | |
if not self.table[i]: | |
if test_mode: | |
return (False, i, count) | |
return (False, i) | |
elif self.table[i][0] == key: | |
return (True, i) | |
else: | |
i = (i + c) % m | |
return (False, -1) # The table is full. | |
## Inserts a key value pair. If the key exists, it updates the value. | |
# If it doesn't exit, it inserts it. | |
# If the table is full, it returns without doing anything. | |
# Assumption: key is a string. | |
# Returns: nothing. | |
def insert(self, key, value): | |
result = self._search(key) | |
if result[1] == -1: | |
return # The table is full. | |
# If the key already exists, update the value. | |
if result[0]: | |
i = result[1] | |
self.table[i][1] = value | |
return | |
# At this point, we'll know that the given key doesn't exist | |
# in the table yet, and that result[1] is the index | |
# where the new key-value pair should be inserted. | |
i = result[1] | |
self.table[i] = [key, value] | |
## Returns the value if the key is found. | |
# If not, it will return False. | |
# Assumption: key is a string. | |
def search(self, key): | |
result = self._search(key) | |
if result[1] == -1: | |
return False # The table is full. | |
if not result[0]: | |
return False | |
i = result[1] | |
return self.table[i][1] | |
## I haven't implemented this yet. | |
# To implement this one with open-addressing (double-hashing), | |
# you should replace the deleted entry with a dummy value instead of deleting it. | |
def delete(key): | |
pass | |
## The following is for testing the Hashtable class. | |
if __name__ == "__main__": | |
ht = Hashtable(5) | |
ht.insert('key1', 9) | |
ht.insert('key2', 2) | |
ht.insert('key3', 10) | |
ht.insert('key4', 100) | |
assert not ht.search('key5') # Since this key doesn't exist yet, it should return False. | |
ht.insert('key5', 10) | |
assert ht.search('key1') == 9 | |
assert ht.search('key2') == 2 | |
assert ht.search('key3') == 10 | |
assert ht.search('key4') == 100 | |
assert ht.search('key5') == 10 | |
assert not ht.search('key6') # Since this key doesn't exist, it should return False. | |
# You should be able to update existing values, too. | |
ht.insert('key3', -1) | |
ht.insert('key5', 42) | |
assert ht.search('key3') == -1 | |
assert ht.search('key5') == 42 | |
## The following part is for checking the number of | |
# elements being checked for un unsuccessful search. | |
ht2 = Hashtable(30011) | |
for i in range(1, 20004): # We're going to fill in about two thirds of the table. | |
ht2.insert('key' + str(i), 1) | |
# Try searching for a bunch of new keys. | |
# Then, find the average number of elements that were checked. | |
total = 0 | |
num_trials = 10**5 | |
max_num = 0 | |
for i in range(0, num_trials): | |
num_checked = ht2._search('new_key_' + str(i), test_mode=True)[2] | |
total += num_checked | |
if num_checked > max_num: | |
max_num = num_checked | |
average = total / num_trials | |
print('Average number of elements checked:', average) | |
print('Max number of elements checked:', max_num) |
Great to hear that!
…On Sun, Nov 15, 2020 at 10:29 PM ykdojo ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
Okay I'm not sure if I can, but I'll add it to my list of videos to make
in the future.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<https://gist.github.com/4f9741398c3653d3dc8b95ef52bb3fcf#gistcomment-3528716>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/ANHOL7MBH24EHMJDLGMZ2STSQC2D5ANCNFSM4TFU7OZQ>
.
Thannnnnnnnnks!
dojo not understand please quickly make another video.
Tried to make line by line breakdown of this for people who aren't getting it still(took me like 15 minutes to get it as well lol):
# This simply lets us add a type hint of Any to function arguments to let python
# know that it can be literally anything.
from typing import Any
class HashMap:
def __init__(self, size):
# Create list of size size
# i.e., size = 5, self.map = [None, None, None, None, None]
self.map = [None] * size
# store initial length of list
self.size = len(self.map)
def checkForKey(self, key: str):
h1 = hash(key) # create hash of key
firstIndex = h1 % self.size # Make hashed key fit inside self.map
if not self.map[firstIndex]: # if self.map[firstIndex] is None
return (False, firstIndex)
# Key does not exist and there is nothing there, so return False and firstIndex to insert()
elif self.map[firstIndex][0] == key:
return (True, firstIndex)
# Both the key and the value are stored in self.map as [key, value],
# so self.map[firstIndex][0] would grab index 0 of the [key, value] which is the key.
# If the existing key is equal to the key we are checking for, we tell the insert function to
# simply replace the value of that [key, value] pair.
# Think of this next part as a big else statement
# Since both of the above conditions were skipped, there must be a collision where we wanted to
# place our [key, value] pair.
# To deal with this, we use double hashing.
h2 = hash(key + 'd')
# We create a new hash that is slightly different than our first one
# We create our 'c' variable as Dojo calls it, I will call it stride here because it will represent
# our stride across the list as we search for a new home for our [key, value] pair.
stride = h2 % (self.size - 1) + 1
# Going through this with order of operations we get a stride between 1 and self.size - 1, Dojo
# explained this well enough in his video
# Our second index variable
secondIndex = (firstIndex + stride) % self.size
while secondIndex != firstIndex: # Go through the entire table
# Same conditions as the first if and elif statements
if not self.map[secondIndex]:
return (False, secondIndex)
elif self.map[secondIndex][0] == key:
return (True, secondIndex)
else:
secondIndex = (secondIndex + stride) % self.size
return (False, -1) # Table is full
def insert(self, key: str, value: Any):
# Run the search function we just wrote
searchResult = self.checkForKey(key)
# A Result of -1 means the table is full, so end the function here
if searchResult[1] == -1:
return
# A result of True means that the key already exists and we should simply update the value
# associated with it
if searchResult[0]:
index = searchResult[1]
self.map[index][1] = value
return
# There is now only one option left, and that is to put the new [key, value] pair where the hashing
# we did in the checkForKey() function tells us to
index = searchResult[1]
self.map[index] = [key, value]
def get(self, key: str):
# Basically the same process as the insert() function, except here we just return the value of the
# [key, value] pair
searchResult = self.checkForKey(key)
# Table is full
if searchResult[1] == -1:
return
# Key does not exist
if not searchResult[0]:
return False
# Return value of [key, value]
index = searchResult[1]
return self.map[index][1]
map = HashMap(10)
map.insert('key1', 12)
print(map.get('key1'))
map.insert('key2', 66)
map.insert('key3', 5)
# Because we used Any when building the insert function, the value can vary from key to key.
map.insert('key4', 'elephant')
# Prints False because key5 does not exist in our map yet.
print(map.get('key5'))
map.insert('key5', 11)
# Prints 11 now
print(map.get('key5'))
print(map.get('key4'))
# Update key4
map.insert('key4', 'Yolo')
print(map.get('key4'))
Golang implementation, based on @CaptainLupa 's one:
package main
import "fmt"
type Person struct {
name string
age int
}
type KeyValue struct {
key string
value interface{}
}
type HashMap struct {
size int
table []*KeyValue
}
func NewHashMap(size int) HashMap {
return HashMap{
size: size,
table: make([]*KeyValue, size),
}
}
func (m *HashMap) findIndex(key string) (bool, int) {
hash1 := hash(key)
index1 := hash1 % m.size
if m.table[index1] == nil {
return false, index1
}
if m.table[index1].key == key {
return true, index1
}
// we have a collision - let's start double hashing
hash2 := hash(key + string('d'))
stride := hash2%(m.size-1) + 1
index2 := (index1 + stride) % m.size
for index2 != index1 {
if m.table[index2] == nil {
return false, index2
}
if m.table[index2].key == key {
return true, index2
}
index2 = (index2 + stride) % m.size
}
// table is full
return false, -1
}
func (m *HashMap) Insert(key string, value interface{}) {
found, index := m.findIndex(key)
if index == -1 {
fmt.Println("could not insert into the map because it is full")
return
}
if found {
m.table[index].value = value
} else {
m.table[index] = &KeyValue{key, value}
}
}
func (m *HashMap) Search(key string) interface{} {
found, index := m.findIndex(key)
if found {
return m.table[index].value
}
return nil
}
func hash(s string) int {
hash := 5381
for _, c := range s {
hash = hash*33 + int(c)
}
return hash
}
func main() {
// the size of the map must be a prime number
hashMap := NewHashMap(7)
p1 := Person{"Carlos", 36}
p2 := Person{"Marcia", 39}
p3 := Person{"Rebeca", 2}
p4 := Person{"Isadora", 1}
p5 := Person{"Silvio", 77}
p6 := Person{"Marlene", 65}
p7 := Person{"Belinha", 16}
hashMap.Insert(p1.name, p1.age)
fmt.Println(hashMap.table)
hashMap.Insert(p2.name, p2.age)
fmt.Println(hashMap.table)
hashMap.Insert(p3.name, p3.age)
fmt.Println(hashMap.table)
hashMap.Insert(p4.name, p4.age)
fmt.Println(hashMap.table)
hashMap.Insert(p5.name, p5.age)
fmt.Println(hashMap.table)
hashMap.Insert(p6.name, p6.age)
fmt.Println(hashMap.table)
hashMap.Insert(p7.name, p7.age)
fmt.Println(hashMap.table)
fmt.Println(hashMap.Search(p1.name))
fmt.Println(hashMap.Search(p2.name))
fmt.Println(hashMap.Search(p3.name))
fmt.Println(hashMap.Search(p4.name))
fmt.Println(hashMap.Search(p5.name))
fmt.Println(hashMap.Search(p6.name))
fmt.Println(hashMap.Search(p7.name))
hashMap.Insert(p1.name, 999)
fmt.Println(hashMap.Search(p1.name))
p8 := Person{"Renato", 34}
hashMap.Insert(p8.name, p8.age)
fmt.Println(hashMap.Search(p8.name))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Okay I'm not sure if I can, but I'll add it to my list of videos to make in the future.